diff --git a/.github/scripts/__tests__/keepalive-loop.test.js b/.github/scripts/__tests__/keepalive-loop.test.js index 0e34cee77..cd51462b9 100644 --- a/.github/scripts/__tests__/keepalive-loop.test.js +++ b/.github/scripts/__tests__/keepalive-loop.test.js @@ -1989,3 +1989,123 @@ test('normaliseChecklistSection preserves non-list content', () => { assert.equal(result, expected); }); + +test('updateKeepaliveLoopSummary displays LLM provider analysis details', async () => { + const existingState = formatStateComment({ + trace: 'trace-llm', + iteration: 1, + max_iterations: 5, + failure_threshold: 3, + }); + const github = buildGithubStub({ + comments: [{ id: 77, body: existingState, html_url: 'https://example.com/77' }], + }); + await updateKeepaliveLoopSummary({ + github, + context: buildContext(123), + core: buildCore(), + inputs: { + prNumber: 123, + action: 'run', + runResult: 'success', + gateConclusion: 'success', + tasksTotal: 4, + tasksUnchecked: 2, + keepaliveEnabled: true, + autofixEnabled: false, + iteration: 1, + maxIterations: 5, + failureThreshold: 3, + trace: 'trace-llm', + llm_provider: 'github-models', + llm_confidence: 0.95, + llm_analysis_run: true, + }, + }); + + assert.equal(github.actions.length, 1); + assert.equal(github.actions[0].type, 'update'); + assert.match(github.actions[0].body, /### 🧠 Task Analysis/); + assert.match(github.actions[0].body, /GitHub Models \(primary\)/); + assert.match(github.actions[0].body, /Confidence \| 95%/); +}); + +test('updateKeepaliveLoopSummary shows fallback warning for OpenAI provider', async () => { + const existingState = formatStateComment({ + trace: 'trace-openai', + iteration: 1, + max_iterations: 5, + failure_threshold: 3, + }); + const github = buildGithubStub({ + comments: [{ id: 78, body: existingState, html_url: 'https://example.com/78' }], + }); + await updateKeepaliveLoopSummary({ + github, + context: buildContext(123), + core: buildCore(), + inputs: { + prNumber: 123, + action: 'run', + 
runResult: 'success', + gateConclusion: 'success', + tasksTotal: 4, + tasksUnchecked: 2, + keepaliveEnabled: true, + autofixEnabled: false, + iteration: 1, + maxIterations: 5, + failureThreshold: 3, + trace: 'trace-openai', + llm_provider: 'openai', + llm_confidence: 0.87, + llm_analysis_run: true, + }, + }); + + assert.equal(github.actions.length, 1); + assert.equal(github.actions[0].type, 'update'); + assert.match(github.actions[0].body, /### 🧠 Task Analysis/); + assert.match(github.actions[0].body, /OpenAI \(fallback\)/); + assert.match(github.actions[0].body, /Primary provider.*was unavailable/); +}); + +test('updateKeepaliveLoopSummary shows regex fallback warning', async () => { + const existingState = formatStateComment({ + trace: 'trace-regex', + iteration: 1, + max_iterations: 5, + failure_threshold: 3, + }); + const github = buildGithubStub({ + comments: [{ id: 79, body: existingState, html_url: 'https://example.com/79' }], + }); + await updateKeepaliveLoopSummary({ + github, + context: buildContext(123), + core: buildCore(), + inputs: { + prNumber: 123, + action: 'run', + runResult: 'success', + gateConclusion: 'success', + tasksTotal: 4, + tasksUnchecked: 2, + keepaliveEnabled: true, + autofixEnabled: false, + iteration: 1, + maxIterations: 5, + failureThreshold: 3, + trace: 'trace-regex', + llm_provider: 'regex-fallback', + llm_confidence: 0.7, + llm_analysis_run: true, + }, + }); + + assert.equal(github.actions.length, 1); + assert.equal(github.actions[0].type, 'update'); + assert.match(github.actions[0].body, /### 🧠 Task Analysis/); + assert.match(github.actions[0].body, /Regex \(fallback\)/); + assert.match(github.actions[0].body, /Primary provider.*was unavailable/); +}); diff --git a/.github/scripts/keepalive_loop.js b/.github/scripts/keepalive_loop.js index df3459610..a5afb996c 100644 --- a/.github/scripts/keepalive_loop.js +++ b/.github/scripts/keepalive_loop.js @@ -950,6 +950,11 @@ async function updateKeepaliveLoopSummary({ github, context, 
core, inputs }) { const agentSummary = normalise(inputs.agent_summary ?? inputs.agentSummary ?? inputs.codex_summary ?? inputs.codexSummary); const runUrl = normalise(inputs.run_url ?? inputs.runUrl); + // LLM task analysis details + const llmProvider = normalise(inputs.llm_provider ?? inputs.llmProvider); + const llmConfidence = toNumber(inputs.llm_confidence ?? inputs.llmConfidence, 0); + const llmAnalysisRun = toBool(inputs.llm_analysis_run ?? inputs.llmAnalysisRun, false); + const { state: previousState, commentId } = await loadKeepaliveState({ github, context, @@ -1211,6 +1216,29 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) { } } + // LLM analysis details - show which provider was used for task completion detection + if (llmAnalysisRun && llmProvider) { + const providerIcon = llmProvider === 'github-models' ? 'βœ…' : + llmProvider === 'openai' ? '⚠️' : + llmProvider === 'regex-fallback' ? 'πŸ”Ά' : 'ℹ️'; + const providerLabel = llmProvider === 'github-models' ? 'GitHub Models (primary)' : + llmProvider === 'openai' ? 'OpenAI (fallback)' : + llmProvider === 'regex-fallback' ? 
'Regex (fallback)' : llmProvider; + const confidencePercent = Math.round(llmConfidence * 100); + summaryLines.push( + '', + '### 🧠 Task Analysis', + `| Provider | ${providerIcon} ${providerLabel} |`, + `| Confidence | ${confidencePercent}% |`, + ); + if (llmProvider !== 'github-models') { + summaryLines.push( + '', + `> ⚠️ Primary provider (GitHub Models) was unavailable; used ${providerLabel} instead.`, + ); + } + } + if (isTransientFailure) { summaryLines.push( '', @@ -1682,12 +1710,13 @@ async function analyzeTaskCompletion({ github, context, prNumber, baseSha, headS * @param {number} params.prNumber - PR number * @param {string} params.baseSha - Base SHA (before agent work) * @param {string} params.headSha - Head SHA (after agent work) + * @param {string[]} [params.llmCompletedTasks] - Tasks marked complete by LLM analysis * @param {object} [params.core] - Optional core for logging * @returns {Promise<{updated: boolean, tasksChecked: number, details: string}>} */ -async function autoReconcileTasks({ github, context, prNumber, baseSha, headSha, core }) { +async function autoReconcileTasks({ github, context, prNumber, baseSha, headSha, llmCompletedTasks, core }) { const log = (msg) => core?.info?.(msg) || console.log(msg); - + // Get current PR body let pr; try { @@ -1710,13 +1739,39 @@ async function autoReconcileTasks({ github, context, prNumber, baseSha, headSha, return { updated: false, tasksChecked: 0, details: 'No tasks found in PR body' }; } - // Analyze what tasks may have been completed + // Build high-confidence matches from multiple sources + let highConfidence = []; + + // Source 1: LLM analysis (highest priority if available) + if (llmCompletedTasks && Array.isArray(llmCompletedTasks) && llmCompletedTasks.length > 0) { + log(`LLM analysis found ${llmCompletedTasks.length} completed task(s)`); + for (const task of llmCompletedTasks) { + highConfidence.push({ + task, + reason: 'LLM session analysis', + confidence: 'high', + source: 'llm', + }); + } + } 
+ + // Source 2: Commit/file analysis (fallback or supplementary) const analysis = await analyzeTaskCompletion({ github, context, prNumber, baseSha, headSha, taskText, core }); - // Only auto-check high-confidence matches - const highConfidence = analysis.matches.filter(m => m.confidence === 'high'); + // Add commit-based matches that aren't already covered by LLM + const llmTasksLower = new Set((llmCompletedTasks || []).map(t => t.toLowerCase())); + const commitMatches = analysis.matches + .filter(m => m.confidence === 'high') + .filter(m => !llmTasksLower.has(m.task.toLowerCase())); + + if (commitMatches.length > 0) { + log(`Commit analysis found ${commitMatches.length} additional task(s)`); + for (const match of commitMatches) { + highConfidence.push({ ...match, source: 'commit' }); + } + } if (highConfidence.length === 0) { log('No high-confidence task matches to auto-check'); @@ -1766,14 +1821,26 @@ async function autoReconcileTasks({ github, context, prNumber, baseSha, headSha, return { updated: false, tasksChecked: 0, - details: `Failed to update PR: ${error.message}` + details: `Failed to update PR: ${error.message}`, + sources: { llm: 0, commit: 0 }, }; } + // Count matches by source for reporting + const llmCount = highConfidence.filter(m => m.source === 'llm').length; + const commitCount = highConfidence.filter(m => m.source === 'commit').length; + + // Build detailed description + const sourceDesc = []; + if (llmCount > 0) sourceDesc.push(`${llmCount} from LLM analysis`); + if (commitCount > 0) sourceDesc.push(`${commitCount} from commit analysis`); + const sourceInfo = sourceDesc.length > 0 ? 
` (${sourceDesc.join(', ')})` : '';
+
   return {
     updated: true,
     tasksChecked: checkedCount,
-    details: `Auto-checked ${checkedCount} task(s): ${highConfidence.map(m => m.task.slice(0, 30) + '...').join(', ')}`
+    details: `Auto-checked ${checkedCount} task(s)${sourceInfo}: ${highConfidence.map(m => m.task.slice(0, 30) + '...').join(', ')}`,
+    sources: { llm: llmCount, commit: commitCount },
   };
 }
diff --git a/.github/workflows/agents-keepalive-loop.yml b/.github/workflows/agents-keepalive-loop.yml
index 88bde2650..5ad9cff8d 100644
--- a/.github/workflows/agents-keepalive-loop.yml
+++ b/.github/workflows/agents-keepalive-loop.yml
@@ -362,6 +362,26 @@ jobs:
             const beforeSha = '${{ needs.evaluate.outputs.head_sha }}'; // SHA before agent ran
             const headSha = '${{ needs.run-codex.outputs.commit-sha }}'; // SHA after agent ran
 
+            // LLM analysis metadata (toJSON() emits escaped JS literals -- prevents script injection)
+            const llmProvider = ${{ toJSON(needs.run-codex.outputs.llm-provider || '') }};
+            const llmConfidence = ${{ toJSON(needs.run-codex.outputs.llm-confidence || '') }};
+            const llmAnalysisRun = '${{ needs.run-codex.outputs.llm-analysis-run }}' === 'true';
+
+            // Parse LLM completed tasks if available (task text is attacker-influenced; keep it quoted via toJSON)
+            let llmCompletedTasks = [];
+            const llmTasksJson = ${{ toJSON(needs.run-codex.outputs.llm-completed-tasks || '[]') }};
+            try {
+              llmCompletedTasks = JSON.parse(llmTasksJson);
+              if (llmCompletedTasks.length > 0) {
+                core.info(`LLM analysis found ${llmCompletedTasks.length} completed task(s)`);
+                if (llmProvider) {
+                  core.info(`LLM provider: ${llmProvider} (confidence: ${llmConfidence})`);
+                }
+              }
+            } catch (e) {
+              core.debug(`Failed to parse LLM tasks: ${e.message}`);
+            }
+
             if (!prNumber || !beforeSha || !headSha) {
               core.info('Missing required inputs for task reconciliation');
               return;
@@ -371,19 +391,24 @@ jobs:
             core.info(`Comparing ${beforeSha.slice(0, 7)} → ${headSha.slice(0, 7)}`);
 
             const result = await autoReconcileTasks({
-              github, context, prNumber, baseSha: beforeSha, headSha, core
+              github, context, prNumber, baseSha: beforeSha, headSha,
llmCompletedTasks, core }); if (result.updated) { core.info(`βœ… ${result.details}`); - core.notice(`Auto-checked ${result.tasksChecked} task(s) based on commit analysis`); + core.notice(`Auto-checked ${result.tasksChecked} task(s) based on analysis`); } else { core.info(`ℹ️ ${result.details}`); } - // Output for step summary + // Output for step summary and downstream reporting core.setOutput('tasks_checked', result.tasksChecked); core.setOutput('reconciliation_details', result.details); + core.setOutput('llm_provider', llmProvider); + core.setOutput('llm_confidence', llmConfidence); + core.setOutput('llm_analysis_run', llmAnalysisRun); + core.setOutput('llm_tasks_count', llmCompletedTasks.length); + core.setOutput('commit_tasks_count', result.sources?.commit || 0); - name: Update summary comment uses: actions/github-script@v7 @@ -415,5 +440,9 @@ jobs: agent_commit_sha: '${{ needs.run-codex.outputs.commit-sha }}', agent_files_changed: '${{ needs.run-codex.outputs.files-changed }}', agent_summary: process.env.CODEX_SUMMARY || '', + // LLM analysis details for task completion reporting + llm_provider: '${{ needs.run-codex.outputs.llm-provider || '' }}', + llm_confidence: '${{ needs.run-codex.outputs.llm-confidence || '' }}', + llm_analysis_run: '${{ needs.run-codex.outputs.llm-analysis-run }}' === 'true', }; await updateKeepaliveLoopSummary({ github, context, core, inputs }); diff --git a/.github/workflows/reusable-codex-run.yml b/.github/workflows/reusable-codex-run.yml index 1cb935f25..dae9c0e33 100644 --- a/.github/workflows/reusable-codex-run.yml +++ b/.github/workflows/reusable-codex-run.yml @@ -27,6 +27,11 @@ on: required: false default: '' type: string + workflows_ref: + description: 'The ref of the Workflows repo to checkout for scripts. Defaults to main.' + required: false + default: 'main' + type: string max_runtime_minutes: description: 'Upper bound for the job runtime in minutes.' 
required: false @@ -116,6 +121,14 @@ jobs: error-category: ${{ steps.classify_failure.outputs.error_category }} error-type: ${{ steps.classify_failure.outputs.error_type }} error-recovery: ${{ steps.classify_failure.outputs.error_recovery }} + # LLM analysis outputs + llm-analysis-run: ${{ steps.llm_analysis.outputs.llm-analysis-run }} + llm-completed-tasks: ${{ steps.llm_analysis.outputs.completed-tasks }} + llm-has-completions: ${{ steps.llm_analysis.outputs.has-completions }} + llm-provider: ${{ steps.llm_analysis.outputs.provider }} + llm-confidence: ${{ steps.llm_analysis.outputs.confidence }} + session-event-count: ${{ steps.analyze_session.outputs.event-count }} + session-todo-count: ${{ steps.analyze_session.outputs.todo-count }} steps: - name: Mint GitHub App token (preferred) id: app_token @@ -163,16 +176,15 @@ jobs: ref: ${{ inputs.pr_ref || github.ref }} token: ${{ steps.auth_token.outputs.checkout_token }} - # Checkout Workflows repo scripts for post-completion and error handling + # Checkout Workflows repo scripts for post-completion, error handling, and LLM analysis # These scripts are in stranske/Workflows but need to be available when # this reusable workflow runs in consumer repos - name: Checkout Workflows scripts uses: actions/checkout@v4 with: repository: stranske/Workflows - ref: main - sparse-checkout: .github/scripts - sparse-checkout-cone-mode: false + # Use the workflows_ref input which should match the @ref in the uses: line + ref: ${{ inputs.workflows_ref }} path: .workflows-lib token: ${{ steps.auth_token.outputs.checkout_token }} @@ -216,6 +228,17 @@ jobs: if [ -f pyproject.toml ]; then python -m pip install -e ".[dev]" || python -m pip install -e . fi + + - name: Install Workflows repo LLM dependencies + run: | + # Install LLM dependencies from Workflows repo for session analysis + if [ -f .workflows-lib/tools/requirements.txt ]; then + echo "Installing LLM analysis dependencies..." 
+          python -m pip install -r .workflows-lib/tools/requirements.txt || {
+            echo "::notice::LLM dependencies not installed, will fall back to regex analysis"
+          }
+          fi
+
       - name: Validate prompt template integrity
         id: guard
         env:
@@ -370,8 +393,10 @@ jobs:
           PR_NUM="${{ inputs.pr_number }}"
           if [ -n "${PR_NUM}" ]; then
             OUTPUT_FILE="codex-output-${PR_NUM}.md"
+            SESSION_JSONL="codex-session-${PR_NUM}.jsonl"
           else
             OUTPUT_FILE="codex-output.md"
+            SESSION_JSONL="codex-session.jsonl"
           fi
           SANDBOX="${{ inputs.sandbox }}"
           EXTRA_ARGS="${{ inputs.codex_args }}"
@@ -385,14 +410,15 @@ jobs:
           echo "Prompt file: $PROMPT_FILE"
           echo "Sandbox: $SANDBOX"
 
-          # Run codex exec with prompt from file
+          # Run codex exec with --json to capture rich session data
+          # JSONL events go to stdout only; stderr stays on the job log so $SESSION_JSONL stays parseable
           # Build command array to handle EXTRA_ARGS properly
           # NOTE: --mode flag not yet supported by Codex CLI, removed for now
           CODEX_EXIT=0
           if [ -n "${EXTRA_ARGS:-}" ]; then
-            eval "codex exec --skip-git-repo-check --sandbox \"$SANDBOX\" --output-last-message \"$OUTPUT_FILE\" $EXTRA_ARGS \"\$(cat \"\$PROMPT_FILE\")\"" || CODEX_EXIT=$?
+            eval "codex exec --json --skip-git-repo-check --sandbox \"$SANDBOX\" --output-last-message \"$OUTPUT_FILE\" $EXTRA_ARGS \"\$(cat \"\$PROMPT_FILE\")\"" > "$SESSION_JSONL" || CODEX_EXIT=$?
           else
-            codex exec --skip-git-repo-check --sandbox "$SANDBOX" --output-last-message "$OUTPUT_FILE" "$(cat "$PROMPT_FILE")" || CODEX_EXIT=$?
+            codex exec --json --skip-git-repo-check --sandbox "$SANDBOX" --output-last-message "$OUTPUT_FILE" "$(cat "$PROMPT_FILE")" > "$SESSION_JSONL" || CODEX_EXIT=$?
fi echo "exit-code=${CODEX_EXIT}" >> "$GITHUB_OUTPUT" @@ -420,6 +446,130 @@ jobs: # Exit with original code to mark job as failed if Codex failed exit "$CODEX_EXIT" + - name: Analyze Codex session + id: analyze_session + if: always() + env: + PYTHONPATH: ${{ github.workspace }} + PR_NUM: ${{ inputs.pr_number }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + + if [ -n "${PR_NUM}" ]; then + SESSION_JSONL="codex-session-${PR_NUM}.jsonl" + else + SESSION_JSONL="codex-session.jsonl" + fi + export SESSION_JSONL + + # Check if session file exists and has content + if [ ! -f "$SESSION_JSONL" ] || [ ! -s "$SESSION_JSONL" ]; then + echo "No session JSONL found or file is empty" + echo "session-available=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + echo "Session JSONL captured: $(wc -l < "$SESSION_JSONL") lines" + echo "session-available=true" >> "$GITHUB_OUTPUT" + + # Basic parsing (always available) + python3 << 'PYEOF' + import os + import sys + sys.path.insert(0, '.') + + session_file = os.environ.get("SESSION_JSONL", "codex-session.jsonl") + github_output = os.environ.get("GITHUB_OUTPUT", "/dev/null") + + try: + from tools.codex_jsonl_parser import parse_codex_jsonl_file + + session = parse_codex_jsonl_file(session_file) + + print(f"::notice::Session parsed: {session.raw_event_count} events") + print(f"::notice::Agent messages: {len(session.agent_messages)}") + print(f"::notice::Commands: {len(session.commands)} ({len(session.successful_commands)} ok, {len(session.failed_commands)} failed)") + print(f"::notice::File changes: {len(session.file_changes)}") + print(f"::notice::Todo items: {len(session.todo_items)}") + + if session.parse_errors: + print(f"::warning::Parse errors: {len(session.parse_errors)}") + + # Output key metrics for downstream steps + with open(github_output, "a") as f: + f.write(f"event-count={session.raw_event_count}\n") + f.write(f"message-count={len(session.agent_messages)}\n") + 
f.write(f"command-count={len(session.commands)}\n") + f.write(f"file-change-count={len(session.file_changes)}\n") + f.write(f"todo-count={len(session.todo_items)}\n") + f.write(f"completed-todo-count={len(session.completed_todos)}\n") + + except ImportError as e: + print(f"::notice::Session parser not available: {e}") + except Exception as e: + print(f"::warning::Session analysis failed: {e}") + PYEOF + + - name: Analyze task completion with LLM + id: llm_analysis + if: always() && steps.analyze_session.outputs.session-available == 'true' && inputs.pr_number != '' + env: + PYTHONPATH: ${{ github.workspace }}/.workflows-lib:${{ github.workspace }} + PR_NUM: ${{ inputs.pr_number }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + + SESSION_JSONL="codex-session-${PR_NUM}.jsonl" + ANALYSIS_FILE="codex-analysis-${PR_NUM}.json" + + # Fetch PR body to extract tasks + echo "Fetching PR #${PR_NUM} body..." + PR_BODY=$(gh pr view "${PR_NUM}" --json body --jq '.body' 2>/dev/null || echo "") + + if [ -z "$PR_BODY" ]; then + echo "::notice::Could not fetch PR body, skipping LLM analysis" + echo "llm-analysis-run=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # Save PR body to temp file + echo "$PR_BODY" > pr_body.md + + # Run full LLM analysis and save JSON output + # Scripts are in .workflows-lib from Workflows repo checkout + echo "Running LLM-powered task completion analysis..." 
+          python3 .workflows-lib/scripts/analyze_codex_session.py \
+            --session-file "$SESSION_JSONL" \
+            --pr-body-file pr_body.md \
+            --output json > "$ANALYSIS_FILE" || {
+              echo "::warning::LLM analysis failed, continuing without it"
+              cat "$ANALYSIS_FILE" 2>/dev/null || true  # Show partial stdout for debugging
+              echo "llm-analysis-run=false" >> "$GITHUB_OUTPUT"
+              rm -f "$ANALYSIS_FILE"
+              exit 0
+            }
+
+          # Also output to GitHub Actions for visibility
+          python3 .workflows-lib/scripts/analyze_codex_session.py \
+            --session-file "$SESSION_JSONL" \
+            --pr-body-file pr_body.md \
+            --output github-actions || true
+
+          echo "llm-analysis-run=true" >> "$GITHUB_OUTPUT"
+          echo "analysis-file=$ANALYSIS_FILE" >> "$GITHUB_OUTPUT"
+
+          # Extract key fields for downstream use
+          if [ -f "$ANALYSIS_FILE" ]; then
+            COMPLETED=$(python3 -c "import json; d=json.load(open('$ANALYSIS_FILE')); print(json.dumps(d.get('completed_tasks', [])))")
+            PROVIDER=$(python3 -c "import json; d=json.load(open('$ANALYSIS_FILE')); print(d.get('provider', 'unknown'))")
+            CONFIDENCE=$(python3 -c "import json; d=json.load(open('$ANALYSIS_FILE')); print(d.get('confidence', 0))")
+            echo "completed-tasks=$COMPLETED" >> "$GITHUB_OUTPUT"
+            echo "provider=$PROVIDER" >> "$GITHUB_OUTPUT"
+            echo "confidence=$CONFIDENCE" >> "$GITHUB_OUTPUT"
+          fi
+
       - name: Commit and push changes
         id: commit
         env:
@@ -555,6 +705,8 @@ jobs:
           name: codex-output-${{ inputs.pr_number || github.run_id }}
           path: |
             codex-output*.md
+            codex-session*.jsonl
+            codex-analysis*.json
           if-no-files-found: ignore
 
       - name: Post completion checkpoint comment
diff --git a/docs/plans/langchain-keepalive-integration.md b/docs/plans/langchain-keepalive-integration.md
new file mode 100644
index 000000000..0ee97c2af
--- /dev/null
+++ b/docs/plans/langchain-keepalive-integration.md
@@ -0,0 +1,471 @@
+# LangChain Keepalive Integration Plan
+
+> **Status**: Planning
+> **Created**: 2026-01-02
+> **Target Branch**: `feature/langchain-analysis`
+> **Test Consumer**:
`stranske/Portable-Alpha-Extension-Model` + +--- + +## Summary of Findings + +### 1. Session Data Sources (Multiple Options!) + +We discovered **three different data sources** from the Codex CLI, each with different richness levels: + +#### Option A: Final Summary (`--output-last-message`) - Current +**Current state**: We capture via `codex-output-*.md` artifacts. +- Uploaded by `reusable-codex-run.yml` line 553 +- Contains Codex's final summary message only +- Artifact name format: `codex-output-{pr_number}` + +**Pros**: Simple, low data volume +**Cons**: Limited context, misses intermediate steps + +--- + +#### Option B: JSONL Event Stream (`--json`) - **Recommended** +The Codex CLI has a `--json` flag that streams **detailed events as JSONL**: + +```bash +codex exec --json --output-last-message "$OUTPUT_FILE" "$PROMPT" 2>&1 | tee "$SESSION_LOG" +``` + +**Event types available** (from [exec.md](https://github.com/openai/codex/blob/main/docs/exec.md#json-output-mode)): +- `thread.started` / `turn.started` / `turn.completed` / `turn.failed` +- `item.started` / `item.updated` / `item.completed` + +**Item types**: +| Type | Contains | LLM Analysis Potential | +|------|----------|----------------------| +| `agent_message` | Assistant responses | ⭐⭐⭐ High - explicit completion statements | +| `reasoning` | Model thinking summaries | ⭐⭐⭐ High - reveals intent and progress | +| `command_execution` | Shell commands + exit codes + output | ⭐⭐ Medium - shows actual work done | +| `file_change` | Files added/modified/deleted | ⭐⭐ Medium - concrete evidence | +| `mcp_tool_call` | MCP tool invocations | ⭐ Low - implementation detail | +| `web_search` | Web search actions | ⭐ Low - implementation detail | +| `todo_list` | Task tracking | ⭐⭐⭐ High - direct task mapping! 
| + +**Known issues** (from GitHub): +- [#4776](https://github.com/openai/codex/issues/4776): Field names changed (`item_type`β†’`type`, `assistant_message`β†’`agent_message`) +- [#5276](https://github.com/openai/codex/issues/5276): Reasoning token usage not yet in output +- Schema may evolve - need graceful parsing + +**Pros**: Rich data, shows reasoning and progress, includes todo tracking! +**Cons**: More data to process, schema changes over time + +--- + +#### Option C: Session Files (`~/.codex/sessions/`) +Full session history saved to disk: +``` +~/.codex/sessions/YYYY/MM/DD/rollout-*.jsonl +``` + +**Pros**: Most complete data, includes full token counts +**Cons**: Requires file system access post-run, may not persist in CI + +--- + +#### Option D: TUI Session Recording (`CODEX_TUI_RECORD_SESSION=1`) +Environment variable enables detailed logging: +```bash +CODEX_TUI_RECORD_SESSION=1 codex ... +# Logs to ~/.codex/log/session-YYYYMMDDTHHMMSSZ.jsonl +``` + +**Pros**: Captures all TUI events +**Cons**: Designed for interactive mode, may not work with `codex exec` + +--- + +### Data Source Selection for Testing + +| Phase | Data Source | Why | +|-------|-------------|-----| +| Test 1 | Option A (summary only) | Baseline comparison | +| Test 2 | Option B (`--json` stream) | Recommended - rich + practical | +| Test 3 | Option B subset | Only `agent_message` + `reasoning` + `todo_list` | + +**Priority fields for analysis**: +1. `agent_message` - What did Codex say it accomplished? +2. `reasoning` - What was it thinking? +3. `todo_list` - Direct mapping to PR checkboxes! +4. `file_change` - Concrete evidence of work + +### 2. 
GitHub Models API βœ… + +**Verified working** with your GitHub token: +```bash +curl -s "https://models.inference.ai.azure.com/chat/completions" \ + -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"messages":[{"role":"user","content":"test"}],"model":"gpt-4o-mini"}' +``` + +**Integration approach**: Use LangChain's OpenAI integration with custom base URL: +```python +from langchain_openai import ChatOpenAI + +llm = ChatOpenAI( + model="gpt-4o-mini", + base_url="https://models.inference.ai.azure.com", + api_key=os.environ["GITHUB_TOKEN"], # GitHub token works! +) +``` + +No separate `langchain-github` package needed. + +--- + +## Community Tools & Research + +### Existing Codex Session Analysis Tools + +| Tool | Description | Relevance | +|------|-------------|-----------| +| [codex-session-view](https://github.com/AcidicSoil/codex-session-view) | Visualizer with **AI Session Coach** that analyzes sessions using LLM | ⭐⭐⭐ Reference implementation! | +| [codex-history-list](https://github.com/shinshin86/codex-history-list) | CLI to list sessions, extracts cwd and first user request | ⭐⭐ Parsing patterns | +| [codex_usage_report](https://github.com/rubens-amaral/codex_usage_report) | Go CLI analyzing session logs for rate limits | ⭐ Token tracking | +| [cxusage](https://github.com/zaharsyahrafi/cxusage) | Daily usage aggregation from session logs | ⭐ Aggregation patterns | + +**Key insight from `codex-session-view`**: Uses AI Session Coach with multiple providers (OpenAI, Gemini, LM Studio) - validates our provider fallback approach! 
+ +### LangChain Integration Patterns + +**No direct Codexβ†’LangChain library exists**, but relevant LangChain components: + +| Component | Use Case | +|-----------|----------| +| `TrajectoryEvalChain` | Evaluates agent step sequences - similar to our task completion analysis | +| `LogStreamCallbackHandler` | Real-time event streaming - pattern for processing JSONL | +| `FileCallbackHandler` | Persists agent actions - reference for our logging | + +**LangChain trajectory format** (from `trajectory_eval_chain.py`): +```python +def get_agent_trajectory(steps: Sequence[tuple[AgentAction, str]]) -> str: + return "\n\n".join([ + f"""Step {i}: +Tool used: {action.tool} +Tool input: {action.tool_input} +Tool output: {output}""" + for i, (action, output) in enumerate(steps, 1) + ]) +``` + +This pattern maps well to Codex JSONL events! + +### Gap Analysis + +- ❌ No existing Codex JSONL β†’ LangChain message converter +- ❌ Python SDK for Codex still proposed ([#5320](https://github.com/openai/codex/issues/5320)) +- βœ… Community has validated LLM-based session analysis approach +- βœ… Our provider fallback matches `codex-session-view` pattern + +--- + +## Provider Fallback Chain + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ 1. GitHub Models API (gpt-4o-mini) β”‚ +β”‚ - Uses existing GITHUB_TOKEN β”‚ +β”‚ - Free with Copilot subscription β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ 2. OpenAI API (gpt-4o-mini) β”‚ +β”‚ - Uses OPENAI_API_KEY secret β”‚ +β”‚ - ~$0.0006 per analysis β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ 3. 
Regex Fallback β”‚ +β”‚ - No API calls β”‚ +β”‚ - Basic pattern matching β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +--- + +## Analysis Timing Options + +| Option | When | Pros | Cons | +|--------|------|------|------| +| **A: Every round** | After each Codex run | Most accurate, catches all completions | Higher API usage | +| **B: On stall** | After round with no checkbox changes | Targeted intervention | Delays detection by 1 round | +| **C: Conditional** | Round 1 always, then only on stall | Balances accuracy vs cost | More logic complexity | +| **D: Post-CI** | After CI completes | Can correlate CI results with tasks | Adds latency | + +**Testing plan**: Run A vs C to measure cost/benefit trade-off. + +--- + +## Dependencies to Add + +```toml +# pyproject.toml [project.optional-dependencies] +langchain = [ + "langchain-core>=0.3.0", + "langchain-openai>=0.3.0", +] +``` + +**Note**: Keep as optional dependency so workflows without LLM still function. 
+ +--- + +## Files to Create/Modify + +### New Files in `tools/` + +| File | Purpose | +|------|---------| +| `llm_provider.py` | Provider abstraction with GitHub β†’ OpenAI β†’ regex fallback | +| `langchain_task_extractor.py` | LLM-enhanced task/scope extraction | +| `codex_log_analyzer.py` | Session output analysis for completion detection | +| `ci_failure_triage.py` | CI failure classification and fix suggestions | +| `update_pr_checkboxes.py` | GitHub API wrapper to update PR body checkboxes | +| `post_progress_comment.py` | Posts analysis comment when work incomplete | + +### Workflow Modifications + +| File | Change | +|------|--------| +| `.github/workflows/reusable-codex-run.yml` | Add post-run analysis step | +| `.github/scripts/keepalive_loop.js` | Inject analysis into next prompt | + +--- + +## Testing Plan + +### Phase 0: Data Source Evaluation + +**Goal**: Determine which session data source provides best signal-to-noise for task completion detection. + +| Test | Data Source | Method | +|------|-------------|--------| +| **0.1** | Summary only (Option A) | Current `--output-last-message` | +| **0.2** | Full JSONL (Option B) | `--json` piped to file | +| **0.3** | Filtered JSONL | Only `agent_message` + `reasoning` + `todo_list` events | + +**Evaluation criteria**: +- Can the LLM accurately detect task completion? +- What's the token cost per analysis? +- How robust is parsing to schema changes? + +**Workflow change for Option B**: +```yaml +# Current: +codex exec --output-last-message "$OUTPUT_FILE" "$PROMPT" + +# Enhanced: +codex exec --json --output-last-message "$OUTPUT_FILE" "$PROMPT" 2>&1 | tee "$SESSION_JSONL" +# Then parse $SESSION_JSONL for rich analysis +``` + +--- + +### Phase 1: Baseline (Current System) + +1. Create test issue in Portable Alpha with 3-4 tasks +2. Let keepalive run with current regex-only system +3. 
Record: + - Total rounds to actual completion + - Rounds to checkbox detection + - False negatives (work done, not detected) + +### Phase 2: LangChain Enhanced + +1. Push `feature/langchain-analysis` branch to Workflows +2. Update Portable Alpha to use: + ```yaml + uses: stranske/Workflows/.github/workflows/reusable-codex-run.yml@feature/langchain-analysis + ``` +3. Add `OPENAI_API_KEY` secret to Portable Alpha (fallback) +4. Create similar test issue +5. Record same metrics + +### Phase 3: Analysis + +| Metric | Regex-Only | LangChain | Improvement | +|--------|------------|-----------|-------------| +| Rounds to completion | ? | ? | ? | +| Detection accuracy | ? | ? | ? | +| False positives | ? | ? | ? | +| API cost per PR | $0 | ~$0.01 | -$0.01 | +| Time per round | ? | +2-3s | Negligible | + +--- + +## Implementation Steps + +### Step 1: Add LangChain dependencies +- Update `pyproject.toml` with optional `[langchain]` extras +- Create `tools/llm_provider.py` with fallback logic + +### Step 2: Port and adapt tools +- Copy tools from Trend Model Project (already retrieved to /tmp) +- Adapt to use `llm_provider.py` abstraction +- Add tests + +### Step 3: Workflow integration +- Add analysis step to `reusable-codex-run.yml` +- Wire analysis results to PR checkbox updates +- Wire analysis results to next-round prompt + +### Step 4: Consumer setup +- Update Portable Alpha workflow reference +- Add `OPENAI_API_KEY` secret +- Create test issue + +### Step 5: Run comparison tests +- Execute Phase 1 (baseline) +- Execute Phase 2 (enhanced) +- Document results + +### Step 6: Tune and finalize +- Decide on timing option (A/B/C/D) +- Merge to main +- Revert consumer to `@main` + +--- + +## Secrets Required + +| Secret | Repo | Purpose | +|--------|------|---------| +| `OPENAI_API_KEY` | Portable Alpha | Fallback LLM provider | +| `GITHUB_TOKEN` | Auto-provided | GitHub Models API (primary) | + +--- + +## Codex JSONL Event Schema Reference + +Based on 
[exec.md](https://github.com/openai/codex/blob/main/docs/exec.md) and source code analysis. + +### Thread/Turn Events +```json +{"type": "thread.started", "thread_id": "uuid", "timestamp": "..."} +{"type": "turn.started", "turn_id": "uuid", "thread_id": "uuid"} +{"type": "turn.completed", "turn_id": "uuid", "token_usage": {...}} +{"type": "turn.failed", "turn_id": "uuid", "error": "..."} +``` + +### Item Events +```json +{"type": "item.started", "item_id": "uuid", "item_type": "agent_message"} +{"type": "item.updated", "item_id": "uuid", "content": "..."} +{"type": "item.completed", "item_id": "uuid"} +``` + +### High-Value Item Types for Analysis + +**`agent_message`** - What Codex says: +```json +{ + "type": "item.completed", + "item_type": "agent_message", + "content": "I've completed the first two tasks..." +} +``` + +**`reasoning`** - What Codex is thinking: +```json +{ + "type": "item.completed", + "item_type": "reasoning", + "content": "The user wants me to fix the tests. I should first..." +} +``` + +**`command_execution`** - Shell commands: +```json +{ + "type": "item.completed", + "item_type": "command_execution", + "command": "pytest tests/", + "exit_code": 0, + "output": "..." +} +``` + +**`file_change`** - File modifications: +```json +{ + "type": "item.completed", + "item_type": "file_change", + "path": "src/module.py", + "change_type": "modified" +} +``` + +**`todo_list`** - Task tracking (if emitted): +```json +{ + "type": "item.completed", + "item_type": "todo_list", + "items": [ + {"task": "Fix test failures", "status": "completed"}, + {"task": "Update documentation", "status": "in_progress"} + ] +} +``` + +### Schema Versioning Notes + +⚠️ **Known breaking changes**: +- `item_type` was renamed to `type` in some events +- `assistant_message` renamed to `agent_message` +- Always use defensive parsing with fallbacks + +--- + +## Rollback Plan + +If LangChain integration causes issues: +1. 
Consumer repos: Change `@feature/langchain-analysis` back to `@main`
+2. No code changes needed in consumer
+3. Feature branch remains available for debugging
+
+---
+
+## Open Questions
+
+1. **~~Codex session logs~~**: Do we need full transcripts, or is the summary sufficient?
+   - ✅ **RESOLVED**: Multiple options identified! `--json` mode provides rich JSONL stream.
+   - Testing plan includes Phase 0 to evaluate data source options.
+
+2. **`todo_list` event**: Does Codex emit `todo_list` events that map to PR checkboxes?
+   - This could be the holy grail for direct checkbox synchronization
+   - Need to capture real session to verify event structure
+
+3. **Rate limits**: Does GitHub Models API have rate limits we need to handle?
+   - Need to test under load
+
+4. **Checkbox update permissions**: Can workflow token update PR body?
+   - Yes, `contents: write` and `pull-requests: write` already granted
+
+5. **JSONL schema stability**: How often do Codex event schemas change?
+   - Known issue [#4776](https://github.com/openai/codex/issues/4776) documents field renames
+   - Need defensive parsing with fallbacks
+
+---
+
+## Next Steps
+
+### Immediate (Data Source Evaluation)
+1. [ ] Modify workflow to capture `--json` output alongside summary
+2. [ ] Run Codex manually to capture sample JSONL session
+3. [ ] Analyze which event types contain task completion signals
+4. [ ] Verify `todo_list` event structure (if present)
+
+### Implementation
+5. [ ] Create `feature/langchain-analysis` branch
+6. [ ] Implement JSONL parser for Codex events
+7. [ ] Implement `llm_provider.py` with fallback chain
+8. [ ] Port the three analysis tools with JSONL support
+9. [ ] Add workflow integration
+
+### Testing
+10. [ ] Set up Portable Alpha for testing
+11. [ ] Run Phase 0 data source comparison
+12. [ ] Run Phase 1 baseline measurement
+13. [ ] Run Phase 2 LangChain measurement
+14. 
[ ] Document results and decide on timing option diff --git a/pyproject.toml b/pyproject.toml index dc785f8a6..6fe7ff8b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,13 @@ dev = [ "tomlkit>=0.13.0", ] +# LangChain integration for LLM-enhanced task analysis +# Install with: pip install -e ".[langchain]" +langchain = [ + "langchain-core>=0.3.0", + "langchain-openai>=0.3.0", +] + [tool.setuptools] # This repo is primarily automation + scripts; avoid setuptools trying to auto-discover # random top-level dirs as importable packages (which breaks editable installs). diff --git a/scripts/analyze_codex_session.py b/scripts/analyze_codex_session.py new file mode 100755 index 000000000..09f0f8602 --- /dev/null +++ b/scripts/analyze_codex_session.py @@ -0,0 +1,325 @@ +#!/usr/bin/env python3 +""" +Analyze Codex Session CLI + +Command-line interface for analyzing Codex session output to determine +task completion status. Designed to be called from GitHub Actions workflows. + +Usage: + python scripts/analyze_codex_session.py \ + --session-file codex-session-123.jsonl \ + --tasks "Fix bug" "Add tests" "Update docs" \ + --output json + + # Or with PR body file containing checkboxes + python scripts/analyze_codex_session.py \ + --session-file codex-session-123.jsonl \ + --pr-body-file pr_body.md \ + --output github-actions + +Exit codes: + 0 - Analysis completed successfully + 1 - Error during analysis + 2 - No session file found +""" + +from __future__ import annotations + +import argparse +import json +import logging +import os +import re +import sys +from pathlib import Path + +# Add parent to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from tools.codex_session_analyzer import AnalysisResult, analyze_session + +logger = logging.getLogger(__name__) + + +def extract_tasks_from_pr_body(pr_body: str) -> list[str]: + """ + Extract task descriptions from PR body checkboxes. 
+ + Looks for patterns like: + - [ ] Task description + - [x] Completed task + + Returns only unchecked tasks (the ones we're tracking). + """ + tasks = [] + + # Match both checked and unchecked boxes to get all tasks + # Pattern: - [ ] or - [x] followed by task text + checkbox_pattern = re.compile(r"^[\s]*-\s*\[([ xX])\]\s*(.+)$", re.MULTILINE) + + for match in checkbox_pattern.finditer(pr_body): + checked = match.group(1).lower() == "x" + task_text = match.group(2).strip() + + # Only track unchecked tasks + if not checked and task_text: + tasks.append(task_text) + + return tasks + + +def extract_all_tasks_from_pr_body(pr_body: str) -> dict[str, bool]: + """ + Extract all tasks with their current status. + + Returns: + Dict mapping task text to checked status + """ + tasks = {} + checkbox_pattern = re.compile(r"^[\s]*-\s*\[([ xX])\]\s*(.+)$", re.MULTILINE) + + for match in checkbox_pattern.finditer(pr_body): + checked = match.group(1).lower() == "x" + task_text = match.group(2).strip() + if task_text: + tasks[task_text] = checked + + return tasks + + +def update_pr_body_checkboxes(pr_body: str, completed_tasks: list[str]) -> str: + """ + Update PR body to check off completed tasks. 
+ + Args: + pr_body: Original PR body text + completed_tasks: List of task descriptions to mark complete + + Returns: + Updated PR body with checkboxes updated + """ + updated_body = pr_body + + for task in completed_tasks: + # Escape special regex characters in task + escaped_task = re.escape(task) + + # Pattern to match unchecked checkbox with this task + pattern = re.compile( + rf"^([\s]*-\s*)\[ \](\s*){escaped_task}", + re.MULTILINE, + ) + + # Replace with checked version + updated_body = pattern.sub(rf"\1[x]\2{task}", updated_body) + + return updated_body + + +def output_github_actions(result: AnalysisResult) -> None: + """Output results in GitHub Actions format.""" + github_output = os.environ.get("GITHUB_OUTPUT", "") + + # Print notices for visibility in logs + print(f"::notice::Analysis completed with {result.completion.provider_used}") + print(f"::notice::Confidence: {result.completion.confidence:.0%}") + + if result.completion.completed_tasks: + print(f"::notice::Completed tasks: {len(result.completion.completed_tasks)}") + for task in result.completion.completed_tasks: + print(f"::notice:: βœ“ {task[:80]}") + + if result.completion.in_progress_tasks: + print(f"::notice::In progress: {len(result.completion.in_progress_tasks)}") + + if result.completion.blocked_tasks: + print(f"::warning::Blocked tasks: {len(result.completion.blocked_tasks)}") + for task in result.completion.blocked_tasks: + print(f"::warning:: βœ— {task[:80]}") + + # Write to GITHUB_OUTPUT if available + if github_output: + with open(github_output, "a") as f: + f.write(f"provider={result.completion.provider_used}\n") + f.write(f"confidence={result.completion.confidence}\n") + f.write(f"completed-count={len(result.completion.completed_tasks)}\n") + f.write(f"in-progress-count={len(result.completion.in_progress_tasks)}\n") + f.write(f"blocked-count={len(result.completion.blocked_tasks)}\n") + f.write(f"has-completions={str(result.has_completions).lower()}\n") + 
f.write(f"has-progress={str(result.has_progress).lower()}\n") + f.write(f"is-stalled={str(result.is_stalled).lower()}\n") + + # Encode completed tasks as JSON for downstream use + completed_json = json.dumps(result.completion.completed_tasks) + f.write(f"completed-tasks={completed_json}\n") + + +def output_json(result: AnalysisResult, pretty: bool = False) -> None: + """Output results as JSON.""" + data = { + "provider": result.completion.provider_used, + "confidence": result.completion.confidence, + "completed_tasks": result.completion.completed_tasks, + "in_progress_tasks": result.completion.in_progress_tasks, + "blocked_tasks": result.completion.blocked_tasks, + "reasoning": result.completion.reasoning, + "data_source": result.data_source, + "input_length": result.input_length, + "analysis_text_length": result.analysis_text_length, + } + + if result.session: + data["session"] = { + "event_count": result.session.raw_event_count, + "message_count": len(result.session.agent_messages), + "command_count": len(result.session.commands), + "file_change_count": len(result.session.file_changes), + "todo_count": len(result.session.todo_items), + } + + if pretty: + print(json.dumps(data, indent=2)) + else: + print(json.dumps(data)) + + +def output_markdown(result: AnalysisResult) -> None: + """Output results as markdown summary.""" + print(result.get_summary()) + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Analyze Codex session output for task completion", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + + parser.add_argument( + "--session-file", + required=True, + help="Path to Codex session JSONL or summary file", + ) + + parser.add_argument( + "--tasks", + nargs="*", + help="Task descriptions to track (alternative to --pr-body-file)", + ) + + parser.add_argument( + "--pr-body-file", + help="Path to file containing PR body with checkboxes", + ) + + parser.add_argument( + "--pr-body", + help="PR body text directly 
(alternative to --pr-body-file)", + ) + + parser.add_argument( + "--context", + help="Additional context for analysis", + ) + + parser.add_argument( + "--output", + choices=["json", "json-pretty", "markdown", "github-actions"], + default="json", + help="Output format (default: json)", + ) + + parser.add_argument( + "--update-pr-body", + action="store_true", + help="Output updated PR body with completed checkboxes", + ) + + parser.add_argument( + "--updated-body-file", + help="Write updated PR body to this file", + ) + + parser.add_argument( + "--verbose", + "-v", + action="store_true", + help="Enable verbose logging", + ) + + args = parser.parse_args() + + # Setup logging + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + format="%(levelname)s: %(message)s", + ) + + # Check session file exists + session_path = Path(args.session_file) + if not session_path.exists(): + logger.error(f"Session file not found: {args.session_file}") + return 2 + + # Get session content + session_content = session_path.read_text() + + # Get tasks + tasks = [] + pr_body = None + + if args.tasks: + tasks = args.tasks + elif args.pr_body_file: + pr_body = Path(args.pr_body_file).read_text() + tasks = extract_tasks_from_pr_body(pr_body) + elif args.pr_body: + pr_body = args.pr_body + tasks = extract_tasks_from_pr_body(pr_body) + else: + logger.error("Must provide --tasks, --pr-body-file, or --pr-body") + return 1 + + if not tasks: + logger.warning("No tasks found to track") + # Still run analysis but with empty task list + + logger.info(f"Analyzing session ({len(session_content)} bytes) with {len(tasks)} tasks") + + # Run analysis + try: + result = analyze_session( + content=session_content, + tasks=tasks, + context=args.context, + ) + except Exception as e: + logger.error(f"Analysis failed: {e}") + return 1 + + # Output results + if args.output == "github-actions": + output_github_actions(result) + elif args.output == "json": + output_json(result) + elif 
args.output == "json-pretty": + output_json(result, pretty=True) + elif args.output == "markdown": + output_markdown(result) + + # Update PR body if requested + if args.update_pr_body and pr_body and result.completion.completed_tasks: + updated_body = update_pr_body_checkboxes(pr_body, result.completion.completed_tasks) + + if args.updated_body_file: + Path(args.updated_body_file).write_text(updated_body) + logger.info(f"Updated PR body written to {args.updated_body_file}") + else: + print("\n--- UPDATED PR BODY ---") + print(updated_body) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/templates/consumer-repo/.github/workflows/agents-keepalive-loop.yml b/templates/consumer-repo/.github/workflows/agents-keepalive-loop.yml index f070017f6..f228686a2 100644 --- a/templates/consumer-repo/.github/workflows/agents-keepalive-loop.yml +++ b/templates/consumer-repo/.github/workflows/agents-keepalive-loop.yml @@ -438,6 +438,18 @@ jobs: const beforeSha = '${{ needs.evaluate.outputs.head_sha }}'; const headSha = '${{ needs.run-codex.outputs.commit-sha }}'; + // Parse LLM completed tasks if available + let llmCompletedTasks = []; + const llmTasksJson = '${{ needs.run-codex.outputs.llm-completed-tasks || '[]' }}'; + try { + llmCompletedTasks = JSON.parse(llmTasksJson); + if (llmCompletedTasks.length > 0) { + core.info(`LLM analysis found ${llmCompletedTasks.length} completed task(s)`); + } + } catch (e) { + core.debug(`Failed to parse LLM tasks: ${e.message}`); + } + if (!prNumber || !beforeSha || !headSha) { core.info('Missing required inputs for task reconciliation'); return; @@ -447,12 +459,12 @@ jobs: core.info(`Comparing ${beforeSha.slice(0, 7)} β†’ ${headSha.slice(0, 7)}`); const result = await autoReconcileTasks({ - github, context, prNumber, baseSha: beforeSha, headSha, core + github, context, prNumber, baseSha: beforeSha, headSha, llmCompletedTasks, core }); if (result.updated) { core.info(`βœ… ${result.details}`); - 
core.notice(`Auto-checked ${result.tasksChecked} task(s) based on commit analysis`); + core.notice(`Auto-checked ${result.tasksChecked} task(s) based on analysis`); } else { core.info(`ℹ️ ${result.details}`); } @@ -488,5 +500,9 @@ jobs: agent_commit_sha: '${{ needs.run-codex.outputs.commit-sha }}', agent_files_changed: '${{ needs.run-codex.outputs.files-changed }}', agent_summary: process.env.CODEX_SUMMARY || '', + // LLM task analysis provider info + llm_provider: '${{ needs.run-codex.outputs.llm-provider || '' }}', + llm_confidence: '${{ needs.run-codex.outputs.llm-confidence || '' }}', + llm_analysis_run: '${{ needs.run-codex.outputs.llm-analysis-run }}' === 'true', }; await updateKeepaliveLoopSummary({ github, context, core, inputs }); diff --git a/tests/scripts/test_analyze_codex_session.py b/tests/scripts/test_analyze_codex_session.py new file mode 100644 index 000000000..0da7cb39e --- /dev/null +++ b/tests/scripts/test_analyze_codex_session.py @@ -0,0 +1,280 @@ +"""Tests for analyze_codex_session CLI script.""" + +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path +from unittest.mock import patch + +import pytest + +# Import functions directly for unit testing +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) +from scripts.analyze_codex_session import ( + extract_all_tasks_from_pr_body, + extract_tasks_from_pr_body, + update_pr_body_checkboxes, +) + + +class TestExtractTasksFromPRBody: + """Tests for PR body task extraction.""" + + def test_extracts_unchecked_tasks(self) -> None: + pr_body = """ +## Tasks + +- [ ] Fix the bug +- [ ] Add tests +- [x] Update docs +""" + tasks = extract_tasks_from_pr_body(pr_body) + assert tasks == ["Fix the bug", "Add tests"] + + def test_handles_mixed_indentation(self) -> None: + pr_body = """ +- [ ] Task 1 + - [ ] Subtask 1a + - [ ] Sub-subtask +- [ ] Task 2 +""" + tasks = extract_tasks_from_pr_body(pr_body) + assert "Task 1" in tasks + assert "Subtask 1a" 
in tasks + assert "Task 2" in tasks + + def test_handles_uppercase_x(self) -> None: + pr_body = """ +- [X] Completed with uppercase +- [ ] Still pending +""" + tasks = extract_tasks_from_pr_body(pr_body) + assert tasks == ["Still pending"] + + def test_empty_body_returns_empty_list(self) -> None: + assert extract_tasks_from_pr_body("") == [] + + def test_no_checkboxes_returns_empty_list(self) -> None: + pr_body = """ +## Description +This PR fixes a bug. + +## Notes +- Item 1 +- Item 2 +""" + assert extract_tasks_from_pr_body(pr_body) == [] + + def test_extracts_from_multiple_sections(self) -> None: + pr_body = """ +## Tasks +- [ ] Task from tasks section + +## Acceptance Criteria +- [ ] Criterion 1 +- [ ] Criterion 2 +""" + tasks = extract_tasks_from_pr_body(pr_body) + assert len(tasks) == 3 + assert "Task from tasks section" in tasks + assert "Criterion 1" in tasks + + +class TestExtractAllTasksFromPRBody: + """Tests for extracting all tasks with status.""" + + def test_extracts_all_with_status(self) -> None: + pr_body = """ +- [ ] Unchecked task +- [x] Checked task +- [X] Also checked +""" + tasks = extract_all_tasks_from_pr_body(pr_body) + assert tasks == { + "Unchecked task": False, + "Checked task": True, + "Also checked": True, + } + + +class TestUpdatePRBodyCheckboxes: + """Tests for checkbox update logic.""" + + def test_checks_completed_task(self) -> None: + pr_body = "- [ ] Fix the bug\n- [ ] Add tests" + updated = update_pr_body_checkboxes(pr_body, ["Fix the bug"]) + assert "- [x] Fix the bug" in updated + assert "- [ ] Add tests" in updated + + def test_preserves_already_checked(self) -> None: + pr_body = "- [x] Already done\n- [ ] New task" + updated = update_pr_body_checkboxes(pr_body, ["New task"]) + assert "- [x] Already done" in updated + assert "- [x] New task" in updated + + def test_handles_special_characters_in_task(self) -> None: + pr_body = "- [ ] Fix bug (issue #123)" + updated = update_pr_body_checkboxes(pr_body, ["Fix bug (issue #123)"]) 
+ assert "- [x] Fix bug (issue #123)" in updated + + def test_handles_no_matches(self) -> None: + pr_body = "- [ ] Task A" + updated = update_pr_body_checkboxes(pr_body, ["Nonexistent task"]) + assert updated == pr_body + + def test_preserves_indentation(self) -> None: + pr_body = " - [ ] Indented task" + updated = update_pr_body_checkboxes(pr_body, ["Indented task"]) + assert " - [x] Indented task" in updated + + +class TestCLIScript: + """Integration tests for the CLI script.""" + + @pytest.fixture + def sample_session_file(self, tmp_path: Path) -> Path: + """Create a sample JSONL session file.""" + session_content = """{"type": "thread.started", "thread_id": "test123"} +{"type": "turn.started", "turn_id": "turn1"} +{"type": "item.completed", "item_type": "agent_message", "content": "I have fixed the bug in calculator.py. The tests now pass."} +{"type": "item.completed", "item_type": "command_execution", "command": "pytest", "exit_code": 0} +{"type": "turn.completed", "turn_id": "turn1"} +""" + session_file = tmp_path / "session.jsonl" + session_file.write_text(session_content) + return session_file + + @pytest.fixture + def sample_pr_body_file(self, tmp_path: Path) -> Path: + """Create a sample PR body file.""" + pr_body = """## Tasks +- [ ] Fix the bug +- [ ] Add tests +- [ ] Update documentation +""" + pr_body_file = tmp_path / "pr_body.md" + pr_body_file.write_text(pr_body) + return pr_body_file + + def test_cli_runs_with_task_args(self, sample_session_file: Path, tmp_path: Path) -> None: + """Test CLI with --tasks argument.""" + result = subprocess.run( + [ + sys.executable, + "scripts/analyze_codex_session.py", + "--session-file", + str(sample_session_file), + "--tasks", + "Fix the bug", + "Add tests", + "--output", + "json", + ], + capture_output=True, + text=True, + cwd=Path(__file__).parent.parent.parent, + ) + + # Should succeed (exit 0) + assert result.returncode == 0, f"stderr: {result.stderr}" + + # Output should be valid JSON + output = 
json.loads(result.stdout) + assert "provider" in output + assert "confidence" in output + + def test_cli_runs_with_pr_body_file( + self, sample_session_file: Path, sample_pr_body_file: Path + ) -> None: + """Test CLI with --pr-body-file argument.""" + result = subprocess.run( + [ + sys.executable, + "scripts/analyze_codex_session.py", + "--session-file", + str(sample_session_file), + "--pr-body-file", + str(sample_pr_body_file), + "--output", + "json", + ], + capture_output=True, + text=True, + cwd=Path(__file__).parent.parent.parent, + ) + + assert result.returncode == 0, f"stderr: {result.stderr}" + + def test_cli_returns_2_for_missing_session(self, tmp_path: Path) -> None: + """Test CLI returns exit code 2 for missing session file.""" + result = subprocess.run( + [ + sys.executable, + "scripts/analyze_codex_session.py", + "--session-file", + str(tmp_path / "nonexistent.jsonl"), + "--tasks", + "Some task", + ], + capture_output=True, + text=True, + cwd=Path(__file__).parent.parent.parent, + ) + + assert result.returncode == 2 + + def test_cli_markdown_output(self, sample_session_file: Path, tmp_path: Path) -> None: + """Test CLI with markdown output format.""" + result = subprocess.run( + [ + sys.executable, + "scripts/analyze_codex_session.py", + "--session-file", + str(sample_session_file), + "--tasks", + "Fix the bug", + "--output", + "markdown", + ], + capture_output=True, + text=True, + cwd=Path(__file__).parent.parent.parent, + ) + + assert result.returncode == 0 + assert "**Analysis Summary**" in result.stdout + + def test_cli_update_pr_body_option( + self, sample_session_file: Path, sample_pr_body_file: Path, tmp_path: Path + ) -> None: + """Test CLI with --update-pr-body option.""" + updated_file = tmp_path / "updated_body.md" + + # Mock the LLM to return a known completion + with patch("tools.llm_provider.get_llm_provider") as mock_provider: + from tools.llm_provider import RegexFallbackProvider + + mock_provider.return_value = RegexFallbackProvider() + 
+ result = subprocess.run( + [ + sys.executable, + "scripts/analyze_codex_session.py", + "--session-file", + str(sample_session_file), + "--pr-body-file", + str(sample_pr_body_file), + "--output", + "json", + "--update-pr-body", + "--updated-body-file", + str(updated_file), + ], + capture_output=True, + text=True, + cwd=Path(__file__).parent.parent.parent, + ) + + assert result.returncode == 0 diff --git a/tests/tools/__init__.py b/tests/tools/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/tools/test_codex_jsonl_parser.py b/tests/tools/test_codex_jsonl_parser.py new file mode 100644 index 000000000..82cb1429a --- /dev/null +++ b/tests/tools/test_codex_jsonl_parser.py @@ -0,0 +1,261 @@ +"""Tests for tools/codex_jsonl_parser.py""" + +from tools.codex_jsonl_parser import ( + CodexSession, + CommandExecution, + FileChange, + TodoItem, + parse_codex_jsonl, +) + + +class TestCodexJSONLParser: + """Test JSONL parsing functionality.""" + + def test_parse_empty_content(self): + """Empty content returns empty session.""" + session = parse_codex_jsonl("") + assert session.raw_event_count == 0 + assert session.thread_id is None + assert len(session.agent_messages) == 0 + + def test_parse_thread_started(self): + """Thread started event sets thread_id.""" + jsonl = '{"type": "thread.started", "thread_id": "test-123"}' + session = parse_codex_jsonl(jsonl) + assert session.thread_id == "test-123" + assert session.raw_event_count == 1 + + def test_parse_turn_lifecycle(self): + """Turn events are tracked correctly.""" + jsonl = """ +{"type": "turn.started", "turn_id": "turn-1"} +{"type": "turn.completed", "turn_id": "turn-1", "token_usage": {"input_tokens": 100, "output_tokens": 50}} +""" + session = parse_codex_jsonl(jsonl) + assert len(session.turns) == 1 + assert session.turns[0].turn_id == "turn-1" + assert session.turns[0].completed is True + assert session.turns[0].input_tokens == 100 + assert session.turns[0].output_tokens == 50 + + def 
test_parse_turn_failed(self): + """Failed turns are tracked.""" + jsonl = """ +{"type": "turn.started", "turn_id": "turn-1"} +{"type": "turn.failed", "turn_id": "turn-1", "error": "Rate limited"} +""" + session = parse_codex_jsonl(jsonl) + assert len(session.turns) == 1 + assert session.turns[0].failed is True + assert session.turns[0].error == "Rate limited" + + def test_parse_agent_message_streaming(self): + """Agent messages with streaming updates are captured.""" + jsonl = """ +{"type": "item.started", "item_id": "msg-1", "item_type": "agent_message"} +{"type": "item.updated", "item_id": "msg-1", "content": "Hello "} +{"type": "item.updated", "item_id": "msg-1", "content": "world!"} +{"type": "item.completed", "item_id": "msg-1"} +""" + session = parse_codex_jsonl(jsonl) + assert len(session.agent_messages) == 1 + assert session.agent_messages[0] == "Hello world!" + + def test_parse_agent_message_old_schema(self): + """Old schema (assistant_message) is supported.""" + jsonl = '{"type": "item.completed", "item_type": "assistant_message", "content": "Done!"}' + session = parse_codex_jsonl(jsonl) + assert len(session.agent_messages) == 1 + assert session.agent_messages[0] == "Done!" 
+ + def test_parse_reasoning(self): + """Reasoning summaries are captured.""" + jsonl = '{"type": "item.completed", "item_type": "reasoning", "content": "I should fix the tests first."}' + session = parse_codex_jsonl(jsonl) + assert len(session.reasoning_summaries) == 1 + assert "fix the tests" in session.reasoning_summaries[0] + + def test_parse_command_execution(self): + """Command executions are tracked.""" + jsonl = '{"type": "item.completed", "item_type": "command_execution", "command": "pytest tests/", "exit_code": 0, "output": "1 passed"}' + session = parse_codex_jsonl(jsonl) + assert len(session.commands) == 1 + assert session.commands[0].command == "pytest tests/" + assert session.commands[0].exit_code == 0 + assert len(session.successful_commands) == 1 + assert len(session.failed_commands) == 0 + + def test_parse_failed_command(self): + """Failed commands are tracked separately.""" + jsonl = '{"type": "item.completed", "item_type": "command_execution", "command": "pytest", "exit_code": 1}' + session = parse_codex_jsonl(jsonl) + assert len(session.failed_commands) == 1 + assert session.failed_commands[0].exit_code == 1 + + def test_parse_file_change(self): + """File changes are tracked.""" + jsonl = '{"type": "item.completed", "item_type": "file_change", "path": "src/main.py", "change_type": "modified"}' + session = parse_codex_jsonl(jsonl) + assert len(session.file_changes) == 1 + assert session.file_changes[0].path == "src/main.py" + assert session.file_changes[0].change_type == "modified" + + def test_parse_todo_list(self): + """Todo list items are extracted.""" + jsonl = '{"type": "item.completed", "item_type": "todo_list", "items": [{"task": "Fix tests", "status": "completed"}, {"task": "Update docs", "status": "in_progress"}]}' + session = parse_codex_jsonl(jsonl) + assert len(session.todo_items) == 2 + assert session.todo_items[0].task == "Fix tests" + assert session.todo_items[0].status == "completed" + assert len(session.completed_todos) == 1 + + 
def test_parse_handles_invalid_json(self): + """Invalid JSON lines are logged but don't crash.""" + jsonl = """ +{"type": "thread.started", "thread_id": "test"} +not valid json +{"type": "turn.started", "turn_id": "turn-1"} +""" + session = parse_codex_jsonl(jsonl) + assert session.thread_id == "test" + assert len(session.turns) == 1 + assert len(session.parse_errors) == 1 + + def test_total_tokens(self): + """Token totals are calculated across turns.""" + jsonl = """ +{"type": "turn.started", "turn_id": "turn-1"} +{"type": "turn.completed", "turn_id": "turn-1", "token_usage": {"input_tokens": 100, "output_tokens": 50}} +{"type": "turn.started", "turn_id": "turn-2"} +{"type": "turn.completed", "turn_id": "turn-2", "token_usage": {"input_tokens": 200, "output_tokens": 100}} +""" + session = parse_codex_jsonl(jsonl) + assert session.total_input_tokens == 300 + assert session.total_output_tokens == 150 + + +class TestCodexSessionAnalysisText: + """Test analysis text generation.""" + + def test_get_analysis_text_with_messages(self): + """Analysis text includes agent messages.""" + session = CodexSession( + agent_messages=["I completed the task successfully."], + ) + text = session.get_analysis_text() + assert "Agent Messages" in text + assert "completed the task" in text + + def test_get_analysis_text_with_reasoning(self): + """Reasoning is included when requested.""" + session = CodexSession( + reasoning_summaries=["I should check the tests."], + ) + text = session.get_analysis_text(include_reasoning=True) + assert "Reasoning" in text + assert "check the tests" in text + + def test_get_analysis_text_without_reasoning(self): + """Reasoning can be excluded.""" + session = CodexSession( + reasoning_summaries=["Secret thoughts"], + ) + text = session.get_analysis_text(include_reasoning=False) + assert "Secret thoughts" not in text + + def test_get_analysis_text_with_todos(self): + """Todo items are formatted with status.""" + session = CodexSession( + todo_items=[ + 
TodoItem(task="Fix tests", status="completed"), + TodoItem(task="Update docs", status="in_progress"), + ], + ) + text = session.get_analysis_text() + assert "Todo List" in text + assert "βœ“ Fix tests" in text + assert "β†’ Update docs" in text + + def test_get_analysis_text_with_files(self): + """File changes are listed.""" + session = CodexSession( + file_changes=[ + FileChange(path="src/main.py", change_type="modified"), + FileChange(path="tests/test_main.py", change_type="added"), + ], + ) + text = session.get_analysis_text() + assert "Files Modified" in text + assert "modified: src/main.py" in text + assert "added: tests/test_main.py" in text + + def test_get_analysis_text_with_commands(self): + """Command summary is included.""" + session = CodexSession( + commands=[ + CommandExecution(command="pytest", exit_code=0, output=""), + CommandExecution(command="black .", exit_code=0, output=""), + CommandExecution(command="mypy", exit_code=1, output="error"), + ], + ) + text = session.get_analysis_text() + assert "Commands Executed" in text + assert "Total: 3" in text + assert "Successful: 2" in text + assert "Failed: 1" in text + + +class TestCompleteSession: + """Test parsing a complete realistic session.""" + + def test_parse_realistic_session(self): + """Parse a realistic multi-turn session.""" + jsonl = """ +{"type": "thread.started", "thread_id": "session-abc"} +{"type": "turn.started", "turn_id": "turn-1"} +{"type": "item.started", "item_id": "reason-1", "item_type": "reasoning"} +{"type": "item.updated", "item_id": "reason-1", "content": "The user wants me to fix tests. I'll run pytest first."} +{"type": "item.completed", "item_id": "reason-1"} +{"type": "item.completed", "item_type": "command_execution", "command": "pytest tests/", "exit_code": 1, "output": "2 failed"} +{"type": "item.started", "item_id": "msg-1", "item_type": "agent_message"} +{"type": "item.updated", "item_id": "msg-1", "content": "I found 2 failing tests. 
Let me fix them."} +{"type": "item.completed", "item_id": "msg-1"} +{"type": "item.completed", "item_type": "file_change", "path": "tests/test_calc.py", "change_type": "modified"} +{"type": "item.completed", "item_type": "command_execution", "command": "pytest tests/", "exit_code": 0, "output": "all passed"} +{"type": "item.started", "item_id": "msg-2", "item_type": "agent_message"} +{"type": "item.updated", "item_id": "msg-2", "content": "All tests pass now. The fix was to update the expected value."} +{"type": "item.completed", "item_id": "msg-2"} +{"type": "turn.completed", "turn_id": "turn-1", "token_usage": {"input_tokens": 500, "output_tokens": 200}} +""" + session = parse_codex_jsonl(jsonl) + + # Check overall structure + assert session.thread_id == "session-abc" + assert len(session.turns) == 1 + assert session.turns[0].completed + + # Check content + assert len(session.reasoning_summaries) == 1 + assert "run pytest" in session.reasoning_summaries[0] + + assert len(session.agent_messages) == 2 + assert "2 failing tests" in session.agent_messages[0] + assert "All tests pass" in session.agent_messages[1] + + assert len(session.commands) == 2 + assert len(session.successful_commands) == 1 + assert len(session.failed_commands) == 1 + + assert len(session.file_changes) == 1 + assert session.file_changes[0].path == "tests/test_calc.py" + + # Check tokens + assert session.total_input_tokens == 500 + assert session.total_output_tokens == 200 + + # Check analysis text + text = session.get_analysis_text() + assert "All tests pass" in text + assert "Files Modified" in text + assert "Commands Executed" in text diff --git a/tests/tools/test_llm_provider.py b/tests/tools/test_llm_provider.py new file mode 100644 index 000000000..714a10b9f --- /dev/null +++ b/tests/tools/test_llm_provider.py @@ -0,0 +1,232 @@ +"""Tests for tools/llm_provider.py""" + +import os +from unittest.mock import MagicMock, patch + +import pytest + +from tools.llm_provider import ( + 
CompletionAnalysis, + FallbackChainProvider, + GitHubModelsProvider, + OpenAIProvider, + RegexFallbackProvider, + check_providers, + get_llm_provider, +) + + +class TestProviderAvailability: + """Test provider availability checks.""" + + def test_github_models_available_with_token(self): + """GitHub Models is available when GITHUB_TOKEN is set.""" + with patch.dict(os.environ, {"GITHUB_TOKEN": "test-token"}): + provider = GitHubModelsProvider() + assert provider.is_available() is True + + def test_github_models_unavailable_without_token(self): + """GitHub Models is unavailable without GITHUB_TOKEN.""" + env = {k: v for k, v in os.environ.items() if k != "GITHUB_TOKEN"} + with patch.dict(os.environ, env, clear=True): + provider = GitHubModelsProvider() + assert provider.is_available() is False + + def test_openai_available_with_key(self): + """OpenAI is available when OPENAI_API_KEY is set.""" + with patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test"}): + provider = OpenAIProvider() + assert provider.is_available() is True + + def test_openai_unavailable_without_key(self): + """OpenAI is unavailable without OPENAI_API_KEY.""" + env = {k: v for k, v in os.environ.items() if k != "OPENAI_API_KEY"} + with patch.dict(os.environ, env, clear=True): + provider = OpenAIProvider() + assert provider.is_available() is False + + def test_regex_always_available(self): + """Regex fallback is always available.""" + provider = RegexFallbackProvider() + assert provider.is_available() is True + + def test_check_providers_returns_dict(self): + """check_providers returns availability dict.""" + result = check_providers() + assert isinstance(result, dict) + assert "github-models" in result + assert "openai" in result + assert "regex-fallback" in result + assert result["regex-fallback"] is True + + +class TestRegexFallbackProvider: + """Test regex-based analysis.""" + + def test_detects_completion_keywords(self): + """Regex detects completion keywords.""" + provider = 
RegexFallbackProvider() + tasks = ["Fix the calculator tests"] + output = "I have completed fixing the calculator tests. They all pass now." + + result = provider.analyze_completion(output, tasks) + assert len(result.completed_tasks) == 1 + assert result.provider_used == "regex-fallback" + assert result.confidence < 0.5 # Low confidence for regex + + def test_detects_progress_keywords(self): + """Regex detects progress keywords.""" + provider = RegexFallbackProvider() + tasks = ["Update documentation"] + output = "I'm working on updating the documentation now." + + result = provider.analyze_completion(output, tasks) + assert len(result.in_progress_tasks) == 1 + + def test_detects_blocker_keywords(self): + """Regex detects blocker keywords.""" + provider = RegexFallbackProvider() + tasks = ["Deploy to production"] + output = "I'm blocked on the deploy - there's an error with credentials." + + result = provider.analyze_completion(output, tasks) + assert len(result.blocked_tasks) == 1 + + def test_no_false_positives_without_keywords(self): + """No detection without relevant keywords.""" + provider = RegexFallbackProvider() + tasks = ["Implement feature X"] + output = "Looking at the codebase structure." 
+ + result = provider.analyze_completion(output, tasks) + assert len(result.completed_tasks) == 0 + assert len(result.in_progress_tasks) == 0 + assert len(result.blocked_tasks) == 0 + + +class TestFallbackChainProvider: + """Test fallback chain behavior.""" + + def test_uses_first_available_provider(self): + """Chain uses first available provider.""" + mock_provider1 = MagicMock() + mock_provider1.name = "mock1" + mock_provider1.is_available.return_value = False + + mock_provider2 = MagicMock() + mock_provider2.name = "mock2" + mock_provider2.is_available.return_value = True + mock_provider2.analyze_completion.return_value = CompletionAnalysis( + completed_tasks=["task1"], + in_progress_tasks=[], + blocked_tasks=[], + confidence=0.9, + reasoning="test", + provider_used="mock2", + ) + + chain = FallbackChainProvider([mock_provider1, mock_provider2]) + result = chain.analyze_completion("output", ["task1"]) + + mock_provider1.analyze_completion.assert_not_called() + mock_provider2.analyze_completion.assert_called() + assert result.provider_used == "mock2" + + def test_falls_back_on_error(self): + """Chain falls back when provider raises error.""" + mock_provider1 = MagicMock() + mock_provider1.name = "mock1" + mock_provider1.is_available.return_value = True + mock_provider1.analyze_completion.side_effect = RuntimeError("API error") + + mock_provider2 = MagicMock() + mock_provider2.name = "mock2" + mock_provider2.is_available.return_value = True + mock_provider2.analyze_completion.return_value = CompletionAnalysis( + completed_tasks=[], + in_progress_tasks=[], + blocked_tasks=[], + confidence=0.5, + reasoning="fallback", + provider_used="mock2", + ) + + chain = FallbackChainProvider([mock_provider1, mock_provider2]) + result = chain.analyze_completion("output", ["task1"]) + + assert result.provider_used == "mock2" + + def test_raises_when_all_fail(self): + """Chain raises error when all providers fail.""" + mock_provider = MagicMock() + mock_provider.name = "mock" + 
mock_provider.is_available.return_value = True + mock_provider.analyze_completion.side_effect = RuntimeError("Failed") + + chain = FallbackChainProvider([mock_provider]) + + with pytest.raises(RuntimeError, match="All providers failed"): + chain.analyze_completion("output", ["task1"]) + + +class TestGetLLMProvider: + """Test get_llm_provider factory.""" + + def test_returns_fallback_chain(self): + """get_llm_provider returns a FallbackChainProvider.""" + provider = get_llm_provider() + assert isinstance(provider, FallbackChainProvider) + + def test_chain_always_available(self): + """Chain is always available (regex fallback).""" + provider = get_llm_provider() + assert provider.is_available() is True + + +class TestCompletionAnalysis: + """Test CompletionAnalysis dataclass.""" + + def test_dataclass_creation(self): + """CompletionAnalysis can be created.""" + analysis = CompletionAnalysis( + completed_tasks=["task1", "task2"], + in_progress_tasks=["task3"], + blocked_tasks=[], + confidence=0.85, + reasoning="Tasks 1 and 2 were completed based on output.", + provider_used="test", + ) + assert len(analysis.completed_tasks) == 2 + assert analysis.confidence == 0.85 + + +class TestGitHubModelsProvider: + """Test GitHub Models provider (mocked).""" + + def test_parse_response_valid_json(self): + """Parses valid JSON response.""" + provider = GitHubModelsProvider() + response = """ +Here's my analysis: +{ + "completed": ["task1"], + "in_progress": ["task2"], + "blocked": [], + "confidence": 0.9, + "reasoning": "Task 1 was explicitly marked done." +} +""" + result = provider._parse_response(response, ["task1", "task2"]) + assert result.completed_tasks == ["task1"] + assert result.in_progress_tasks == ["task2"] + assert result.confidence == 0.9 + + def test_parse_response_invalid_json(self): + """Handles invalid JSON gracefully.""" + provider = GitHubModelsProvider() + response = "I couldn't analyze this properly." 
+ + result = provider._parse_response(response, ["task1"]) + assert result.completed_tasks == [] + assert result.confidence == 0.0 + assert "parse" in result.reasoning.lower() diff --git a/tools/codex_jsonl_parser.py b/tools/codex_jsonl_parser.py new file mode 100644 index 000000000..60fab970e --- /dev/null +++ b/tools/codex_jsonl_parser.py @@ -0,0 +1,376 @@ +""" +Codex JSONL Event Parser + +Parses the JSONL event stream from `codex exec --json` for task completion analysis. + +Event types supported: +- thread.started / turn.started / turn.completed / turn.failed +- item.started / item.updated / item.completed +- Item types: agent_message, reasoning, command_execution, file_change, todo_list + +Usage: + from tools.codex_jsonl_parser import parse_codex_jsonl, CodexSession + + session = parse_codex_jsonl(jsonl_content) + print(session.agent_messages) + print(session.file_changes) +""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + + +@dataclass +class CommandExecution: + """Represents a shell command executed by Codex.""" + + command: str + exit_code: int + output: str + duration_seconds: float | None = None + + +@dataclass +class FileChange: + """Represents a file modification by Codex.""" + + path: str + change_type: str # added, modified, deleted + content_preview: str | None = None + + +@dataclass +class TodoItem: + """Represents a task in Codex's todo list.""" + + task: str + status: str # completed, in_progress, not_started, blocked + + +@dataclass +class TurnInfo: + """Information about a conversation turn.""" + + turn_id: str + input_tokens: int = 0 + output_tokens: int = 0 + reasoning_tokens: int = 0 + completed: bool = False + failed: bool = False + error: str | None = None + + +@dataclass +class CodexSession: + """Parsed Codex session data from JSONL events.""" + + # Thread info + thread_id: str | None 
= None + + # Turns + turns: list[TurnInfo] = field(default_factory=list) + + # High-value content for analysis + agent_messages: list[str] = field(default_factory=list) + reasoning_summaries: list[str] = field(default_factory=list) + + # Concrete work evidence + commands: list[CommandExecution] = field(default_factory=list) + file_changes: list[FileChange] = field(default_factory=list) + + # Direct task mapping (if available) + todo_items: list[TodoItem] = field(default_factory=list) + + # Raw events (for debugging) + raw_event_count: int = 0 + parse_errors: list[str] = field(default_factory=list) + + @property + def total_input_tokens(self) -> int: + return sum(t.input_tokens for t in self.turns) + + @property + def total_output_tokens(self) -> int: + return sum(t.output_tokens for t in self.turns) + + @property + def successful_commands(self) -> list[CommandExecution]: + return [c for c in self.commands if c.exit_code == 0] + + @property + def failed_commands(self) -> list[CommandExecution]: + return [c for c in self.commands if c.exit_code != 0] + + @property + def completed_todos(self) -> list[TodoItem]: + return [t for t in self.todo_items if t.status == "completed"] + + def get_analysis_text(self, include_reasoning: bool = True) -> str: + """ + Get consolidated text suitable for LLM analysis. 
+ + Args: + include_reasoning: Whether to include reasoning summaries + + Returns: + Formatted text with key session information + """ + sections = [] + + # Agent messages (highest signal) + if self.agent_messages: + sections.append("## Agent Messages") + for msg in self.agent_messages: + sections.append(msg[:2000]) # Truncate long messages + sections.append("") + + # Reasoning (if requested) + if include_reasoning and self.reasoning_summaries: + sections.append("## Reasoning Summaries") + for reason in self.reasoning_summaries: + sections.append(reason[:1000]) + sections.append("") + + # Todo list (direct task mapping) + if self.todo_items: + sections.append("## Todo List") + for item in self.todo_items: + status_emoji = { + "completed": "βœ“", + "in_progress": "β†’", + "blocked": "βœ—", + "not_started": "β—‹", + }.get(item.status, "?") + sections.append(f"{status_emoji} {item.task}") + sections.append("") + + # File changes (concrete evidence) + if self.file_changes: + sections.append("## Files Modified") + for fc in self.file_changes: + sections.append(f"- {fc.change_type}: {fc.path}") + sections.append("") + + # Command summary + if self.commands: + sections.append("## Commands Executed") + sections.append(f"- Total: {len(self.commands)}") + sections.append(f"- Successful: {len(self.successful_commands)}") + sections.append(f"- Failed: {len(self.failed_commands)}") + if self.failed_commands: + sections.append("- Failed commands:") + for cmd in self.failed_commands[:3]: # Limit to first 3 + sections.append(f" - {cmd.command[:100]} (exit {cmd.exit_code})") + sections.append("") + + return "\n".join(sections) + + +class CodexJSONLParser: + """Parser for Codex JSONL event streams.""" + + def __init__(self): + self._session = CodexSession() + self._current_items: dict[str, dict] = {} # item_id -> item data + + def parse(self, jsonl_content: str) -> CodexSession: + """ + Parse JSONL content into a CodexSession. 
+ + Args: + jsonl_content: Raw JSONL string (one JSON object per line) + + Returns: + Parsed CodexSession + """ + for line_num, line in enumerate(jsonl_content.strip().split("\n"), 1): + line = line.strip() + if not line: + continue + + try: + event = json.loads(line) + self._process_event(event) + self._session.raw_event_count += 1 + except json.JSONDecodeError as e: + error_msg = f"Line {line_num}: JSON parse error: {e}" + logger.warning(error_msg) + self._session.parse_errors.append(error_msg) + except Exception as e: + error_msg = f"Line {line_num}: Processing error: {e}" + logger.warning(error_msg) + self._session.parse_errors.append(error_msg) + + return self._session + + def _process_event(self, event: dict[str, Any]) -> None: + """Process a single event.""" + event_type = event.get("type", "") + + # Thread events + if event_type == "thread.started": + self._session.thread_id = event.get("thread_id") + + # Turn events + elif event_type == "turn.started": + turn = TurnInfo(turn_id=event.get("turn_id", "")) + self._session.turns.append(turn) + + elif event_type == "turn.completed": + turn_id = event.get("turn_id") + usage = event.get("token_usage", {}) + for turn in self._session.turns: + if turn.turn_id == turn_id: + turn.completed = True + turn.input_tokens = usage.get("input_tokens", 0) + turn.output_tokens = usage.get("output_tokens", 0) + turn.reasoning_tokens = usage.get("reasoning_tokens", 0) + break + + elif event_type == "turn.failed": + turn_id = event.get("turn_id") + for turn in self._session.turns: + if turn.turn_id == turn_id: + turn.failed = True + turn.error = event.get("error") + break + + # Item events + elif event_type == "item.started": + item_id = event.get("item_id") + # Handle both old (item_type) and new (type in nested object) schemas + item_type = event.get("item_type") or event.get("item", {}).get("type") + if item_id: + self._current_items[item_id] = { + "type": item_type, + "content": "", + } + + elif event_type == "item.updated": 
+ item_id = event.get("item_id") + if item_id in self._current_items: + # Append content updates + content = event.get("content", "") + self._current_items[item_id]["content"] += content + + elif event_type == "item.completed": + item_id = event.get("item_id") + item_data = self._current_items.pop(item_id, None) + + if not item_data: + # Try to get item type from event itself + item_type = event.get("item_type") or event.get("item", {}).get("type") + item_data = {"type": item_type, "content": ""} + + item_type = item_data.get("type") + content = item_data.get("content", "") or event.get("content", "") + + self._handle_completed_item(item_type, content, event) + + def _handle_completed_item( + self, item_type: str | None, content: str, event: dict[str, Any] + ) -> None: + """Handle a completed item based on its type.""" + + # Handle schema variations (old: assistant_message, new: agent_message) + if item_type in ("agent_message", "assistant_message"): + if content: + self._session.agent_messages.append(content) + + elif item_type == "reasoning": + if content: + self._session.reasoning_summaries.append(content) + + elif item_type == "command_execution": + cmd = CommandExecution( + command=event.get("command", content), + exit_code=event.get("exit_code", 0), + output=event.get("output", ""), + duration_seconds=event.get("duration"), + ) + self._session.commands.append(cmd) + + elif item_type == "file_change": + fc = FileChange( + path=event.get("path", ""), + change_type=event.get("change_type", "modified"), + content_preview=content[:500] if content else None, + ) + self._session.file_changes.append(fc) + + elif item_type == "todo_list": + # Parse todo items from content or event + items = event.get("items", []) + if not items and content: + # Try to parse from content + import contextlib + + with contextlib.suppress(json.JSONDecodeError): + items = json.loads(content) + + for item in items: + if isinstance(item, dict): + todo = TodoItem( + task=item.get("task", ""), 
+ status=item.get("status", "not_started"), + ) + self._session.todo_items.append(todo) + + +def parse_codex_jsonl(jsonl_content: str) -> CodexSession: + """ + Parse Codex JSONL event stream. + + Args: + jsonl_content: Raw JSONL string from `codex exec --json` + + Returns: + Parsed CodexSession with all extracted information + """ + parser = CodexJSONLParser() + return parser.parse(jsonl_content) + + +def parse_codex_jsonl_file(file_path: str | Path) -> CodexSession: + """ + Parse Codex JSONL from a file. + + Args: + file_path: Path to JSONL file + + Returns: + Parsed CodexSession + """ + path = Path(file_path) + content = path.read_text() + return parse_codex_jsonl(content) + + +if __name__ == "__main__": + # Example usage + sample_jsonl = """ +{"type": "thread.started", "thread_id": "abc123"} +{"type": "turn.started", "turn_id": "turn1"} +{"type": "item.started", "item_id": "msg1", "item_type": "agent_message"} +{"type": "item.updated", "item_id": "msg1", "content": "I'll fix the test failures "} +{"type": "item.updated", "item_id": "msg1", "content": "in the calculator module."} +{"type": "item.completed", "item_id": "msg1"} +{"type": "item.completed", "item_type": "command_execution", "command": "pytest tests/", "exit_code": 0} +{"type": "item.completed", "item_type": "file_change", "path": "src/calc.py", "change_type": "modified"} +{"type": "turn.completed", "turn_id": "turn1", "token_usage": {"input_tokens": 1000, "output_tokens": 500}} +""" + + session = parse_codex_jsonl(sample_jsonl) + print(f"Thread ID: {session.thread_id}") + print(f"Events parsed: {session.raw_event_count}") + print(f"Agent messages: {len(session.agent_messages)}") + print(f"Commands: {len(session.commands)}") + print(f"File changes: {len(session.file_changes)}") + print(f"\nAnalysis text:\n{session.get_analysis_text()}") diff --git a/tools/codex_session_analyzer.py b/tools/codex_session_analyzer.py new file mode 100644 index 000000000..5878725b1 --- /dev/null +++ 
b/tools/codex_session_analyzer.py @@ -0,0 +1,271 @@ +""" +Codex Session Analyzer + +Analyzes Codex session output to determine task completion status. +Supports multiple data source options: +- Option A: Final summary only (--output-last-message) +- Option B: Full JSONL stream (--json) +- Option B subset: Filtered to high-value events only + +Usage: + from tools.codex_session_analyzer import analyze_session, AnalysisResult + + # From JSONL + result = analyze_session(jsonl_content, tasks, data_source="jsonl") + + # From summary + result = analyze_session(summary_text, tasks, data_source="summary") +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import Literal + +from tools.codex_jsonl_parser import CodexSession, parse_codex_jsonl +from tools.llm_provider import CompletionAnalysis, get_llm_provider + +logger = logging.getLogger(__name__) + +DataSource = Literal["jsonl", "jsonl_filtered", "summary", "auto"] + + +@dataclass +class AnalysisResult: + """Complete analysis result with metadata.""" + + # Core analysis + completion: CompletionAnalysis + + # Session metadata (if JSONL was parsed) + session: CodexSession | None = None + + # Data source used + data_source: str = "unknown" + + # Statistics + input_length: int = 0 + analysis_text_length: int = 0 + + @property + def has_completions(self) -> bool: + """Check if any tasks were marked complete.""" + return len(self.completion.completed_tasks) > 0 + + @property + def has_progress(self) -> bool: + """Check if any work was done (completed or in progress).""" + return ( + len(self.completion.completed_tasks) > 0 or len(self.completion.in_progress_tasks) > 0 + ) + + @property + def is_stalled(self) -> bool: + """Check if session appears stalled (no progress, maybe blocked).""" + return not self.has_progress and len(self.completion.blocked_tasks) > 0 + + def get_checkbox_updates(self) -> dict[str, bool]: + """ + Get mapping of task -> checked status for PR body 
update. + + Returns: + Dict mapping task text to checkbox state (True = checked) + """ + updates = {} + for task in self.completion.completed_tasks: + updates[task] = True + # Don't uncheck anything - only mark completions + return updates + + def get_summary(self) -> str: + """Get human-readable summary of the analysis.""" + lines = [ + f"**Analysis Summary** (confidence: {self.completion.confidence:.0%})", + f"- Provider: {self.completion.provider_used}", + f"- Data source: {self.data_source}", + "", + ] + + if self.completion.completed_tasks: + lines.append("**Completed:**") + for task in self.completion.completed_tasks: + lines.append(f"- βœ“ {task}") + lines.append("") + + if self.completion.in_progress_tasks: + lines.append("**In Progress:**") + for task in self.completion.in_progress_tasks: + lines.append(f"- β†’ {task}") + lines.append("") + + if self.completion.blocked_tasks: + lines.append("**Blocked:**") + for task in self.completion.blocked_tasks: + lines.append(f"- βœ— {task}") + lines.append("") + + if self.completion.reasoning: + lines.append(f"**Analysis:** {self.completion.reasoning}") + + return "\n".join(lines) + + +def analyze_session( + content: str, + tasks: list[str], + data_source: DataSource = "auto", + include_reasoning: bool = True, + context: str | None = None, +) -> AnalysisResult: + """ + Analyze Codex session output to determine task completion. + + Args: + content: Session output (JSONL or summary text) + tasks: List of task descriptions from PR checkboxes + data_source: How to interpret content: + - "jsonl": Parse as full JSONL stream + - "jsonl_filtered": Parse JSONL, use only agent_message + reasoning + - "summary": Treat as plain text summary + - "auto": Auto-detect based on content + include_reasoning: Include reasoning summaries in analysis (for JSONL) + context: Additional context (PR description, etc.) 
+ + Returns: + AnalysisResult with completion status and metadata + """ + # Auto-detect data source + if data_source == "auto": + data_source = _detect_data_source(content) + logger.info(f"Auto-detected data source: {data_source}") + + session = None + analysis_text = content + + # Parse JSONL if applicable + if data_source in ("jsonl", "jsonl_filtered"): + try: + session = parse_codex_jsonl(content) + analysis_text = session.get_analysis_text( + include_reasoning=(data_source == "jsonl" and include_reasoning) + ) + logger.info( + f"Parsed JSONL: {session.raw_event_count} events, " + f"{len(session.agent_messages)} messages, " + f"{len(session.commands)} commands" + ) + except Exception as e: + logger.warning(f"Failed to parse as JSONL, falling back to summary: {e}") + data_source = "summary" + analysis_text = content + + # Get LLM provider and analyze + provider = get_llm_provider() + + try: + completion = provider.analyze_completion( + session_output=analysis_text, + tasks=tasks, + context=context, + ) + except Exception as e: + logger.error(f"Analysis failed: {e}") + # Return empty result on failure + completion = CompletionAnalysis( + completed_tasks=[], + in_progress_tasks=[], + blocked_tasks=[], + confidence=0.0, + reasoning=f"Analysis failed: {e}", + provider_used="error", + ) + + return AnalysisResult( + completion=completion, + session=session, + data_source=data_source, + input_length=len(content), + analysis_text_length=len(analysis_text), + ) + + +def _detect_data_source(content: str) -> DataSource: + """ + Auto-detect whether content is JSONL or plain text. 
+ + Args: + content: Raw content to analyze + + Returns: + Detected data source type + """ + # Check first few lines for JSON structure + lines = content.strip().split("\n")[:5] + json_lines = 0 + + for line in lines: + line = line.strip() + if line.startswith("{") and line.endswith("}"): + json_lines += 1 + + # If most lines look like JSON, treat as JSONL + if json_lines >= len(lines) * 0.5: + return "jsonl" + + return "summary" + + +def analyze_from_files( + session_file: str, + tasks_file: str | None = None, + tasks: list[str] | None = None, +) -> AnalysisResult: + """ + Convenience function to analyze from file paths. + + Args: + session_file: Path to session output file + tasks_file: Path to file with tasks (one per line) + tasks: List of tasks (alternative to tasks_file) + + Returns: + AnalysisResult + """ + from pathlib import Path + + content = Path(session_file).read_text() + + if tasks is None: + if tasks_file: + task_text = Path(tasks_file).read_text() + tasks = [t.strip() for t in task_text.split("\n") if t.strip()] + else: + raise ValueError("Either tasks or tasks_file must be provided") + + return analyze_session(content, tasks) + + +if __name__ == "__main__": + + logging.basicConfig(level=logging.INFO) + + # Example usage + sample_tasks = [ + "Fix test failures in calculator module", + "Update documentation", + "Add type hints", + ] + + sample_jsonl = """ +{"type": "thread.started", "thread_id": "abc123"} +{"type": "turn.started", "turn_id": "turn1"} +{"type": "item.completed", "item_type": "agent_message", "content": "I've completed fixing the test failures in the calculator module. The tests now pass. 
"""
LLM Provider Abstraction with Fallback Chain

Provides a unified interface for LLM calls with automatic fallback:
1. GitHub Models API (primary) - uses GITHUB_TOKEN
2. OpenAI API (fallback) - uses OPENAI_API_KEY
3. Regex patterns (last resort) - no API calls

Usage:
    from tools.llm_provider import get_llm_provider, LLMProvider

    provider = get_llm_provider()
    result = provider.analyze_completion(session_text, tasks)
"""

from __future__ import annotations

import json
import logging
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass, replace

logger = logging.getLogger(__name__)

# GitHub Models API endpoint (OpenAI-compatible)
GITHUB_MODELS_BASE_URL = "https://models.inference.ai.azure.com"
DEFAULT_MODEL = "gpt-4o-mini"
# Maximum characters of session output embedded in a prompt (token-limit guard).
MAX_SESSION_CHARS = 8000


@dataclass
class CompletionAnalysis:
    """Result of task completion analysis."""

    completed_tasks: list[str]  # Task descriptions marked complete
    in_progress_tasks: list[str]  # Tasks currently being worked on
    blocked_tasks: list[str]  # Tasks that are blocked
    confidence: float  # 0.0 to 1.0
    reasoning: str  # Explanation of the analysis
    provider_used: str  # Which provider generated this


class LLMProvider(ABC):
    """Abstract base class for LLM providers."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Provider name for logging."""

    @abstractmethod
    def is_available(self) -> bool:
        """Check if this provider can be used."""

    @abstractmethod
    def analyze_completion(
        self,
        session_output: str,
        tasks: list[str],
        context: str | None = None,
    ) -> CompletionAnalysis:
        """
        Analyze session output to determine task completion status.

        Args:
            session_output: Codex session output (summary or JSONL events)
            tasks: List of task descriptions from PR checkboxes
            context: Optional additional context (PR description, etc.)

        Returns:
            CompletionAnalysis with task status breakdown
        """


class GitHubModelsProvider(LLMProvider):
    """LLM provider using GitHub Models API (OpenAI-compatible)."""

    @property
    def name(self) -> str:
        return "github-models"

    def is_available(self) -> bool:
        return bool(os.environ.get("GITHUB_TOKEN"))

    def _get_client(self):
        """Build a LangChain ChatOpenAI client aimed at GitHub Models.

        Returns None when langchain_openai is not installed, so callers can
        degrade gracefully instead of crashing on import.
        """
        try:
            from langchain_openai import ChatOpenAI
        except ImportError:
            logger.warning("langchain_openai not installed")
            return None

        return ChatOpenAI(
            model=DEFAULT_MODEL,
            base_url=GITHUB_MODELS_BASE_URL,
            api_key=os.environ.get("GITHUB_TOKEN"),
            temperature=0.1,  # Low temperature for consistent analysis
        )

    def analyze_completion(
        self,
        session_output: str,
        tasks: list[str],
        context: str | None = None,
    ) -> CompletionAnalysis:
        """See LLMProvider.analyze_completion.

        Raises:
            RuntimeError: If the LangChain client cannot be constructed.
            Exception: Propagates API errors (logged before re-raising) so the
                fallback chain can try the next provider.
        """
        client = self._get_client()
        if not client:
            raise RuntimeError("LangChain OpenAI not available")

        prompt = self._build_analysis_prompt(session_output, tasks, context)

        try:
            response = client.invoke(prompt)
            return self._parse_response(response.content, tasks)
        except Exception:
            logger.exception("GitHub Models API error")
            raise

    def _build_analysis_prompt(
        self,
        session_output: str,
        tasks: list[str],
        context: str | None = None,
    ) -> str:
        """Build the completion-analysis prompt.

        Bug fixes vs. the earlier revision: the inline source comment
        "# Truncate to avoid token limits" was part of the f-string and was
        sent to the model as prompt text; and ``context`` was accepted but
        silently ignored. The truncation now happens outside the string and
        ``context`` is included in its own section when provided.
        """
        task_list = "\n".join(f"- [ ] {task}" for task in tasks)
        # Truncate the session output so the prompt stays within token limits.
        session_excerpt = session_output[:MAX_SESSION_CHARS]
        context_section = f"\n## Additional Context\n{context}\n" if context else ""

        return f"""Analyze this Codex session output and determine which tasks have been completed.

## Tasks to Track
{task_list}

## Session Output
{session_excerpt}
{context_section}
## Instructions
For each task, determine if it was:
- COMPLETED: Clear evidence the task was finished
- IN_PROGRESS: Work started but not finished
- BLOCKED: Cannot proceed due to an issue
- NOT_STARTED: No evidence of work on this task

Respond in JSON format:
{{
  "completed": ["task description 1", ...],
  "in_progress": ["task description 2", ...],
  "blocked": ["task description 3", ...],
  "confidence": 0.85,
  "reasoning": "Brief explanation of your analysis"
}}

Only include tasks in completed/in_progress/blocked if you have evidence. Be conservative - if unsure, don't mark as completed."""

    def _parse_response(self, content: str, tasks: list[str]) -> CompletionAnalysis:
        """Parse the LLM's (possibly fenced) JSON response into CompletionAnalysis.

        ``tasks`` is accepted for interface symmetry but not currently used;
        the response's own task lists are trusted as-is. On any parse failure
        an empty, zero-confidence analysis is returned instead of raising.
        """
        try:
            # Extract the outermost {...} span to tolerate markdown fences
            # or prose around the JSON payload.
            json_start = content.find("{")
            json_end = content.rfind("}") + 1
            if json_start >= 0 and json_end > json_start:
                data = json.loads(content[json_start:json_end])
            else:
                raise ValueError("No JSON found in response")

            # Tolerate non-numeric confidence values and clamp to the
            # documented 0.0-1.0 range.
            try:
                confidence = float(data.get("confidence", 0.5))
            except (TypeError, ValueError):
                confidence = 0.5
            confidence = min(max(confidence, 0.0), 1.0)

            return CompletionAnalysis(
                completed_tasks=data.get("completed", []),
                in_progress_tasks=data.get("in_progress", []),
                blocked_tasks=data.get("blocked", []),
                confidence=confidence,
                reasoning=data.get("reasoning", ""),
                provider_used=self.name,
            )
        except (json.JSONDecodeError, ValueError) as e:
            logger.warning("Failed to parse LLM response: %s", e)
            # Return empty analysis on parse failure
            return CompletionAnalysis(
                completed_tasks=[],
                in_progress_tasks=[],
                blocked_tasks=[],
                confidence=0.0,
                reasoning=f"Failed to parse response: {e}",
                provider_used=self.name,
            )


class OpenAIProvider(LLMProvider):
    """LLM provider using OpenAI API directly."""

    @property
    def name(self) -> str:
        return "openai"

    def is_available(self) -> bool:
        return bool(os.environ.get("OPENAI_API_KEY"))

    def _get_client(self):
        """Build a LangChain ChatOpenAI client, or None if the package is missing."""
        try:
            from langchain_openai import ChatOpenAI
        except ImportError:
            logger.warning("langchain_openai not installed")
            return None

        return ChatOpenAI(
            model=DEFAULT_MODEL,
            api_key=os.environ.get("OPENAI_API_KEY"),
            temperature=0.1,
        )

    def analyze_completion(
        self,
        session_output: str,
        tasks: list[str],
        context: str | None = None,
    ) -> CompletionAnalysis:
        """See LLMProvider.analyze_completion.

        Raises:
            RuntimeError: If the LangChain client cannot be constructed.
        """
        client = self._get_client()
        if not client:
            raise RuntimeError("LangChain OpenAI not available")

        # Reuse GitHubModelsProvider's prompt/parse logic; only the endpoint
        # and credentials differ between the two providers.
        github_provider = GitHubModelsProvider()
        prompt = github_provider._build_analysis_prompt(session_output, tasks, context)

        try:
            response = client.invoke(prompt)
            result = github_provider._parse_response(response.content, tasks)
            # Re-label the result with this provider's name.
            return replace(result, provider_used=self.name)
        except Exception:
            logger.exception("OpenAI API error")
            raise


class RegexFallbackProvider(LLMProvider):
    """Fallback provider using keyword matching (no API calls).

    NOTE(review): the *_PATTERNS regex lists below are part of the public
    surface but are not used by the current heuristic, which matches plain
    signal phrases instead — kept for external callers / future use.
    """

    # Patterns indicating task completion (currently unused; see class note).
    COMPLETION_PATTERNS = [
        r"(?:completed?|finished|done|implemented|fixed|resolved)\s+(?:the\s+)?(.+?)(?:\.|$)",
        r"βœ“\s+(.+?)(?:\.|$)",
        r"\[x\]\s+(.+?)(?:\.|$)",
        r"successfully\s+(?:completed?|implemented|fixed)\s+(.+?)(?:\.|$)",
    ]

    # Patterns indicating work in progress (currently unused; see class note).
    PROGRESS_PATTERNS = [
        r"(?:working on|started|beginning|implementing)\s+(.+?)(?:\.|$)",
        r"(?:in progress|ongoing):\s*(.+?)(?:\.|$)",
    ]

    # Patterns indicating blockers (currently unused; see class note).
    BLOCKER_PATTERNS = [
        r"(?:blocked|stuck|cannot|failed|error)\s+(?:on\s+)?(.+?)(?:\.|$)",
        r"(?:issue|problem|bug)\s+(?:with\s+)?(.+?)(?:\.|$)",
    ]

    # Plain-text signal phrases actually used by the keyword heuristic.
    _COMPLETION_SIGNALS = ["completed", "finished", "done", "fixed", "βœ“", "[x]"]
    _PROGRESS_SIGNALS = ["working on", "started", "implementing", "in progress"]
    _BLOCKER_SIGNALS = ["blocked", "stuck", "failed", "error", "cannot"]

    @property
    def name(self) -> str:
        return "regex-fallback"

    def is_available(self) -> bool:
        return True  # Always available

    @staticmethod
    def _matches(task_words: set[str], output_lower: str, signals: list[str]) -> bool:
        """True when any signal phrase AND any significant (>3 chars) task word
        both appear anywhere in the output. This is a loose co-occurrence
        check, not a proximity check — hence the provider's low confidence.
        """
        if not any(signal in output_lower for signal in signals):
            return False
        return any(word in output_lower for word in task_words if len(word) > 3)

    def analyze_completion(
        self,
        session_output: str,
        tasks: list[str],
        context: str | None = None,
    ) -> CompletionAnalysis:
        """Classify each task by keyword co-occurrence in the session output.

        ``context`` is accepted for interface compatibility but unused here.
        """
        output_lower = session_output.lower()
        completed: list[str] = []
        in_progress: list[str] = []
        blocked: list[str] = []

        for task in tasks:
            task_words = set(task.lower().split())

            # Precedence: completed > blocked > in-progress.
            if self._matches(task_words, output_lower, self._COMPLETION_SIGNALS):
                completed.append(task)
            elif self._matches(task_words, output_lower, self._BLOCKER_SIGNALS):
                blocked.append(task)
            elif self._matches(task_words, output_lower, self._PROGRESS_SIGNALS):
                in_progress.append(task)

        return CompletionAnalysis(
            completed_tasks=completed,
            in_progress_tasks=in_progress,
            blocked_tasks=blocked,
            confidence=0.3,  # Low confidence for keyword matching
            reasoning="Pattern-based analysis (no LLM available)",
            provider_used=self.name,
        )


class FallbackChainProvider(LLMProvider):
    """Provider that tries multiple providers in sequence until one succeeds."""

    def __init__(self, providers: list[LLMProvider]):
        self._providers = providers
        self._active_provider: LLMProvider | None = None

    @property
    def name(self) -> str:
        # Reflect the provider most recently attempted, if any.
        if self._active_provider:
            return f"fallback-chain({self._active_provider.name})"
        return "fallback-chain"

    def is_available(self) -> bool:
        return any(p.is_available() for p in self._providers)

    def analyze_completion(
        self,
        session_output: str,
        tasks: list[str],
        context: str | None = None,
    ) -> CompletionAnalysis:
        """Try each available provider in order; return the first success.

        Raises:
            RuntimeError: If every available provider fails, or none is
                available at all.
        """
        last_error: Exception | None = None

        for provider in self._providers:
            if not provider.is_available():
                logger.debug("Provider %s not available, skipping", provider.name)
                continue

            try:
                logger.info("Attempting analysis with %s", provider.name)
                self._active_provider = provider
                result = provider.analyze_completion(session_output, tasks, context)
                logger.info("Successfully analyzed with %s", provider.name)
                return result
            except Exception as e:  # deliberate broad catch: fall through to next provider
                logger.warning("Provider %s failed: %s", provider.name, e)
                last_error = e

        if last_error:
            raise RuntimeError(f"All providers failed. Last error: {last_error}")
        raise RuntimeError("No providers available")


def get_llm_provider() -> LLMProvider:
    """
    Get the best available LLM provider with fallback chain.

    Returns a FallbackChainProvider that tries:
    1. GitHub Models API (if GITHUB_TOKEN set)
    2. OpenAI API (if OPENAI_API_KEY set)
    3. Regex fallback (always available)
    """
    providers: list[LLMProvider] = [
        GitHubModelsProvider(),
        OpenAIProvider(),
        RegexFallbackProvider(),
    ]

    return FallbackChainProvider(providers)


def check_providers() -> dict[str, bool]:
    """Report availability of each concrete provider, keyed by provider name."""
    return {
        "github-models": GitHubModelsProvider().is_available(),
        "openai": OpenAIProvider().is_available(),
        "regex-fallback": True,  # never needs credentials
    }


if __name__ == "__main__":
    # Quick smoke test: print availability and the chain's current label.
    logging.basicConfig(level=logging.INFO)

    print("Provider availability:")
    for provider_name, available in check_providers().items():
        status = "βœ“" if available else "βœ—"
        print(f"  {status} {provider_name}")

    chain = get_llm_provider()
    print(f"\nActive provider chain: {chain.name}")