From bd74119eda4cde3cd480f50ffdd074e8516ca69d Mon Sep 17 00:00:00 2001
From: Aaron Stannard <aaron@petabridge.com>
Date: Mon, 25 May 2026 17:44:47 +0000
Subject: [PATCH] fix(kv-cache): close residual prefix-invalidation paths after
 PR #1171
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR #1171 fixed the largest KV cache prefix drift source — NormalizeMessages
merging volatile content (memory recall, current time, working context) into
the leading system message — but live post-deploy validation surfaced two
residual failure modes:

  - Regression A: mid-session prefix rebuilds caused partial cache drops
    (40K → 16K, 50K → 8K) when SetSystemPrompt fired unconditionally on
    every Slack/Discord turn even when on-disk identity files were unchanged
  - Regression B: dynamic load_tool calls correlated with permanent cache=0
    on every subsequent turn for 24+ minutes

This change ships four bounded fixes plus regression infrastructure:

  1. SetSystemPrompt is now idempotent on content (LlmSessionActor.cs).
     Skips the replace+log when the freshly-rebuilt prompt bytes equal
     history[0]. Log line now carries a short content hash so future
     drift is immediately visible — char-count alone proved insufficient
     during the original investigation.

  2. kv_prefix_diagnostic logging in OpenAiCompatibleChatClient emits
     per-turn SHA-256 prefix hashes for the leading system message,
     conversation history, and tools array. Permanent runtime
     instrumentation for any future cache regression — turn-over-turn
     diff isolates which region drifted.

  3. TUI session USAGE log writer (issue #1173) — ChatViewModel now
     opens the same per-session signalr-{id}.log file HeadlessChannel
     writes to and emits USAGE: in=... cached=... lines in the
     byte-identical format. Eval tooling and ad-hoc grep against the
     per-session log Just Works for both -p and TUI sessions.

  4. Two byte-equality unit tests in OpenAiCompatibleChatClientTests
     confirm the provider serialization layer is byte-stable for
     identical inputs (Phase 0 finding: PR #1171's NormalizeMessages
     is correct at this layer; live drift is upstream in actor state).

Two new scenarios in evals/profile-kv-cache.sh — memory_recall_mid_session
and tool_loaded_mid_session — exercise the Regression A/B failure shapes
end-to-end against a real LLM. Run via the existing Docker harness against
https://llm.testlab.petabridge.net.

3,347 unit tests pass. slopwatch clean. headers verified.
---
 evals/profile-kv-cache.sh                     |  29 ++++
 .../Sessions/LlmSessionActor.cs               |  29 +++-
 src/Netclaw.Cli.Tests/Tui/ChatPageTests.cs    |   3 +-
 src/Netclaw.Cli/Tui/ChatViewModel.cs          |  78 +++++++++-
 .../OpenAiCompatibleChatClientTests.cs        | 133 ++++++++++++++++++
 .../SelfHosted/OpenAiCompatibleChatClient.cs  |  93 ++++++++++++
 6 files changed, 362 insertions(+), 3 deletions(-)

diff --git a/evals/profile-kv-cache.sh b/evals/profile-kv-cache.sh
index 74a998a16..1bc3436f6 100755
--- a/evals/profile-kv-cache.sh
+++ b/evals/profile-kv-cache.sh
@@ -289,6 +289,35 @@ run_scenario context_heavy "Conversation where responses are long, growing conte
     "Compare that to how Redis handles it, especially with Redis Cluster." \
     "Given what we discussed, if I'm building a global e-commerce platform, which tradeoff would you recommend and why?"
 
+# Scenario 7: Memory recall across many turns (Regression A canary).
+# Live evidence from PR #1171 follow-up showed partial cache drops
+# mid-session when memory-recall content changed between turns and
+# something upstream rebuilt the leading prefix. With the
+# SetSystemPrompt idempotency fix in place, the prefix must stay
+# byte-stable and cache hit rate must extend monotonically across the
+# six turns. A regression to "cache plateau at ~static prefix size on
+# turn 3+" matches the pre-fix failure mode.
+run_scenario memory_recall_mid_session "Multi-turn with shifting recall anchors — cache must extend, not plateau" \
+    "Remember this for our chat: my primary project name is Aurora and uses Rust on Tokio. Confirm in one sentence." \
+    "What's 17 times 23? Just the number." \
+    "Remind me what programming language and runtime my Aurora project uses." \
+    "Now switch gears: name three approaches for handling backpressure in async systems." \
+    "Of those three, which one is most idiomatic for the runtime my Aurora project uses?" \
+    "Summarize Aurora's stack and the recommended backpressure approach in two sentences."
+
+# Scenario 8: Tool discovery mid-session (Regression B canary).
+# Live evidence showed permanent cache=0 on every turn after a
+# dynamic `load_tool` call. The session.log captured cache collapse
+# from 99% hit rate to 0% across 5+ subsequent turns over 24 minutes.
+# This scenario exercises the dynamic-tool registration path via
+# search_tools + load_tool, then verifies subsequent turns still
+# extend the cache prefix.
+run_scenario tool_loaded_mid_session "Discover and load a tool mid-conversation — cache must recover next turn" \
+    "Tell me a one-sentence fun fact about hummingbirds. No tools needed." \
+    "Now I want you to use search_tools to find a tool that can list directory contents, then call load_tool on it (no need to execute the listed tool — just discover and load it)." \
+    "Did you successfully load the tool? In one sentence, name the tool you loaded." \
+    "Without invoking any more tools, what was the hummingbird fact you told me earlier?"
+
 # ─── Write Combined Results ───────────────────────────────────────────────────
 
 echo ""
diff --git a/src/Netclaw.Actors/Sessions/LlmSessionActor.cs b/src/Netclaw.Actors/Sessions/LlmSessionActor.cs
index 76bd3f259..db5be3859 100644
--- a/src/Netclaw.Actors/Sessions/LlmSessionActor.cs
+++ b/src/Netclaw.Actors/Sessions/LlmSessionActor.cs
@@ -2473,6 +2473,22 @@ private void SetSystemPrompt()
             return;
         }
 
+        // Idempotent on content: if the freshly-rebuilt prompt is
+        // byte-identical to history[0], skip the replacement. The
+        // immutable-list SetItem allocates a new spine even when the value
+        // is unchanged, and the rebuild itself drops the persisted prompt
+        // from llama.cpp's KV cache from token 0 because the underlying
+        // prompt bytes change identity. PR #1171 fixed the volatile-tail
+        // merge; this fix closes the residual cache-bust path where
+        // SetSystemPrompt fires unconditionally on every channel-driven
+        // turn even when on-disk identity files are unchanged.
+        if (_state.History.Count > 0
+            && _state.History[0].Role == Protocol.ChatRole.System
+            && string.Equals(_state.History[0].Content, content, StringComparison.Ordinal))
+        {
+            return;
+        }
+
         var systemMsg = new SerializableChatMessage
         {
             Role = Protocol.ChatRole.System,
@@ -2484,7 +2500,18 @@ private void SetSystemPrompt()
             ? _state with { History = _state.History.SetItem(0, systemMsg) }
             : _state with { History = _state.History.Insert(0, systemMsg) };
 
-        _log.Info("System prompt set ({PromptLength} chars)", content.Length);
+        // Hash is included so a future cache-drift investigation can confirm
+        // at a glance whether two "System prompt set" lines carry the same
+        // content or genuinely different content.
+        _log.Info("System prompt set ({PromptLength} chars, hash={ContentHash})",
+            content.Length,
+            ShortContentHash(content));
+    }
+
+    private static string ShortContentHash(string content)
+    {
+        var bytes = System.Security.Cryptography.SHA256.HashData(System.Text.Encoding.UTF8.GetBytes(content));
+        return Convert.ToHexString(bytes.AsSpan(0, 8)).ToLowerInvariant();
     }
 
     private void FireLlmCall(string? recallQuery = null, bool forceNoTools = false)
diff --git a/src/Netclaw.Cli.Tests/Tui/ChatPageTests.cs b/src/Netclaw.Cli.Tests/Tui/ChatPageTests.cs
index 4cf6c5f66..16032c10a 100644
--- a/src/Netclaw.Cli.Tests/Tui/ChatPageTests.cs
+++ b/src/Netclaw.Cli.Tests/Tui/ChatPageTests.cs
@@ -423,7 +423,8 @@ public TestChatViewModel(ToolInteractionRequest? seed)
                 new DaemonClient("http://127.0.0.1:1"),
                 TimeProvider.System,
                 new ModelCapabilities { ModelId = "test-model" },
-                new ChatNavigationState())
+                new ChatNavigationState(),
+                new NetclawPaths())
         {
             _seed = seed;
         }
diff --git a/src/Netclaw.Cli/Tui/ChatViewModel.cs b/src/Netclaw.Cli/Tui/ChatViewModel.cs
index af276b19f..8ccba255a 100644
--- a/src/Netclaw.Cli/Tui/ChatViewModel.cs
+++ b/src/Netclaw.Cli/Tui/ChatViewModel.cs
@@ -23,6 +23,7 @@ public partial class ChatViewModel : ReactiveViewModel
     private readonly DaemonClient _daemonClient;
     private readonly TimeProvider _timeProvider;
     private readonly ModelCapabilities _modelCapabilities;
+    private readonly NetclawPaths _paths;
     private string? _resumeSessionId;
     private string? _initialMessage;
 
@@ -31,6 +32,12 @@ public partial class ChatViewModel : ReactiveViewModel
     private readonly Queue<ToolInteractionRequest> _pendingInteractions = new();
     private IDisposable? _daemonOutputSubscription;
     private IDisposable? _daemonConnectionSubscription;
+    // Per-session USAGE log writer. Mirrors HeadlessChannel's writer so the
+    // canonical signalr-{sessionId}.log file receives USAGE: in=... cached=...
+    // lines for both TUI and -p driven sessions. Without this, post-hoc KV
+    // cache analysis and eval tooling that anchors on the per-session log
+    // silently gets no data from TUI turns (issue #1173).
+    private StreamWriter? _usageLog;
     private bool _sessionReady;
     private int _connectAttempts;
     private readonly ObservableCollection<string> _approvalOptions = [];
@@ -73,11 +80,13 @@ public ChatViewModel(
         DaemonClient daemonClient,
         TimeProvider timeProvider,
         ModelCapabilities modelCapabilities,
-        ChatNavigationState navigationState)
+        ChatNavigationState navigationState,
+        NetclawPaths paths)
     {
         _daemonClient = daemonClient;
         _timeProvider = timeProvider;
         _modelCapabilities = modelCapabilities;
+        _paths = paths;
         _resumeSessionId = navigationState.TakeResumeSessionId();
         _initialMessage = navigationState.TakeInitialMessage();
     }
@@ -95,6 +104,11 @@ protected virtual Task InitializeSessionAsync()
             {
                 _outputSubject.OnNext(output);
 
+                if (output is UsageOutput usage)
+                {
+                    AppendUsageLog(usage);
+                }
+
                 switch (output)
                 {
                     case ToolInteractionRequest interaction:
@@ -341,11 +355,72 @@ internal void SeedPendingInteractionForTesting(ToolInteractionRequest interactio
         RequestRedraw();
     }
 
+    /// <summary>
+    /// Opens the per-session USAGE log file if not already open. Matches
+    /// HeadlessChannel's filename and append semantics so a single session
+    /// driven by both -p (headless) and TUI clients accumulates USAGE
+    /// lines in one canonical log. Safe to call repeatedly — guarded by
+    /// the null check so reconnects don't reopen the file.
+    /// </summary>
+    private void OpenUsageLogIfNeeded(string sessionIdValue)
+    {
+        if (_usageLog is not null)
+            return;
+
+        try
+        {
+            var logFileName = $"signalr-{sessionIdValue.Replace("/", "-", StringComparison.Ordinal)}.log";
+            var logPath = Path.Combine(_paths.LogsDirectory, logFileName);
+            _usageLog = new StreamWriter(logPath, append: true) { AutoFlush = true };
+            _usageLog.WriteLine($"[{_timeProvider.GetUtcNow():o}] TUI session attached: {sessionIdValue}");
+        }
+        catch (IOException ex)
+        {
+            // Logging is best-effort — never let a log-file failure break
+            // the live chat session. The daemon-side SessionLogActor at
+            // ~/.netclaw/logs/sessions/{id}/session.log remains the
+            // authoritative audit trail; surface the open failure to
+            // Debug so a misconfigured logs directory is at least visible
+            // under a debugger rather than silently lost.
+            System.Diagnostics.Debug.WriteLine($"ChatViewModel: failed to open USAGE log for session {sessionIdValue}: {ex.Message}");
+            _usageLog = null;
+        }
+    }
+
+    /// <summary>
+    /// Writes a USAGE line in the exact format produced by
+    /// HeadlessChannel.cs so existing tooling that grep's for "USAGE: in="
+    /// in the per-session log Just Works against TUI sessions too.
+    /// </summary>
+    private void AppendUsageLog(UsageOutput msg)
+    {
+        if (_usageLog is null)
+            return;
+
+        try
+        {
+            _usageLog.WriteLine(
+                $"[{_timeProvider.GetUtcNow():o}] USAGE: in={msg.InputTokens} out={msg.OutputTokens} total={msg.TotalTokens} cached={msg.CachedInputTokens} reasoning={msg.ReasoningTokens} context_window={msg.ContextWindowTokens} prompt_ms={msg.PromptMs} predicted_tok_s={msg.PredictedPerSecond}");
+        }
+        catch (IOException ex)
+        {
+            // See OpenUsageLogIfNeeded: best-effort logging that must not
+            // affect the live session. Disk-full or rotation races land
+            // here and would otherwise spam every turn — disable further
+            // attempts on this session by dropping the writer reference.
+            System.Diagnostics.Debug.WriteLine($"ChatViewModel: USAGE log write failed, disabling per-session log: {ex.Message}");
+            _usageLog.Dispose();
+            _usageLog = null;
+        }
+    }
+
     public override void Dispose()
     {
         _daemonOutputSubscription?.Dispose();
         _daemonConnectionSubscription?.Dispose();
         _outputSubject.Dispose();
+        _usageLog?.Dispose();
+        _usageLog = null;
 
         IsGenerating.Dispose();
         IsInputEnabled.Dispose();
@@ -397,6 +472,7 @@ private async Task EnsureSessionAndFlushAsync()
             ? await _daemonClient.ResumeSessionAsync(resumeId, DaemonClient.TuiChannelType)
             : await _daemonClient.EnsureSessionAsync(DaemonClient.TuiChannelType);
         SessionIdDisplay.Value = sessionId;
+        OpenUsageLogIfNeeded(sessionId);
         _sessionReady = true;
         IsInputEnabled.Value = true;
         _connectAttempts = 0;
diff --git a/src/Netclaw.Daemon.Tests/Configuration/OpenAiCompatibleChatClientTests.cs b/src/Netclaw.Daemon.Tests/Configuration/OpenAiCompatibleChatClientTests.cs
index 07ddbffc9..907cce9d4 100644
--- a/src/Netclaw.Daemon.Tests/Configuration/OpenAiCompatibleChatClientTests.cs
+++ b/src/Netclaw.Daemon.Tests/Configuration/OpenAiCompatibleChatClientTests.cs
@@ -929,6 +929,139 @@ [new ChatMessage(ChatRole.User, "save")],
         Assert.Equal("store_memory", toolCall.Name);
     }
 
+    // ─── Phase 0 diagnostic tests for residual KV cache prefix drift ─────────
+    // These tests pin down byte-level invariants the existing logical
+    // assertions (Role/Text comparison) don't catch. A failure here means
+    // some serialization step between Assemble and the HTTP body is
+    // introducing per-turn drift that NormalizeMessages alone can't see.
+
+    [Fact]
+    public async Task Outbound_payload_static_prefix_byte_identical_when_only_volatile_context_differs()
+    {
+        // Post-#1176: per-turn volatile context lives inside <context>...
+        // </context> on the LAST User message rather than a trailing System
+        // tail (vLLM rejects trailing System messages). The cache prefix
+        // invariant becomes: leading System + every message except the last
+        // are byte-identical across consecutive turns.
+        var turn1Messages = new[]
+        {
+            new ChatMessage(ChatRole.System, "You are Netclaw, a helpful assistant.\n\n[session]\nid: test/cache-stability"),
+            new ChatMessage(ChatRole.User, "<context>\n[memory-recall]\nstatus: healthy\nrecall-A payload\n</context>\n\nfirst question"),
+        };
+        var turn2Messages = new[]
+        {
+            new ChatMessage(ChatRole.System, "You are Netclaw, a helpful assistant.\n\n[session]\nid: test/cache-stability"),
+            new ChatMessage(ChatRole.User, "first question"),
+            new ChatMessage(ChatRole.Assistant, "first answer"),
+            new ChatMessage(ChatRole.User, "<context>\n[memory-recall]\nstatus: healthy\nrecall-B payload completely different\n</context>\n\nsecond question"),
+        };
+
+        var bodies = await CaptureTwoRequestBodies(turn1Messages, turn2Messages);
+
+        // Strip the trailing volatile-carrying User message from each and
+        // assert byte equality position-by-position over what remains.
+        var turn1Static = ExtractStaticPrefixMessages(bodies.body1);
+        var turn2Static = ExtractStaticPrefixMessages(bodies.body2);
+        Assert.True(turn2Static.Count >= turn1Static.Count,
+            $"Turn 2 must have at least as many static messages as turn 1 (turn1={turn1Static.Count}, turn2={turn2Static.Count}).");
+        for (var i = 0; i < turn1Static.Count; i++)
+        {
+            Assert.Equal(turn1Static[i], turn2Static[i]);
+        }
+    }
+
+    [Fact]
+    public async Task Outbound_payload_tools_array_byte_identical_across_calls_with_same_tool_set()
+    {
+        // Regression B canary: two consecutive calls with the same tool
+        // collection (same order, same definitions) must serialize the
+        // `tools` field to byte-identical JSON. A failure here means
+        // tool-list serialization is non-deterministic, which would bust
+        // the cache the moment any tool is added to a session.
+        using var schemaDoc = JsonDocument.Parse(
+            "{\"type\":\"object\",\"properties\":{\"query\":{\"type\":\"string\"}},\"required\":[\"query\"]}");
+        var toolA = AIFunctionFactory.CreateDeclaration("search_tools", "Search tools", schemaDoc.RootElement);
+        using var storeSchemaDoc = JsonDocument.Parse(
+            "{\"type\":\"object\",\"properties\":{\"text\":{\"type\":\"string\"}},\"required\":[\"text\"]}");
+        var toolB = AIFunctionFactory.CreateDeclaration("store_memory", "Store memory", storeSchemaDoc.RootElement);
+
+        var sameMessages = new[] { new ChatMessage(ChatRole.User, "hello") };
+
+        var capturedBodies = new List<string>();
+        using var handler = new RecordingHandler(req =>
+        {
+            capturedBodies.Add(req.Content!.ReadAsStringAsync().GetAwaiter().GetResult());
+            return new HttpResponseMessage(HttpStatusCode.OK)
+            {
+                Content = new StringContent(
+                    "{\"id\":\"1\",\"model\":\"test\",\"choices\":[{\"finish_reason\":\"stop\",\"message\":{\"role\":\"assistant\",\"content\":\"hi\"}}]}",
+                    Encoding.UTF8, "application/json")
+            };
+        });
+        using var httpClient = new HttpClient(handler) { BaseAddress = new Uri("http://localhost:8000") };
+        var endpoint = OpenAiCompatibleEndpoint.FromBaseUrl("http://localhost:8000");
+        var client = new OpenAiCompatibleChatClient(httpClient, endpoint, "test-model");
+
+        var options = new ChatOptions { Tools = [toolA, toolB] };
+        await client.GetResponseAsync(sameMessages, options, cancellationToken: TestContext.Current.CancellationToken);
+        await client.GetResponseAsync(sameMessages, options, cancellationToken: TestContext.Current.CancellationToken);
+
+        Assert.Equal(2, capturedBodies.Count);
+        var tools1 = ExtractToolsJson(capturedBodies[0]);
+        var tools2 = ExtractToolsJson(capturedBodies[1]);
+        Assert.Equal(tools1, tools2);
+    }
+
+    private async Task<(string body1, string body2)> CaptureTwoRequestBodies(
+        IReadOnlyList<ChatMessage> turn1, IReadOnlyList<ChatMessage> turn2)
+    {
+        var bodies = new List<string>();
+        using var handler = new RecordingHandler(req =>
+        {
+            bodies.Add(req.Content!.ReadAsStringAsync().GetAwaiter().GetResult());
+            return new HttpResponseMessage(HttpStatusCode.OK)
+            {
+                Content = new StringContent(
+                    "{\"id\":\"1\",\"model\":\"test\",\"choices\":[{\"finish_reason\":\"stop\",\"message\":{\"role\":\"assistant\",\"content\":\"hi\"}}]}",
+                    Encoding.UTF8, "application/json")
+            };
+        });
+        using var httpClient = new HttpClient(handler) { BaseAddress = new Uri("http://localhost:8000") };
+        var endpoint = OpenAiCompatibleEndpoint.FromBaseUrl("http://localhost:8000");
+        var client = new OpenAiCompatibleChatClient(httpClient, endpoint, "test-model");
+
+        await client.GetResponseAsync(turn1, cancellationToken: TestContext.Current.CancellationToken);
+        await client.GetResponseAsync(turn2, cancellationToken: TestContext.Current.CancellationToken);
+        Assert.Equal(2, bodies.Count);
+        return (bodies[0], bodies[1]);
+    }
+
+    private static List<string> ExtractStaticPrefixMessages(string requestBody)
+    {
+        // Return the messages array as a list of per-element JSON strings,
+        // with the trailing message stripped. Post-#1176 the trailing
+        // message is the volatile-carrying User turn (with <context>);
+        // pre-#1176 it was a trailing System tail. Either way, that final
+        // entry is the only one allowed to diverge between turns; the rest
+        // must be byte-identical position-for-position for the KV cache
+        // prefix to extend.
+        var root = JsonNode.Parse(requestBody)!.AsObject();
+        var messages = root["messages"]!.AsArray();
+        var end = messages.Count > 0 ? messages.Count - 1 : 0;
+        var result = new List<string>(end);
+        for (var i = 0; i < end; i++)
+            result.Add(messages[i]!.ToJsonString());
+        return result;
+    }
+
+    private static string ExtractToolsJson(string requestBody)
+    {
+        using var doc = JsonDocument.Parse(requestBody);
+        if (!doc.RootElement.TryGetProperty("tools", out var toolsElement))
+            return "(none)";
+        return toolsElement.GetRawText();
+    }
+
     private sealed class RecordingHandler : HttpMessageHandler
     {
         private readonly Func<HttpRequestMessage, HttpResponseMessage> _handler;
diff --git a/src/Netclaw.Providers/SelfHosted/OpenAiCompatibleChatClient.cs b/src/Netclaw.Providers/SelfHosted/OpenAiCompatibleChatClient.cs
index 7e67d04fc..afd0c05c2 100644
--- a/src/Netclaw.Providers/SelfHosted/OpenAiCompatibleChatClient.cs
+++ b/src/Netclaw.Providers/SelfHosted/OpenAiCompatibleChatClient.cs
@@ -5,6 +5,7 @@
 // -----------------------------------------------------------------------
 using System.Net.Http.Headers;
 using System.Runtime.CompilerServices;
+using System.Security.Cryptography;
 using System.Text;
 using System.Text.Json;
 using System.Text.Json.Nodes;
@@ -244,9 +245,101 @@ private JsonObject BuildPayload(IEnumerable<ChatMessage> messages, ChatOptions?
             }
         }
 
+        LogPrefixDiagnostic(body);
+
         return body;
     }
 
+    /// <summary>
+    /// Emits per-turn byte hashes for the static portion of the outbound LLM
+    /// request so KV cache prefix drift can be diagnosed from the daemon log.
+    /// PR #1171 fixed the largest source of drift (volatile content merging
+    /// into the leading system message); PR #1176 moved the volatile tail
+    /// inside the last User message's <c>&lt;context&gt;</c> wrapper. The
+    /// diagnostic emits SHA-256 hashes of three independent regions so a
+    /// log-line diff between consecutive turns isolates which region drifted.
+    /// Post-#1176 the trailing message is normally the wrapped User turn (or
+    /// a Tool result mid-loop), never a System tail — <c>tail_is_system</c>
+    /// stays false on the new wire format and is kept as a defensive signal
+    /// in case an upstream regression re-emits a trailing System.
+    /// </summary>
+    private void LogPrefixDiagnostic(JsonObject body)
+    {
+        if (!_logger.IsEnabled(LogLevel.Debug))
+            return;
+
+        var messages = body["messages"] as JsonArray;
+        if (messages is null || messages.Count == 0)
+            return;
+
+        // Leading system: messages[0] when role=system. Hash the content
+        // string only — the {role,content} envelope is invariant, so any
+        // drift here means the persisted prompt content itself changed.
+        var systemPrefixHash = "(none)";
+        var systemPrefixChars = 0;
+        if (messages[0] is JsonObject first
+            && first["role"]?.GetValue<string>() == "system")
+        {
+            var content = first["content"]?.GetValue<string>() ?? string.Empty;
+            systemPrefixChars = content.Length;
+            systemPrefixHash = ComputeShortHash(content);
+        }
+
+        // Defensive signal: post-#1176 the assembler does NOT emit a
+        // trailing System tail (volatile content goes inside the last User
+        // message). If this flips to true an upstream regression re-emitted
+        // a trailing System — that's exactly the regression #1176 fixed.
+        var tailIsSystem = messages.Count > 1
+            && messages[^1] is JsonObject last
+            && last["role"]?.GetValue<string>() == "system";
+
+        // History prefix: everything between the leading system and the new
+        // user turn (and excluding any trailing System tail if one slipped
+        // through). This is the region that should be byte-stable across
+        // consecutive turns; if it isn't, either history was rewritten
+        // mid-session or a message was re-serialized differently.
+        var historyEnd = tailIsSystem ? messages.Count - 1 : messages.Count;
+        var historyStart = systemPrefixHash == "(none)" ? 0 : 1;
+        if (historyEnd > historyStart
+            && messages[historyEnd - 1] is JsonObject newTurn
+            && newTurn["role"]?.GetValue<string>() == "user")
+        {
+            historyEnd -= 1;
+        }
+
+        var historyPrefixHash = "(none)";
+        var historyMsgCount = 0;
+        if (historyEnd > historyStart)
+        {
+            var sb = new StringBuilder();
+            for (var i = historyStart; i < historyEnd; i++)
+                sb.Append(messages[i]!.ToJsonString(JsonOptions));
+            historyPrefixHash = ComputeShortHash(sb.ToString());
+            historyMsgCount = historyEnd - historyStart;
+        }
+
+        var toolsHash = "(none)";
+        var toolsCount = 0;
+        if (body["tools"] is JsonArray toolsArray && toolsArray.Count > 0)
+        {
+            toolsHash = ComputeShortHash(toolsArray.ToJsonString(JsonOptions));
+            toolsCount = toolsArray.Count;
+        }
+
+        _logger.LogDebug(
+            "kv_prefix_diagnostic system_prefix_hash={SystemPrefixHash} system_prefix_chars={SystemPrefixChars} history_prefix_hash={HistoryPrefixHash} history_msg_count={HistoryMsgCount} tail_is_system={TailIsSystem} tools_hash={ToolsHash} tools_count={ToolsCount} total_messages={TotalMessages}",
+            systemPrefixHash, systemPrefixChars, historyPrefixHash, historyMsgCount, tailIsSystem, toolsHash, toolsCount, messages.Count);
+    }
+
+    private static string ComputeShortHash(string content)
+    {
+        // First 8 bytes (16 hex chars) is plenty to detect drift between
+        // consecutive turns; full SHA-256 would spam logs without adding
+        // diagnostic value at this scale.
+        var hash = SHA256.HashData(Encoding.UTF8.GetBytes(content));
+        return Convert.ToHexString(hash.AsSpan(0, 8)).ToLowerInvariant();
+    }
+
     internal static JsonArray NormalizeMessages(IEnumerable<ChatMessage> messages, ILogger? logger = null)
     {
         var normalized = new JsonArray();