netclaw-dev · Aaronontheweb · May 26, 2026 · May 25, 2026
diff --git a/evals/profile-kv-cache.sh b/evals/profile-kv-cache.sh
@@ -289,6 +289,35 @@ run_scenario context_heavy "Conversation where responses are long, growing conte
     "Compare that to how Redis handles it, especially with Redis Cluster." \
     "Given what we discussed, if I'm building a global e-commerce platform, which tradeoff would you recommend and why?"
 
+# Scenario 7: Memory recall across many turns (Regression A canary).
+# Live evidence from PR #1171 follow-up showed partial cache drops
+# mid-session when memory-recall content changed between turns and
+# something upstream rebuilt the leading prefix. With the
+# SetSystemPrompt idempotency fix in place, the prefix must stay
+# byte-stable and cache hit rate must extend monotonically across the
+# six turns. A regression to "cache plateau at ~static prefix size on
+# turn 3+" matches the pre-fix failure mode.
+run_scenario memory_recall_mid_session "Multi-turn with shifting recall anchors — cache must extend, not plateau" \
+    "Remember this for our chat: my primary project name is Aurora and uses Rust on Tokio. Confirm in one sentence." \
+    "What's 17 times 23? Just the number." \
+    "Remind me what programming language and runtime my Aurora project uses." \
+    "Now switch gears: name three approaches for handling backpressure in async systems." \
+    "Of those three, which one is most idiomatic for the runtime my Aurora project uses?" \
+    "Summarize Aurora's stack and the recommended backpressure approach in two sentences."
+
+# Scenario 8: Tool discovery mid-session (Regression B canary).
+# Live evidence showed permanent cache=0 on every turn after a
+# dynamic `load_tool` call. The session.log captured cache collapse
+# from 99% hit rate to 0% across 5+ subsequent turns over 24 minutes.
+# This scenario exercises the dynamic-tool registration path via
+# search_tools + load_tool, then verifies subsequent turns still
+# extend the cache prefix.
+run_scenario tool_loaded_mid_session "Discover and load a tool mid-conversation — cache must recover next turn" \
+    "Tell me a one-sentence fun fact about hummingbirds. No tools needed." \
+    "Now I want you to use search_tools to find a tool that can list directory contents, then call load_tool on it (no need to execute the listed tool — just discover and load it)." \
+    "Did you successfully load the tool? In one sentence, name the tool you loaded." \
+    "Without invoking any more tools, what was the hummingbird fact you told me earlier?"
+
 # ─── Write Combined Results ───────────────────────────────────────────────────
 
 echo ""

diff --git a/src/Netclaw.Actors/Sessions/LlmSessionActor.cs b/src/Netclaw.Actors/Sessions/LlmSessionActor.cs
@@ -2473,6 +2473,22 @@ private void SetSystemPrompt()
             return;
         }
 
+        // Idempotent on content: if the freshly-rebuilt prompt is
+        // byte-identical to history[0], skip the replacement. The
+        // immutable-list SetItem allocates a new spine even when the value
+        // is unchanged, and the rebuild itself drops the persisted prompt
+        // from llama.cpp's KV cache from token 0 because the underlying
+        // prompt bytes change identity. PR #1171 fixed the volatile-tail
+        // merge; this fix closes the residual cache-bust path where
+        // SetSystemPrompt fires unconditionally on every channel-driven
+        // turn even when on-disk identity files are unchanged.
+        if (_state.History.Count > 0
+            && _state.History[0].Role == Protocol.ChatRole.System
+            && string.Equals(_state.History[0].Content, content, StringComparison.Ordinal))
+        {
+            return;
+        }
+
         var systemMsg = new SerializableChatMessage
         {
             Role = Protocol.ChatRole.System,
@@ -2484,7 +2500,18 @@ private void SetSystemPrompt()
             ? _state with { History = _state.History.SetItem(0, systemMsg) }
             : _state with { History = _state.History.Insert(0, systemMsg) };
 
-        _log.Info("System prompt set ({PromptLength} chars)", content.Length);
+        // Hash is included so a future cache-drift investigation can confirm
+        // at a glance whether two "System prompt set" lines carry the same
+        // content or genuinely different content.
+        _log.Info("System prompt set ({PromptLength} chars, hash={ContentHash})",
+            content.Length,
+            ShortContentHash(content));
+    }
+
+    private static string ShortContentHash(string content)
+    {
+        var bytes = System.Security.Cryptography.SHA256.HashData(System.Text.Encoding.UTF8.GetBytes(content));
+        return Convert.ToHexString(bytes.AsSpan(0, 8)).ToLowerInvariant();
     }
 
     private void FireLlmCall(string? recallQuery = null, bool forceNoTools = false)

diff --git a/src/Netclaw.Cli.Tests/Tui/ChatPageTests.cs b/src/Netclaw.Cli.Tests/Tui/ChatPageTests.cs
@@ -423,7 +423,8 @@ public TestChatViewModel(ToolInteractionRequest? seed)
                 new DaemonClient("http://127.0.0.1:1"),
                 TimeProvider.System,
                 new ModelCapabilities { ModelId = "test-model" },
-                new ChatNavigationState())
+                new ChatNavigationState(),
+                new NetclawPaths())
         {
             _seed = seed;
         }

diff --git a/src/Netclaw.Cli/Tui/ChatViewModel.cs b/src/Netclaw.Cli/Tui/ChatViewModel.cs
@@ -23,6 +23,7 @@ public partial class ChatViewModel : ReactiveViewModel
     private readonly DaemonClient _daemonClient;
     private readonly TimeProvider _timeProvider;
     private readonly ModelCapabilities _modelCapabilities;
+    private readonly NetclawPaths _paths;
     private string? _resumeSessionId;
     private string? _initialMessage;
 
@@ -31,6 +32,12 @@ public partial class ChatViewModel : ReactiveViewModel
     private readonly Queue<ToolInteractionRequest> _pendingInteractions = new();
     private IDisposable? _daemonOutputSubscription;
     private IDisposable? _daemonConnectionSubscription;
+    // Per-session USAGE log writer. Mirrors HeadlessChannel's writer so the
+    // canonical signalr-{sessionId}.log file receives USAGE: in=... cached=...
+    // lines for both TUI and -p driven sessions. Without this, post-hoc KV
+    // cache analysis and eval tooling that anchors on the per-session log
+    // silently gets no data from TUI turns (issue #1173).
+    private StreamWriter? _usageLog;
     private bool _sessionReady;
     private int _connectAttempts;
     private readonly ObservableCollection<string> _approvalOptions = [];
@@ -73,11 +80,13 @@ public ChatViewModel(
         DaemonClient daemonClient,
         TimeProvider timeProvider,
         ModelCapabilities modelCapabilities,
-        ChatNavigationState navigationState)
+        ChatNavigationState navigationState,
+        NetclawPaths paths)
     {
         _daemonClient = daemonClient;
         _timeProvider = timeProvider;
         _modelCapabilities = modelCapabilities;
+        _paths = paths;
         _resumeSessionId = navigationState.TakeResumeSessionId();
         _initialMessage = navigationState.TakeInitialMessage();
     }
@@ -95,6 +104,11 @@ protected virtual Task InitializeSessionAsync()
             {
                 _outputSubject.OnNext(output);
 
+                if (output is UsageOutput usage)
+                {
+                    AppendUsageLog(usage);
+                }
+
                 switch (output)
                 {
                     case ToolInteractionRequest interaction:
@@ -341,11 +355,72 @@ internal void SeedPendingInteractionForTesting(ToolInteractionRequest interactio
         RequestRedraw();
     }
 
+    /// <summary>
+    /// Opens the per-session USAGE log file if not already open. Matches
+    /// HeadlessChannel's filename and append semantics so a single session
+    /// driven by both -p (headless) and TUI clients accumulates USAGE
+    /// lines in one canonical log. Safe to call repeatedly — guarded by
+    /// the null check so reconnects don't reopen the file.
+    /// </summary>
+    private void OpenUsageLogIfNeeded(string sessionIdValue)
+    {
+        if (_usageLog is not null)
+            return;
+
+        try
+        {
+            var logFileName = $"signalr-{sessionIdValue.Replace("/", "-", StringComparison.Ordinal)}.log";
+            var logPath = Path.Combine(_paths.LogsDirectory, logFileName);
+            _usageLog = new StreamWriter(logPath, append: true) { AutoFlush = true };
+            _usageLog.WriteLine($"[{_timeProvider.GetUtcNow():o}] TUI session attached: {sessionIdValue}");
+        }
+        catch (IOException ex)
+        {
+            // Logging is best-effort — never let a log-file failure break
+            // the live chat session. The daemon-side SessionLogActor at
+            // ~/.netclaw/logs/sessions/{id}/session.log remains the
+            // authoritative audit trail; surface the open failure to
+            // Debug so a misconfigured logs directory is at least visible
+            // under a debugger rather than silently lost.
+            System.Diagnostics.Debug.WriteLine($"ChatViewModel: failed to open USAGE log for session {sessionIdValue}: {ex.Message}");
+            _usageLog = null;
+        }
+    }
+
+    /// <summary>
+    /// Writes a USAGE line in the exact format produced by
+    /// HeadlessChannel.cs so existing tooling that grep's for "USAGE: in="
+    /// in the per-session log Just Works against TUI sessions too.
+    /// </summary>
+    private void AppendUsageLog(UsageOutput msg)
+    {
+        if (_usageLog is null)
+            return;
+
+        try
+        {
+            _usageLog.WriteLine(
+                $"[{_timeProvider.GetUtcNow():o}] USAGE: in={msg.InputTokens} out={msg.OutputTokens} total={msg.TotalTokens} cached={msg.CachedInputTokens} reasoning={msg.ReasoningTokens} context_window={msg.ContextWindowTokens} prompt_ms={msg.PromptMs} predicted_tok_s={msg.PredictedPerSecond}");
+        }
+        catch (IOException ex)
+        {
+            // See OpenUsageLogIfNeeded: best-effort logging that must not
+            // affect the live session. Disk-full or rotation races land
+            // here and would otherwise spam every turn — disable further
+            // attempts on this session by dropping the writer reference.
+            System.Diagnostics.Debug.WriteLine($"ChatViewModel: USAGE log write failed, disabling per-session log: {ex.Message}");
+            _usageLog.Dispose();
+            _usageLog = null;
+        }
+    }
+
     public override void Dispose()
     {
         _daemonOutputSubscription?.Dispose();
         _daemonConnectionSubscription?.Dispose();
         _outputSubject.Dispose();
+        _usageLog?.Dispose();
+        _usageLog = null;
 
         IsGenerating.Dispose();
         IsInputEnabled.Dispose();
@@ -397,6 +472,7 @@ private async Task EnsureSessionAndFlushAsync()
             ? await _daemonClient.ResumeSessionAsync(resumeId, DaemonClient.TuiChannelType)
             : await _daemonClient.EnsureSessionAsync(DaemonClient.TuiChannelType);
         SessionIdDisplay.Value = sessionId;
+        OpenUsageLogIfNeeded(sessionId);
         _sessionReady = true;
         IsInputEnabled.Value = true;
         _connectAttempts = 0;

diff --git a/src/Netclaw.Daemon.Tests/Configuration/OpenAiCompatibleChatClientTests.cs b/src/Netclaw.Daemon.Tests/Configuration/OpenAiCompatibleChatClientTests.cs
@@ -929,6 +929,139 @@ [new ChatMessage(ChatRole.User, "save")],
         Assert.Equal("store_memory", toolCall.Name);
     }
 
+    // ─── Phase 0 diagnostic tests for residual KV cache prefix drift ─────────
+    // These tests pin down byte-level invariants the existing logical
+    // assertions (Role/Text comparison) don't catch. A failure here means
+    // some serialization step between Assemble and the HTTP body is
+    // introducing per-turn drift that NormalizeMessages alone can't see.
+
+    [Fact]
+    public async Task Outbound_payload_static_prefix_byte_identical_when_only_volatile_context_differs()
+    {
+        // Post-#1176: per-turn volatile context lives inside <context>...
+        // </context> on the LAST User message rather than a trailing System
+        // tail (vLLM rejects trailing System messages). The cache prefix
+        // invariant becomes: leading System + every message except the last
+        // are byte-identical across consecutive turns.
+        var turn1Messages = new[]
+        {
+            new ChatMessage(ChatRole.System, "You are Netclaw, a helpful assistant.\n\n[session]\nid: test/cache-stability"),
+            new ChatMessage(ChatRole.User, "<context>\n[memory-recall]\nstatus: healthy\nrecall-A payload\n</context>\n\nfirst question"),
+        };
+        var turn2Messages = new[]
+        {
+            new ChatMessage(ChatRole.System, "You are Netclaw, a helpful assistant.\n\n[session]\nid: test/cache-stability"),
+            new ChatMessage(ChatRole.User, "first question"),
+            new ChatMessage(ChatRole.Assistant, "first answer"),
+            new ChatMessage(ChatRole.User, "<context>\n[memory-recall]\nstatus: healthy\nrecall-B payload completely different\n</context>\n\nsecond question"),
+        };
+
+        var bodies = await CaptureTwoRequestBodies(turn1Messages, turn2Messages);
+
+        // Strip the trailing volatile-carrying User message from each and
+        // assert byte equality position-by-position over what remains.
+        var turn1Static = ExtractStaticPrefixMessages(bodies.body1);
+        var turn2Static = ExtractStaticPrefixMessages(bodies.body2);
+        Assert.True(turn2Static.Count >= turn1Static.Count,
+            $"Turn 2 must have at least as many static messages as turn 1 (turn1={turn1Static.Count}, turn2={turn2Static.Count}).");
+        for (var i = 0; i < turn1Static.Count; i++)
+        {
+            Assert.Equal(turn1Static[i], turn2Static[i]);
+        }
+    }
+
+    [Fact]
+    public async Task Outbound_payload_tools_array_byte_identical_across_calls_with_same_tool_set()
+    {
+        // Regression B canary: two consecutive calls with the same tool
+        // collection (same order, same definitions) must serialize the
+        // `tools` field to byte-identical JSON. A failure here means
+        // tool-list serialization is non-deterministic, which would bust
+        // the cache the moment any tool is added to a session.
+        using var schemaDoc = JsonDocument.Parse(
+            "{\"type\":\"object\",\"properties\":{\"query\":{\"type\":\"string\"}},\"required\":[\"query\"]}");
+        var toolA = AIFunctionFactory.CreateDeclaration("search_tools", "Search tools", schemaDoc.RootElement);
+        using var storeSchemaDoc = JsonDocument.Parse(
+            "{\"type\":\"object\",\"properties\":{\"text\":{\"type\":\"string\"}},\"required\":[\"text\"]}");
+        var toolB = AIFunctionFactory.CreateDeclaration("store_memory", "Store memory", storeSchemaDoc.RootElement);
+
+        var sameMessages = new[] { new ChatMessage(ChatRole.User, "hello") };
+
+        var capturedBodies = new List<string>();
+        using var handler = new RecordingHandler(req =>
+        {
+            capturedBodies.Add(req.Content!.ReadAsStringAsync().GetAwaiter().GetResult());
+            return new HttpResponseMessage(HttpStatusCode.OK)
+            {
+                Content = new StringContent(
+                    "{\"id\":\"1\",\"model\":\"test\",\"choices\":[{\"finish_reason\":\"stop\",\"message\":{\"role\":\"assistant\",\"content\":\"hi\"}}]}",
+                    Encoding.UTF8, "application/json")
+            };
+        });
+        using var httpClient = new HttpClient(handler) { BaseAddress = new Uri("http://localhost:8000") };
+        var endpoint = OpenAiCompatibleEndpoint.FromBaseUrl("http://localhost:8000");
+        var client = new OpenAiCompatibleChatClient(httpClient, endpoint, "test-model");
+
+        var options = new ChatOptions { Tools = [toolA, toolB] };
+        await client.GetResponseAsync(sameMessages, options, cancellationToken: TestContext.Current.CancellationToken);
+        await client.GetResponseAsync(sameMessages, options, cancellationToken: TestContext.Current.CancellationToken);
+
+        Assert.Equal(2, capturedBodies.Count);
+        var tools1 = ExtractToolsJson(capturedBodies[0]);
+        var tools2 = ExtractToolsJson(capturedBodies[1]);
+        Assert.Equal(tools1, tools2);
+    }
+
+    private async Task<(string body1, string body2)> CaptureTwoRequestBodies(
+        IReadOnlyList<ChatMessage> turn1, IReadOnlyList<ChatMessage> turn2)
+    {
+        var bodies = new List<string>();
+        using var handler = new RecordingHandler(req =>
+        {
+            bodies.Add(req.Content!.ReadAsStringAsync().GetAwaiter().GetResult());
+            return new HttpResponseMessage(HttpStatusCode.OK)
+            {
+                Content = new StringContent(
+                    "{\"id\":\"1\",\"model\":\"test\",\"choices\":[{\"finish_reason\":\"stop\",\"message\":{\"role\":\"assistant\",\"content\":\"hi\"}}]}",
+                    Encoding.UTF8, "application/json")
+            };
+        });
+        using var httpClient = new HttpClient(handler) { BaseAddress = new Uri("http://localhost:8000") };
+        var endpoint = OpenAiCompatibleEndpoint.FromBaseUrl("http://localhost:8000");
+        var client = new OpenAiCompatibleChatClient(httpClient, endpoint, "test-model");
+
+        await client.GetResponseAsync(turn1, cancellationToken: TestContext.Current.CancellationToken);
+        await client.GetResponseAsync(turn2, cancellationToken: TestContext.Current.CancellationToken);
+        Assert.Equal(2, bodies.Count);
+        return (bodies[0], bodies[1]);
+    }
+
+    private static List<string> ExtractStaticPrefixMessages(string requestBody)
+    {
+        // Return the messages array as a list of per-element JSON strings,
+        // with the trailing message stripped. Post-#1176 the trailing
+        // message is the volatile-carrying User turn (with <context>);
+        // pre-#1176 it was a trailing System tail. Either way, that final
+        // entry is the only one allowed to diverge between turns; the rest
+        // must be byte-identical position-for-position for the KV cache
+        // prefix to extend.
+        var root = JsonNode.Parse(requestBody)!.AsObject();
+        var messages = root["messages"]!.AsArray();
+        var end = messages.Count > 0 ? messages.Count - 1 : 0;
+        var result = new List<string>(end);
+        for (var i = 0; i < end; i++)
+            result.Add(messages[i]!.ToJsonString());
+        return result;
+    }
+
+    private static string ExtractToolsJson(string requestBody)
+    {
+        using var doc = JsonDocument.Parse(requestBody);
+        if (!doc.RootElement.TryGetProperty("tools", out var toolsElement))
+            return "(none)";
+        return toolsElement.GetRawText();
+    }
+
     private sealed class RecordingHandler : HttpMessageHandler
     {
         private readonly Func<HttpRequestMessage, HttpResponseMessage> _handler;