Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions evals/profile-kv-cache.sh
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,35 @@ run_scenario context_heavy "Conversation where responses are long, growing conte
"Compare that to how Redis handles it, especially with Redis Cluster." \
"Given what we discussed, if I'm building a global e-commerce platform, which tradeoff would you recommend and why?"

# Scenario 7: Memory recall across many turns (Regression A canary).
# Live evidence from PR #1171 follow-up showed partial cache drops
# mid-session when memory-recall content changed between turns and
# something upstream rebuilt the leading prefix. With the
# SetSystemPrompt idempotency fix in place, the prefix must stay
# byte-stable and cache hit rate must extend monotonically across the
# six turns. A regression to "cache plateau at ~static prefix size on
# turn 3+" matches the pre-fix failure mode.
run_scenario memory_recall_mid_session "Multi-turn with shifting recall anchors — cache must extend, not plateau" \
"Remember this for our chat: my primary project name is Aurora and uses Rust on Tokio. Confirm in one sentence." \
"What's 17 times 23? Just the number." \
"Remind me what programming language and runtime my Aurora project uses." \
"Now switch gears: name three approaches for handling backpressure in async systems." \
"Of those three, which one is most idiomatic for the runtime my Aurora project uses?" \
"Summarize Aurora's stack and the recommended backpressure approach in two sentences."

# Scenario 8: Tool discovery mid-session (Regression B canary).
# Live evidence showed permanent cache=0 on every turn after a
# dynamic `load_tool` call. The session.log captured cache collapse
# from 99% hit rate to 0% across 5+ subsequent turns over 24 minutes.
# This scenario exercises the dynamic-tool registration path via
# search_tools + load_tool, then verifies subsequent turns still
# extend the cache prefix.
run_scenario tool_loaded_mid_session "Discover and load a tool mid-conversation — cache must recover next turn" \
"Tell me a one-sentence fun fact about hummingbirds. No tools needed." \
"Now I want you to use search_tools to find a tool that can list directory contents, then call load_tool on it (no need to execute the listed tool — just discover and load it)." \
"Did you successfully load the tool? In one sentence, name the tool you loaded." \
"Without invoking any more tools, what was the hummingbird fact you told me earlier?"

# ─── Write Combined Results ───────────────────────────────────────────────────

echo ""
Expand Down
29 changes: 28 additions & 1 deletion src/Netclaw.Actors/Sessions/LlmSessionActor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2473,6 +2473,22 @@ private void SetSystemPrompt()
return;
}

// Idempotent on content: if the freshly-rebuilt prompt is
// byte-identical to history[0], skip the replacement. The
// immutable-list SetItem allocates a new spine even when the value
// is unchanged, and the rebuild itself drops the persisted prompt
// from llama.cpp's KV cache from token 0 because the underlying
// prompt bytes change identity. PR #1171 fixed the volatile-tail
// merge; this fix closes the residual cache-bust path where
// SetSystemPrompt fires unconditionally on every channel-driven
// turn even when on-disk identity files are unchanged.
if (_state.History.Count > 0
&& _state.History[0].Role == Protocol.ChatRole.System
&& string.Equals(_state.History[0].Content, content, StringComparison.Ordinal))
{
return;
}

var systemMsg = new SerializableChatMessage
{
Role = Protocol.ChatRole.System,
Expand All @@ -2484,7 +2500,18 @@ private void SetSystemPrompt()
? _state with { History = _state.History.SetItem(0, systemMsg) }
: _state with { History = _state.History.Insert(0, systemMsg) };

_log.Info("System prompt set ({PromptLength} chars)", content.Length);
// Hash is included so a future cache-drift investigation can confirm
// at a glance whether two "System prompt set" lines carry the same
// content or genuinely different content.
_log.Info("System prompt set ({PromptLength} chars, hash={ContentHash})",
content.Length,
ShortContentHash(content));
}

private static string ShortContentHash(string content)
{
var bytes = System.Security.Cryptography.SHA256.HashData(System.Text.Encoding.UTF8.GetBytes(content));
return Convert.ToHexString(bytes.AsSpan(0, 8)).ToLowerInvariant();
}

private void FireLlmCall(string? recallQuery = null, bool forceNoTools = false)
Expand Down
3 changes: 2 additions & 1 deletion src/Netclaw.Cli.Tests/Tui/ChatPageTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,8 @@ public TestChatViewModel(ToolInteractionRequest? seed)
new DaemonClient("http://127.0.0.1:1"),
TimeProvider.System,
new ModelCapabilities { ModelId = "test-model" },
new ChatNavigationState())
new ChatNavigationState(),
new NetclawPaths())
{
_seed = seed;
}
Expand Down
78 changes: 77 additions & 1 deletion src/Netclaw.Cli/Tui/ChatViewModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public partial class ChatViewModel : ReactiveViewModel
private readonly DaemonClient _daemonClient;
private readonly TimeProvider _timeProvider;
private readonly ModelCapabilities _modelCapabilities;
private readonly NetclawPaths _paths;
private string? _resumeSessionId;
private string? _initialMessage;

Expand All @@ -31,6 +32,12 @@ public partial class ChatViewModel : ReactiveViewModel
private readonly Queue<ToolInteractionRequest> _pendingInteractions = new();
private IDisposable? _daemonOutputSubscription;
private IDisposable? _daemonConnectionSubscription;
// Per-session USAGE log writer. Mirrors HeadlessChannel's writer so the
// canonical signalr-{sessionId}.log file receives USAGE: in=... cached=...
// lines for both TUI and -p driven sessions. Without this, post-hoc KV
// cache analysis and eval tooling that anchors on the per-session log
// silently gets no data from TUI turns (issue #1173).
private StreamWriter? _usageLog;
private bool _sessionReady;
private int _connectAttempts;
private readonly ObservableCollection<string> _approvalOptions = [];
Expand Down Expand Up @@ -73,11 +80,13 @@ public ChatViewModel(
DaemonClient daemonClient,
TimeProvider timeProvider,
ModelCapabilities modelCapabilities,
ChatNavigationState navigationState)
ChatNavigationState navigationState,
NetclawPaths paths)
{
_daemonClient = daemonClient;
_timeProvider = timeProvider;
_modelCapabilities = modelCapabilities;
_paths = paths;
_resumeSessionId = navigationState.TakeResumeSessionId();
_initialMessage = navigationState.TakeInitialMessage();
}
Expand All @@ -95,6 +104,11 @@ protected virtual Task InitializeSessionAsync()
{
_outputSubject.OnNext(output);

if (output is UsageOutput usage)
{
AppendUsageLog(usage);
}

switch (output)
{
case ToolInteractionRequest interaction:
Expand Down Expand Up @@ -341,11 +355,72 @@ internal void SeedPendingInteractionForTesting(ToolInteractionRequest interactio
RequestRedraw();
}

/// <summary>
/// Opens the per-session USAGE log file if not already open. Matches
/// HeadlessChannel's filename and append semantics so a single session
/// driven by both -p (headless) and TUI clients accumulates USAGE
/// lines in one canonical log. Safe to call repeatedly — guarded by
/// the null check so reconnects don't reopen the file.
/// </summary>
private void OpenUsageLogIfNeeded(string sessionIdValue)
{
if (_usageLog is not null)
return;

try
{
var logFileName = $"signalr-{sessionIdValue.Replace("/", "-", StringComparison.Ordinal)}.log";
var logPath = Path.Combine(_paths.LogsDirectory, logFileName);
_usageLog = new StreamWriter(logPath, append: true) { AutoFlush = true };
_usageLog.WriteLine($"[{_timeProvider.GetUtcNow():o}] TUI session attached: {sessionIdValue}");
}
catch (IOException ex)
{
// Logging is best-effort — never let a log-file failure break
// the live chat session. The daemon-side SessionLogActor at
// ~/.netclaw/logs/sessions/{id}/session.log remains the
// authoritative audit trail; surface the open failure to
// Debug so a misconfigured logs directory is at least visible
// under a debugger rather than silently lost.
System.Diagnostics.Debug.WriteLine($"ChatViewModel: failed to open USAGE log for session {sessionIdValue}: {ex.Message}");
_usageLog = null;
}
}

/// <summary>
/// Writes a USAGE line in the exact format produced by
/// HeadlessChannel.cs so existing tooling that grep's for "USAGE: in="
/// in the per-session log Just Works against TUI sessions too.
/// </summary>
private void AppendUsageLog(UsageOutput msg)
{
if (_usageLog is null)
return;

try
{
_usageLog.WriteLine(
$"[{_timeProvider.GetUtcNow():o}] USAGE: in={msg.InputTokens} out={msg.OutputTokens} total={msg.TotalTokens} cached={msg.CachedInputTokens} reasoning={msg.ReasoningTokens} context_window={msg.ContextWindowTokens} prompt_ms={msg.PromptMs} predicted_tok_s={msg.PredictedPerSecond}");
}
catch (IOException ex)
{
// See OpenUsageLogIfNeeded: best-effort logging that must not
// affect the live session. Disk-full or rotation races land
// here and would otherwise spam every turn — disable further
// attempts on this session by dropping the writer reference.
System.Diagnostics.Debug.WriteLine($"ChatViewModel: USAGE log write failed, disabling per-session log: {ex.Message}");
_usageLog.Dispose();
_usageLog = null;
}
}

public override void Dispose()
{
_daemonOutputSubscription?.Dispose();
_daemonConnectionSubscription?.Dispose();
_outputSubject.Dispose();
_usageLog?.Dispose();
_usageLog = null;

IsGenerating.Dispose();
IsInputEnabled.Dispose();
Expand Down Expand Up @@ -397,6 +472,7 @@ private async Task EnsureSessionAndFlushAsync()
? await _daemonClient.ResumeSessionAsync(resumeId, DaemonClient.TuiChannelType)
: await _daemonClient.EnsureSessionAsync(DaemonClient.TuiChannelType);
SessionIdDisplay.Value = sessionId;
OpenUsageLogIfNeeded(sessionId);
_sessionReady = true;
IsInputEnabled.Value = true;
_connectAttempts = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -929,6 +929,139 @@ [new ChatMessage(ChatRole.User, "save")],
Assert.Equal("store_memory", toolCall.Name);
}

// ─── Phase 0 diagnostic tests for residual KV cache prefix drift ─────────
// These tests pin down byte-level invariants the existing logical
// assertions (Role/Text comparison) don't catch. A failure here means
// some serialization step between Assemble and the HTTP body is
// introducing per-turn drift that NormalizeMessages alone can't see.

[Fact]
public async Task Outbound_payload_static_prefix_byte_identical_when_only_volatile_context_differs()
{
// Post-#1176: per-turn volatile context lives inside <context>...
// </context> on the LAST User message rather than a trailing System
// tail (vLLM rejects trailing System messages). The cache prefix
// invariant becomes: leading System + every message except the last
// are byte-identical across consecutive turns.
var turn1Messages = new[]
{
new ChatMessage(ChatRole.System, "You are Netclaw, a helpful assistant.\n\n[session]\nid: test/cache-stability"),
new ChatMessage(ChatRole.User, "<context>\n[memory-recall]\nstatus: healthy\nrecall-A payload\n</context>\n\nfirst question"),
};
var turn2Messages = new[]
{
new ChatMessage(ChatRole.System, "You are Netclaw, a helpful assistant.\n\n[session]\nid: test/cache-stability"),
new ChatMessage(ChatRole.User, "first question"),
new ChatMessage(ChatRole.Assistant, "first answer"),
new ChatMessage(ChatRole.User, "<context>\n[memory-recall]\nstatus: healthy\nrecall-B payload completely different\n</context>\n\nsecond question"),
};

var bodies = await CaptureTwoRequestBodies(turn1Messages, turn2Messages);

// Strip the trailing volatile-carrying User message from each and
// assert byte equality position-by-position over what remains.
var turn1Static = ExtractStaticPrefixMessages(bodies.body1);
var turn2Static = ExtractStaticPrefixMessages(bodies.body2);
Assert.True(turn2Static.Count >= turn1Static.Count,
$"Turn 2 must have at least as many static messages as turn 1 (turn1={turn1Static.Count}, turn2={turn2Static.Count}).");
for (var i = 0; i < turn1Static.Count; i++)
{
Assert.Equal(turn1Static[i], turn2Static[i]);
}
}

[Fact]
public async Task Outbound_payload_tools_array_byte_identical_across_calls_with_same_tool_set()
{
// Regression B canary: two consecutive calls with the same tool
// collection (same order, same definitions) must serialize the
// `tools` field to byte-identical JSON. A failure here means
// tool-list serialization is non-deterministic, which would bust
// the cache the moment any tool is added to a session.
using var schemaDoc = JsonDocument.Parse(
"{\"type\":\"object\",\"properties\":{\"query\":{\"type\":\"string\"}},\"required\":[\"query\"]}");
var toolA = AIFunctionFactory.CreateDeclaration("search_tools", "Search tools", schemaDoc.RootElement);
using var storeSchemaDoc = JsonDocument.Parse(
"{\"type\":\"object\",\"properties\":{\"text\":{\"type\":\"string\"}},\"required\":[\"text\"]}");
var toolB = AIFunctionFactory.CreateDeclaration("store_memory", "Store memory", storeSchemaDoc.RootElement);

var sameMessages = new[] { new ChatMessage(ChatRole.User, "hello") };

var capturedBodies = new List<string>();
using var handler = new RecordingHandler(req =>
{
capturedBodies.Add(req.Content!.ReadAsStringAsync().GetAwaiter().GetResult());
return new HttpResponseMessage(HttpStatusCode.OK)
{
Content = new StringContent(
"{\"id\":\"1\",\"model\":\"test\",\"choices\":[{\"finish_reason\":\"stop\",\"message\":{\"role\":\"assistant\",\"content\":\"hi\"}}]}",
Encoding.UTF8, "application/json")
};
});
using var httpClient = new HttpClient(handler) { BaseAddress = new Uri("http://localhost:8000") };
var endpoint = OpenAiCompatibleEndpoint.FromBaseUrl("http://localhost:8000");
var client = new OpenAiCompatibleChatClient(httpClient, endpoint, "test-model");

var options = new ChatOptions { Tools = [toolA, toolB] };
await client.GetResponseAsync(sameMessages, options, cancellationToken: TestContext.Current.CancellationToken);
await client.GetResponseAsync(sameMessages, options, cancellationToken: TestContext.Current.CancellationToken);

Assert.Equal(2, capturedBodies.Count);
var tools1 = ExtractToolsJson(capturedBodies[0]);
var tools2 = ExtractToolsJson(capturedBodies[1]);
Assert.Equal(tools1, tools2);
}

private async Task<(string body1, string body2)> CaptureTwoRequestBodies(
IReadOnlyList<ChatMessage> turn1, IReadOnlyList<ChatMessage> turn2)
{
var bodies = new List<string>();
using var handler = new RecordingHandler(req =>
{
bodies.Add(req.Content!.ReadAsStringAsync().GetAwaiter().GetResult());
return new HttpResponseMessage(HttpStatusCode.OK)
{
Content = new StringContent(
"{\"id\":\"1\",\"model\":\"test\",\"choices\":[{\"finish_reason\":\"stop\",\"message\":{\"role\":\"assistant\",\"content\":\"hi\"}}]}",
Encoding.UTF8, "application/json")
};
});
using var httpClient = new HttpClient(handler) { BaseAddress = new Uri("http://localhost:8000") };
var endpoint = OpenAiCompatibleEndpoint.FromBaseUrl("http://localhost:8000");
var client = new OpenAiCompatibleChatClient(httpClient, endpoint, "test-model");

await client.GetResponseAsync(turn1, cancellationToken: TestContext.Current.CancellationToken);
await client.GetResponseAsync(turn2, cancellationToken: TestContext.Current.CancellationToken);
Assert.Equal(2, bodies.Count);
return (bodies[0], bodies[1]);
}

private static List<string> ExtractStaticPrefixMessages(string requestBody)
{
// Return the messages array as a list of per-element JSON strings,
// with the trailing message stripped. Post-#1176 the trailing
// message is the volatile-carrying User turn (with <context>);
// pre-#1176 it was a trailing System tail. Either way, that final
// entry is the only one allowed to diverge between turns; the rest
// must be byte-identical position-for-position for the KV cache
// prefix to extend.
var root = JsonNode.Parse(requestBody)!.AsObject();
var messages = root["messages"]!.AsArray();
var end = messages.Count > 0 ? messages.Count - 1 : 0;
var result = new List<string>(end);
for (var i = 0; i < end; i++)
result.Add(messages[i]!.ToJsonString());
return result;
}

private static string ExtractToolsJson(string requestBody)
{
using var doc = JsonDocument.Parse(requestBody);
if (!doc.RootElement.TryGetProperty("tools", out var toolsElement))
return "(none)";
return toolsElement.GetRawText();
}

private sealed class RecordingHandler : HttpMessageHandler
{
private readonly Func<HttpRequestMessage, HttpResponseMessage> _handler;
Expand Down
Loading
Loading