Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,51 @@ await AwaitAssertAsync(() =>
}, cancellationToken: ct);
}

// Regression for #1164: when no approval has ever been requested in the session,
// a short message like "yes", "a", or "1" should NOT be consumed by the cold
// approval path. The message must fall through to normal LLM ingress.
//
// Flow exercised here: the binding forwards the text as a
// ToolInteractionTextResponse, the pipeline stub replies with
// CommandNack("approval_no_history"), and the same text must then appear in
// the pipeline's CapturedInputs.
[Fact]
public async Task Normal_chat_text_that_looks_like_approval_is_not_consumed_when_no_approval_history()
{
var ct = TestContext.Current.CancellationToken;
// Detector is constructed with a Safe result so prompt-injection screening
// is not what decides the outcome of this test.
var detector = new ConfigurablePromptInjectionDetector(PromptInjectionResult.Safe());

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let all content through the prompt injection detector

var sid = new SessionId("session-cold-text-false-positive");

// Empty output stream: the binding never observed an approval prompt,
// so _hasObservedApprovalRequest stays false and the cold path is active.
// The ResponseFactory simulates the session rejecting the cold-path
// response with approval_no_history (meaning: no approval ever existed).
var pipeline = new RecordingSessionPipeline(_ => [])
{
ResponseFactory = (feedback, _) =>
{
// Nack only text-based approval responses; every other feedback type is acked.
return feedback is ToolInteractionTextResponse
? Task.FromResult<ICommandReply>(CommandNack.For(sid, "approval_no_history"))

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

send a CommandNack back if the feedback is part of a tool text interaction, i.e. sending A/B/C/D instead of clicking a button for approval signaling.

: Task.FromResult<ICommandReply>(CommandAck.For(feedback.SessionId));
}
};

var actor = CreateBindingActorWithPipeline(sid, pipeline, detector);

// Send a message that LooksLikeApprovalResponse matches ("yes" -> ApproveOnce)
// but is ordinary conversation. With the fix, the message falls through
// to normal ChannelInput ingestion.
actor.Tell(CreateInboundMessage("yes", "user-1"), TestActor);

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"yes" is a keyword in the text matching pipeline apparently? We should double check that - that was not my understanding.


// NOTE(review): AwaitAssertAsync presumably retries the assertions until they
// pass or time out, since the actor handles the message asynchronously — confirm
// against the helper's definition.
await AwaitAssertAsync(() =>
{
// The cold path should have forwarded the message to the session
Assert.Single(pipeline.RecordedFeedback.OfType<ToolInteractionTextResponse>());

// The message should NOT be consumed — it must fall through to normal input
Assert.NotEmpty(pipeline.CapturedInputs);
// At least one captured input must carry the exact text that was sent.
Assert.True(
pipeline.CapturedInputs.Any(ci =>
ci.Contents.Any(c => c is TextContent tc && tc.Text == "yes")),
"The original message text should appear in ChannelInput");
}, cancellationToken: ct);
}

// Regression for the silent-drop class of bugs: the binding observes a
// ToolInteractionRequest then a TurnCompleted (which clears its local
// _pendingApprovalRequests). A button click arriving afterwards must still
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public RecordingSessionPipeline(
/// <summary>Pipeline options captured by this recorder; presumably set during session creation — the assignment is outside this view.</summary>
public SessionPipelineOptions? CapturedOptions { get; private set; }
/// <summary>Feedback messages appended by <c>SendFeedbackAndWaitAsync</c>, in call order.</summary>
public List<IWithSessionId> RecordedFeedback { get; } = [];
/// <summary>Channel inputs captured for test assertions; populated outside this view.</summary>
public ConcurrentQueue<ChannelInput> CapturedInputs { get; } = new();
/// <summary>Optional test hook that takes a feedback message and a cancellation token and returns the reply task; referenced by <c>SendFeedbackAndWaitAsync</c>.</summary>
public Func<IWithSessionId, CancellationToken, Task<ICommandReply>>? ResponseFactory { get; set; }

public Task<MaterializedSession> CreateAsync(
SessionId sessionId,
Expand Down Expand Up @@ -125,6 +126,8 @@ public Task SendFeedbackAsync(IWithSessionId feedback, CancellationToken ct = de
/// <summary>
/// Records <paramref name="feedback"/> and returns the reply for it.
/// </summary>
/// <param name="feedback">The feedback message; appended to <c>RecordedFeedback</c> before a reply is produced.</param>
/// <param name="ct">Cancellation token passed through to <c>ResponseFactory</c> when one is configured.</param>
/// <returns>
/// The task produced by <c>ResponseFactory</c> when it is set and returns a non-null task;
/// otherwise a completed task holding a <c>CommandAck</c> for the feedback's session.
/// </returns>
public Task<ICommandReply> SendFeedbackAndWaitAsync(IWithSessionId feedback, CancellationToken ct = default)
{
    RecordedFeedback.Add(feedback);

    // Consult the test-supplied factory first. An unconditional ack returned
    // before this point would make the factory unreachable, so tests could
    // never simulate a CommandNack from the session.
    return ResponseFactory?.Invoke(feedback, ct)
        ?? Task.FromResult<ICommandReply>(CommandAck.For(feedback.SessionId));
}
}
22 changes: 19 additions & 3 deletions src/Netclaw.Actors/Sessions/LlmSessionActor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3262,6 +3262,10 @@ private void EmitUsageOutput(UsageDetails usage)
_ => ApprovalDecision.Denied
};

/// <summary>
/// True when at least one tool approval has been resolved, or when
/// <c>ParkedToolBatchHistory.FindRedrivableAssistantMessage</c> finds a
/// message in the session history.
/// </summary>
private bool HasApprovalHistory
{
    get
    {
        // Cheap check first: any resolved approval is sufficient.
        if (_resolvedToolApprovals.Count > 0)
        {
            return true;
        }

        var redrivable = ParkedToolBatchHistory.FindRedrivableAssistantMessage(_state.History, null);
        return redrivable is not null;
    }
}

/// <summary>
/// Emits the channel-visible "approval prompt expired" notice. Used when a
/// tool interaction response cannot be honored — fail loud instead of
Expand Down Expand Up @@ -3378,9 +3382,21 @@ private bool TryResolveTextApprovalResponse(
{
if (_pendingToolInteractions.Count == 0)
{
_log.Warning("Ignoring text tool interaction response with no pending approvals for sender {SenderId}", msg.SenderId);
EmitExpiredPromptNotice();
nackReason = "approval_prompt_expired";
if (HasApprovalHistory)
{
_log.Warning("Ignoring text tool interaction response with no pending approvals for sender {SenderId}", msg.SenderId);
EmitExpiredPromptNotice();
nackReason = "approval_prompt_expired";

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to be a `const string` if it has semantic meaning inside the application.

}
else
{
// Session has never had an approval request. The channel cold path
// matched the text as approval-like, but this is almost certainly
// ordinary conversation (e.g., "yes", "a", "1"). Don't emit a
// user-visible notice and don't consume — the channel should
// fall through to normal LLM ingress. See #1164.
nackReason = "approval_no_history";

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above: this reason string needs to be a `const string` if it has semantic meaning inside the application.

}
}
else
{
Expand Down
6 changes: 6 additions & 0 deletions src/Netclaw.Channels.Discord/DiscordSessionBindingActor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,12 @@ private async Task<bool> TryHandleColdTextApprovalResponseAsync(DiscordThreadInb
return true;
}

// approval_no_history means the session has never had an approval request.
// The message was a false-positive from LooksLikeApprovalResponse.
// Don't consume — let it fall through to normal LLM ingress. See #1164.
if (reply is CommandNack { Reason: "approval_no_history" })

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These reasons need to be either an enum or a constant.

return false;

return reply is CommandNack;
}
catch (Exception ex)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,12 @@ private async Task<bool> TryHandleColdTextApprovalResponseAsync(MattermostThread
return true;
}

// approval_no_history means the session has never had an approval request.
// The message was a false-positive from LooksLikeApprovalResponse.
// Don't consume — let it fall through to normal LLM ingress. See #1164.
if (reply is CommandNack { Reason: "approval_no_history" })
return false;

return reply is CommandNack;
}
catch (Exception ex)
Expand Down
6 changes: 6 additions & 0 deletions src/Netclaw.Channels.Slack/SlackThreadBindingActor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1304,6 +1304,12 @@ private async Task<bool> TryHandleColdTextApprovalResponseAsync(SlackThreadInbou
return true;
}

// approval_no_history means the session has never had an approval request.
// The message was a false-positive from LooksLikeApprovalResponse.
// Don't consume — let it fall through to normal LLM ingress. See #1164.
if (reply is CommandNack { Reason: "approval_no_history" })

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as the other comments about CommandNack reasons.

return false;

return reply is CommandNack;
}
catch (Exception ex)
Expand Down
Loading