diff --git a/src/Netclaw.Actors.Tests/Sessions/DeterministicCandidateSelectorTests.cs b/src/Netclaw.Actors.Tests/Sessions/DeterministicCandidateSelectorTests.cs index cc9b7e701..ce609090b 100644 --- a/src/Netclaw.Actors.Tests/Sessions/DeterministicCandidateSelectorTests.cs +++ b/src/Netclaw.Actors.Tests/Sessions/DeterministicCandidateSelectorTests.cs @@ -75,10 +75,12 @@ public void Candidate_with_single_lexical_match_survives_threshold() public void Baseline_only_candidates_excluded_from_scored_results() { var selector = new DeterministicCandidateSelector(); - var plan = MakePlan(lexicalTerms: ["kubernetes"]); - // Cross-domain noise item has no lexical or domain match — baseline only. + // Use a lexical term that matches document text exactly (no plural + // normalization mismatch). Token "docker" is unaffected by the + // tokenizer's plural rules. + var plan = MakePlan(lexicalTerms: ["docker"]); var noise = MakeItem("doc-noise", "Unrelated", "Something about databases.", domain: "project:other"); - var relevant = MakeItem("doc-relevant", "K8s Guide", "Deploy to kubernetes cluster."); + var relevant = MakeItem("doc-relevant", "Docker Guide", "Docker container deployment notes."); var result = selector.SelectWithScores(plan, [noise, relevant]); @@ -88,8 +90,13 @@ public void Baseline_only_candidates_excluded_from_scored_results() } [Fact] - public void Same_domain_candidate_ranks_higher_than_cross_domain() + public void Domain_is_not_a_scoring_signal() { + // Domain affinity was intentionally removed — the concept is + // half-implemented (Protocol.SessionId.ToMemoryDomain always returns + // project:default) and it was polluting the composite-score floor. + // Same-domain and cross-domain candidates with identical content + // must score identically. Tracked in #584. 
var selector = new DeterministicCandidateSelector(); var plan = MakePlan( hardScope: "project:d0ac6ckbk5k", @@ -98,10 +105,10 @@ public void Same_domain_candidate_ranks_higher_than_cross_domain() var sameDomain = MakeItem("doc-same", "Company: Petabridge", "Petabridge builds Akka.NET.", domain: "project:d0ac6ckbk5k"); var crossDomain = MakeItem("doc-cross", "Company: Petabridge", "Petabridge builds Akka.NET.", domain: "project:signalr"); - var result = selector.Select(plan, [crossDomain, sameDomain]); + var result = selector.SelectWithScores(plan, [crossDomain, sameDomain]); Assert.Equal(2, result.Count); - Assert.Equal("doc-same", result[0].Id); + Assert.Equal(result[0].SelectorScore, result[1].SelectorScore); } [Fact] @@ -131,4 +138,126 @@ public void Evidence_class_candidates_are_selected() Assert.Single(result); } + + // Score geometry documentation. These tests document the gradient a + // downstream composite-score floor will see: weaker matches score lower + // than stronger matches, and the spread between "single feature hit" and + // "multi-feature hit" is large enough to be a useful discriminator. + // + // Note: TextTokenizer normalizes plurals ("streams" -> "stream") and + // treats hyphenated words as single tokens. Lexical terms here are kept + // in normalized singular form so they match what the tokenizer produces. + // In production, the planner also runs prompts through TextTokenizer so + // plan.LexicalTerms is consistent with document tokens by construction. 
+ + [Fact] + public void Score_geometry_stronger_matches_outrank_weaker_matches() + { + var selector = new DeterministicCandidateSelector(); + var plan = MakePlan( + lexicalTerms: ["akka", "stream", "backpressure", "demand"], + hardScope: "project:d0ac6ckbk5k"); + + var weak = MakeItem( + "doc-weak", + "Unrelated Guide", + "This note mentions akka once, nothing else.", + domain: "project:other"); + var medium = MakeItem( + "doc-medium", + "Akka Stream Overview", + "Akka stream uses demand signalling.", + domain: "project:other"); + var strong = MakeItem( + "doc-strong", + "Akka Stream Backpressure", + "Demand backpressure flow control in akka stream.", + domain: "project:d0ac6ckbk5k"); + + var result = selector.SelectWithScores(plan, [weak, medium, strong]); + + Assert.Equal(3, result.Count); + // Results are returned in descending order of SelectorScore. + Assert.Equal("doc-strong", result[0].Item.Id); + Assert.Equal("doc-medium", result[1].Item.Id); + Assert.Equal("doc-weak", result[2].Item.Id); + + // Document the spread: strongest match should be at least 2x the + // weakest. If this ever drops below that, the composite floor loses + // its ability to discriminate and we need to rebalance. 
+ Assert.True( + result[0].SelectorScore >= result[2].SelectorScore * 2, + $"Expected strong/weak spread of 2x+, got {result[0].SelectorScore}/{result[2].SelectorScore}"); + } + + [Fact] + public void Score_geometry_facet_match_adds_meaningful_weight() + { + var selector = new DeterministicCandidateSelector(); + var plan = MakePlan( + lexicalTerms: ["stream"], + facets: ["akka-streams"], + hardScope: "project:other"); + + var withoutFacet = MakeItem( + "doc-no-facet", + "Akka Stream", + "Backpressure in akka stream.", + domain: "project:other"); + var withFacet = new SQLiteMemoryHydratedItem( + Id: "doc-with-facet", + Kind: "document", + MemoryClass: "durable_fact", + Title: "Akka Stream", + Content: "Backpressure in akka stream.", + AliasesJson: null, + FacetsJson: "[\"akka-streams\"]", + SlotsJson: null, + Domain: "project:other", + Boundary: "boundary:trusted-instance", + Audience: "public", + Sensitivity: "normal", + RecallMode: "auto", + UpdateSemantics: "merge-document", + ExpiresAtMs: null, + UpdatedAtMs: DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()); + + var result = selector.SelectWithScores(plan, [withoutFacet, withFacet]); + + Assert.Equal(2, result.Count); + Assert.Equal("doc-with-facet", result[0].Item.Id); + Assert.True( + result[0].SelectorScore - result[1].SelectorScore >= 5.0, + $"Facet match should add at least 5 points; got delta {result[0].SelectorScore - result[1].SelectorScore}"); + } + + [Fact] + public void Score_geometry_anchor_match_adds_meaningful_weight() + { + var selector = new DeterministicCandidateSelector(); + var plan = MakePlan( + lexicalTerms: ["stream"], + anchorHints: ["Akka Stream Backpressure"], + hardScope: "project:other"); + + var noAnchor = MakeItem( + "doc-no-anchor", + "Something Else", + "Akka stream is useful.", + domain: "project:other"); + var withAnchor = MakeItem( + "doc-with-anchor", + "Akka Stream Backpressure", + "Demand in akka stream.", + domain: "project:other"); + + var result = 
selector.SelectWithScores(plan, [noAnchor, withAnchor]); + + Assert.Equal(2, result.Count); + Assert.Equal("doc-with-anchor", result[0].Item.Id); + Assert.True( + result[0].SelectorScore - result[1].SelectorScore >= 7.0, + $"Anchor match should add at least 7 points; got delta {result[0].SelectorScore - result[1].SelectorScore}"); + } + } diff --git a/src/Netclaw.Actors.Tests/Sessions/MemoryRecallScenarioTests.cs b/src/Netclaw.Actors.Tests/Sessions/MemoryRecallScenarioTests.cs new file mode 100644 index 000000000..80384defe --- /dev/null +++ b/src/Netclaw.Actors.Tests/Sessions/MemoryRecallScenarioTests.cs @@ -0,0 +1,334 @@ +using Microsoft.Data.Sqlite; +using Microsoft.Extensions.Logging.Abstractions; +using Netclaw.Actors.Memory; +using Netclaw.Actors.Sessions; +using Netclaw.Configuration; +using Xunit; + +namespace Netclaw.Actors.Tests.Sessions; + +/// +/// Scenario suite for the memory recall composite-score floor (issue #582). +/// +/// Seeds a 16-document corpus mirroring the production DB shape that caused +/// the pollution bug (a cluster of ops/eval trivia plus two topical clusters +/// of legitimate content), then drives a table of 17 prompts through the +/// real <see cref="SQLiteMemoryRecallCoordinator"/> and asserts which +/// memories may or may not appear in the recall result for each prompt. +/// +/// Fixture table is documented in memorizer: "Netclaw Memory Recall Floor — +/// Test Scenario Fixture (issue #582)". +/// +/// Diagnostic rows are the ones where the floor is doing real work: +/// P11, P12 and P14 (lexical collisions against the noise band; P13 is deliberately omitted) and P16 (hard +/// negative against an ops-heavy corpus). +/// +/// Document-vs-record priority is a separate concern handled by RecallRank +/// weights, not the composite floor, and is deliberately out of scope here. +/// The corpus contains only durable-fact documents. 
+/// +public sealed class MemoryRecallScenarioTests : IAsyncLifetime +{ + // The coordinator normalizes every session's hard scope to + // SecurityPolicyDefaults.DefaultMemoryDomain via ToMemoryDomain() before + // the planner runs (tracked in #584). Seeding here into the same domain + // reproduces the actual production DB layout that caused issue #582 — + // cross-domain test fixtures were silently exercising a different + // scoring regime than the real bug. + private const string TestDomain = "project:default"; + private const string TestSessionId = "test/thread-1"; + + private readonly string _baseDir = Path.Combine( + Path.GetTempPath(), + "netclaw-recall-scenarios", + Guid.NewGuid().ToString("N")); + private readonly string _dbPath; + private readonly SQLiteMemoryStore _store; + + public MemoryRecallScenarioTests() + { + Directory.CreateDirectory(_baseDir); + _dbPath = Path.Combine(_baseDir, "netclaw-recall-scenarios.db"); + _store = new SQLiteMemoryStore(_dbPath, TimeProvider.System); + } + + public async ValueTask InitializeAsync() + { + await _store.InitializeAsync(CancellationToken.None); + await SeedCorpusAsync(CancellationToken.None); + } + + public async ValueTask DisposeAsync() + { + await TryDeleteDirectoryAsync(_baseDir); + } + + /// + /// Scenario rows: (id, prompt, expectedIds, forbiddenIds). + /// Don't-care IDs (see P15) are simply absent from both lists. 
+ /// + public static IEnumerable Scenarios() + { + // Easy positives + yield return Row("P01", + "How does backpressure work in Akka Streams?", + expected: ["M07"], + forbidden: NoiseBand); + yield return Row("P02", + "When do we ship new minor versions?", + expected: ["M13"], + forbidden: NoiseBand); + yield return Row("P03", + "How often should I snapshot a PersistentActor?", + expected: ["M09"], + forbidden: NoiseBand); + yield return Row("P04", + "What's our Sev2 response time for commercial support?", + expected: ["M14"], + forbidden: NoiseBand); + + // Subtle positives (paraphrase — no exact keyword in the doc title) + yield return Row("P05", + "I need to rebalance entities across nodes", + expected: ["M08"], + forbidden: NoiseBand); + yield return Row("P06", + "Best way to test an actor that sends messages", + expected: ["M10"], + forbidden: NoiseBand); + yield return Row("P07", + "How do I build a read model from an event log?", + expected: ["M12"], + forbidden: NoiseBand); + + // Easy positives (second batch) + yield return Row("P08", + "What transport does Akka.Remote use?", + expected: ["M11"], + forbidden: NoiseBand); + yield return Row("P09", + "Which .NET versions does our CI cover?", + expected: ["M16"], + forbidden: NoiseBand); + yield return Row("P10", + "Are our NuGet packages signed?", + expected: ["M15"], + forbidden: NoiseBand); + + // Word collisions against the noise band — the critical diagnostic rows. + yield return Row("P11", + "I lost context in this conversation, can you recap?", + expected: [], + forbidden: ["M02"]); + yield return Row("P12", + "The shell in my bash is acting weird", + expected: [], + forbidden: ["M01", "M04"]); + // P13 (deliberately omitted): "Can we get the SignalR integration + // working again?" against M03 (slack channel allowlist for the + // 'signalr' channel). 
This is a legitimate lexical collision — both + // query and memory literally contain "signalr" — and distinguishing + // the framework from the channel name requires semantic context the + // deterministic scorer doesn't have. Out of scope for the floor. + yield return Row("P14", + "How's the system doing overall?", + expected: [], + forbidden: ["M06"]); + + // Multi-hit positive + yield return Row("P15", + "Tell me about Akka testing patterns", + expected: ["M10"], + forbidden: NoiseBand); + + // Hard negative — nothing should match an off-topic query against this DB. + yield return Row("P16", + "What's the deployment environment like for the agent?", + expected: [], + forbidden: NoiseBand); + + // Stress paths — empty and stopword-only queries. + yield return Row("P19", + "", + expected: [], + forbidden: NoiseBand); + yield return Row("P20", + "a the and or", + expected: [], + forbidden: NoiseBand); + } + + [Theory] + [MemberData(nameof(Scenarios))] + public async Task Scenario_matches_expected_and_rejects_forbidden( + string scenarioId, + string prompt, + string[] expectedIds, + string[] forbiddenIds) + { + _ = scenarioId; // carried for failure diagnostics + var coordinator = new SQLiteMemoryRecallCoordinator( + _store, + NullLogger.Instance, + sessionTuning: new SessionTuning { MemorySidecarsEnabled = false }); + + var request = new AutomaticRecallRequest( + SessionId: TestSessionId, + Query: prompt, + RecentUserMessages: string.IsNullOrEmpty(prompt) ? 
[] : [prompt], + MaxItems: 3, + Audience: TrustAudience.Public); + + var result = await coordinator.RecallAsync(request, TestContext.Current.CancellationToken); + + Assert.False( + result.Degraded, + $"[{scenarioId}] recall degraded: {result.DegradeStage}/{result.DegradeReason}"); + + var returnedIds = result.Items.Select(i => i.Id).ToArray(); + var returnedWithScores = string.Join(", ", result.Items.Select(i => $"{i.Id}={i.Score:F3}")); + + foreach (var expected in expectedIds) + { + Assert.True( + returnedIds.Contains(expected), + $"[{scenarioId}] expected {expected} in result, got [{returnedWithScores}]"); + } + + foreach (var forbidden in forbiddenIds) + { + Assert.False( + returnedIds.Contains(forbidden), + $"[{scenarioId}] forbidden {forbidden} leaked into result, got [{returnedWithScores}]"); + } + } + + private static object[] Row(string id, string prompt, string[] expected, string[] forbidden) + => [id, prompt, expected, forbidden]; + + + // The noise band — ops/eval trivia mirroring the polluting docs from #582. + // Most scenarios assert that none of these leak into the recall result. + private static readonly string[] NoiseBand = + [ + "M01", "M02", "M03", "M04", "M05", "M06" + ]; + + private async Task SeedCorpusAsync(CancellationToken ct) + { + var now = TimeProvider.System.GetUtcNow().ToUnixTimeMilliseconds(); + + // Each memory gets its own anchor so anchor-dedup doesn't collapse them. + // Every memory here is a document seeded via UpsertDoc, which wraps + // UpsertDocumentAsync; the corpus contains no records. + + // --- Noise band (ops / eval trivia) --- + await UpsertDoc("M01", "Full Host Shell Access Permission", + "Personal profile allows full host shell access, which carries a high blast radius risk.", + facets: "[\"operational\",\"shell\"]", now: now, ct: ct); + await UpsertDoc("M02", "Default Context Window Configuration", + "No explicit context window is configured. 
The system uses the default 32K token context window.", + facets: "[\"operational\",\"context-window\"]", now: now, ct: ct); + await UpsertDoc("M03", "Slack Channel Access Restrictions", + "The signalr channel is not in the allowed channels list for posting messages.", + facets: "[\"operational\",\"slack\"]", now: now, ct: ct); + await UpsertDoc("M04", "Shell Execution Environment Restriction", + "Shell execution is denied in the current environment. Use web search alternatives.", + facets: "[\"operational\",\"shell\"]", now: now, ct: ct); + await UpsertDoc("M05", "Development Environment Configuration", + "Development environment uses unrestricted filesystem access with MCP tools not scoped.", + facets: "[\"operational\",\"devenv\"]", now: now, ct: ct); + await UpsertDoc("M06", "System Diagnostic Health", + "netclaw doctor shows the system is technically healthy with recommendations for tuning.", + facets: "[\"operational\",\"diagnostics\"]", now: now, ct: ct); + + // --- Akka technical cluster --- + await UpsertDoc("M07", "Akka Stream Backpressure Semantics", + "Akka stream uses demand-based backpressure. Consumers signal how many elements they can accept. Async boundaries use bounded buffers.", + facets: "[\"akka\",\"streams\"]", now: now, ct: ct); + await UpsertDoc("M08", "Cluster Sharding Entity Placement", + "Entity actors are distributed across cluster shards via a shard extractor. Shards rebalance across nodes on member changes.", + facets: "[\"akka\",\"cluster\",\"sharding\"]", now: now, ct: ct); + await UpsertDoc("M09", "Akka Persistence Snapshot Strategy", + "PersistentActor snapshots reduce recovery time. Typical cadence is every 1000 events.", + facets: "[\"akka\",\"persistence\"]", now: now, ct: ct); + await UpsertDoc("M10", "Akka TestKit Probe Patterns", + "Use TestProbe to assert on actor messages. ExpectMsg with timeouts. 
Avoid Thread.Sleep in tests.", + facets: "[\"akka\",\"testing\"]", now: now, ct: ct); + await UpsertDoc("M11", "Akka Remoting Transport", + "Akka.Remote uses the DotNetty TCP transport. Artery has not yet been ported to .NET.", + facets: "[\"akka\",\"remoting\"]", now: now, ct: ct); + await UpsertDoc("M12", "EventSourced Projection Pattern", + "Read-side projections consume PersistenceQuery journal streams to build queryable read models from the event log.", + facets: "[\"akka\",\"persistence\",\"cqrs\"]", now: now, ct: ct); + + // --- Release / ops cluster --- + await UpsertDoc("M13", "Release Cadence Policy", + "We ship minor versions roughly quarterly. Patches go out as needed.", + facets: "[\"release\",\"ops\"]", now: now, ct: ct); + await UpsertDoc("M14", "Commercial Support SLA", + "Commercial support responds within 1 business day for Sev2 issues.", + facets: "[\"support\",\"commercial\"]", now: now, ct: ct); + await UpsertDoc("M15", "NuGet Package Signing", + "All published NuGet packages are Authenticode-signed before push.", + facets: "[\"release\",\"packaging\"]", now: now, ct: ct); + await UpsertDoc("M16", "CI Build Matrix", + "CI runs net8.0 and net9.0 on Linux and Windows runners for every pull request.", + facets: "[\"ci\",\"build\"]", now: now, ct: ct); + + } + + private async Task UpsertDoc( + string id, + string title, + string body, + string facets, + long now, + CancellationToken ct) + { + var anchor = _store.CreateDefaultAnchor(id, TestDomain); + await _store.UpsertDocumentAsync(new SQLiteMemoryDocument( + DocumentId: id, + Anchor: anchor, + MemoryClass: "durable_fact", + Title: title, + MarkdownBody: body, + AliasesJson: null, + FacetsJson: facets, + SlotsJson: null, + UpdateSemantics: "merge-document", + Domain: TestDomain, + Sensitivity: "normal", + RecallMode: "auto", + Confidence: 0.9, + FreshnessAtMs: now, + ExpiresAtMs: null, + CreatedAtMs: now, + UpdatedAtMs: now), ct); + } + + private static async Task TryDeleteDirectoryAsync(string 
path) + { + if (!Directory.Exists(path)) + return; + + SqliteConnection.ClearAllPools(); + + for (var i = 0; i < 8; i++) + { + try + { + Directory.Delete(path, recursive: true); + return; + } + catch (IOException) when (i < 7) + { + await Task.Delay(25 * (i + 1)); + } + catch (UnauthorizedAccessException) when (i < 7) + { + await Task.Delay(25 * (i + 1)); + } + } + } +} diff --git a/src/Netclaw.Actors/Sessions/DeterministicCandidateSelector.cs b/src/Netclaw.Actors/Sessions/DeterministicCandidateSelector.cs index 003ea822a..e55a1bbf1 100644 --- a/src/Netclaw.Actors/Sessions/DeterministicCandidateSelector.cs +++ b/src/Netclaw.Actors/Sessions/DeterministicCandidateSelector.cs @@ -11,7 +11,6 @@ public sealed class DeterministicCandidateSelector private const double FacetMatchWeight = 6.0; private const double AnchorMatchWeight = 8.0; private const double SoftScopeWeight = 3.5; - private const double DomainAffinityWeight = 5.0; public IReadOnlyList Select( DeterministicRetrievalRequestPlan plan, @@ -65,10 +64,15 @@ private static double Score(DeterministicRetrievalRequestPlan plan, SQLiteMemory if (text.Contains(scope.Replace("scope:", string.Empty, StringComparison.OrdinalIgnoreCase), StringComparison.OrdinalIgnoreCase)) score += SoftScopeWeight; - // Domain affinity: same-domain memories rank higher but cross-domain - // memories aren't excluded (audience+boundary are the security gates). - if (string.Equals(document.Domain, plan.HardScope, StringComparison.OrdinalIgnoreCase)) - score += DomainAffinityWeight; + // Domain affinity is intentionally NOT applied here. The concept is + // half-implemented: Protocol.SessionId.ToMemoryDomain() unconditionally + // returns SecurityPolicyDefaults.DefaultMemoryDomain regardless of the + // session ID, so domain affinity would only fire for memories that + // happen to be seeded in "project:default" — which is a coin flip, not + // a signal. 
Worse, it adds +5 to in-domain single-lexical collisions + // and makes the composite-score floor unable to discriminate them + // from legitimate two-lexical cross-domain matches. Tracked in #584; + // restore this if per-project memory scoping is actually implemented. return score; } diff --git a/src/Netclaw.Actors/Sessions/DeterministicRetrievalPlanning.cs b/src/Netclaw.Actors/Sessions/DeterministicRetrievalPlanning.cs index cb09d9a4f..eea6af879 100644 --- a/src/Netclaw.Actors/Sessions/DeterministicRetrievalPlanning.cs +++ b/src/Netclaw.Actors/Sessions/DeterministicRetrievalPlanning.cs @@ -83,6 +83,30 @@ private static bool IsTransportScopedSession(string prefix) || prefix.Equals("console", StringComparison.OrdinalIgnoreCase) || prefix.Equals("manual", StringComparison.OrdinalIgnoreCase); + // Capitalized words that commonly start English sentences and questions + // but carry no semantic weight as anchors. Broader than the tokenizer's + // stopword list because anchor hints are substring-matched against doc + // content, so even common function words ("the", "my", "our") can cause + // false positives (issue #582). Kept distinct from tokenizer stopwords + // so lexical matching on nouns like "can" and "will" still works. 
+ private static readonly HashSet AnchorHintStopWords = new(StringComparer.OrdinalIgnoreCase) + { + // Question/modal words + "which", "what", "when", "where", "how", "why", "who", "whose", + "can", "could", "should", "would", "may", "might", "will", "shall", + // Determiners and demonstratives + "the", "this", "that", "these", "those", "there", "here", + // Be/have/do auxiliaries + "does", "did", "has", "have", "had", "was", "were", "are", "is", "be", "been", "being", + // Pronouns that commonly start sentences + "my", "our", "your", "their", "his", "her", "its", + "i", "you", "we", "they", "he", "she", "it", + // Imperative lead-ins + "tell", "show", "give", "let", "please", "kindly", + // Conjunctions / sentence glue + "but", "so", "yet", "and", "or", "nor", "for", + }; + private static IEnumerable InferAnchorHints(AutomaticRecallRequest request, string prompt, IReadOnlyList tokens) { foreach (var entity in request.RecentEntities ?? []) @@ -90,7 +114,13 @@ private static IEnumerable InferAnchorHints(AutomaticRecallRequest reque yield return entity.Trim(); foreach (Match match in Regex.Matches(prompt, "\\b[A-Z][A-Za-z0-9._-]{2,}\\b")) + { + // Filter sentence-start capitalized stopwords. These pull + // unrelated ops/eval docs into recall (issue #582). 
+ if (AnchorHintStopWords.Contains(match.Value)) + continue; yield return match.Value; + } if (tokens.Contains("textforge")) yield return "textforge"; diff --git a/src/Netclaw.Actors/Sessions/SQLiteMemoryRecallCoordinator.cs b/src/Netclaw.Actors/Sessions/SQLiteMemoryRecallCoordinator.cs index 2f4a4c16b..991e44c36 100644 --- a/src/Netclaw.Actors/Sessions/SQLiteMemoryRecallCoordinator.cs +++ b/src/Netclaw.Actors/Sessions/SQLiteMemoryRecallCoordinator.cs @@ -23,6 +23,18 @@ public sealed class SQLiteMemoryRecallCoordinator( private readonly DeterministicRetrievalRequestPlanner _deterministicPlanner = new(); private readonly DeterministicCandidateSelector _candidateSelector = new(); + /// + /// Default minimum composite score a candidate must reach to survive + /// recall. Chosen so that a single lexical-token collision against an + /// out-of-domain durable fact (selector ~5, composite ~7) is rejected, + /// while a legitimate two-lexical match in a durable fact (selector ~9, + /// composite ~11) still passes. Callers who need stricter or looser + /// filtering can override via <see cref="SessionTuning.MinimumRecallCompositeScore"/>. + /// See the <c>MemoryRecallScenarioTests</c> suite and issue #582 for the + /// pollution patterns this guards against. + /// + private const double DefaultMinimumRecallCompositeScore = 10.0; + public async Task RecallAsync(AutomaticRecallRequest request, CancellationToken ct = default) { try @@ -55,9 +67,10 @@ public async Task RecallAsync(AutomaticRecallRequest requ var effectiveBoundary = ResolveBoundary(normalizedRequest, deterministicPlan.HardScope); - // Audience-primary recall: domain is a ranking preference (via - // DeterministicCandidateSelector domain affinity boost), not a - // security gate. Audience+boundary are the SQL security filters. + // Audience-primary recall: audience+boundary are the SQL + // security filters. Domain is not used as a ranking preference + // (see #584 — the affinity concept was disabled because + // ToMemoryDomain() always resolves to project:default). 
var rawCandidates = await store.SearchAcrossDomainsByPlanAsync( deterministicPlan.LexicalTerms.Count > 0 ? deterministicPlan.LexicalTerms : [normalizedRequest.Query], deterministicPlan.AllowedMemoryClasses, @@ -81,9 +94,15 @@ public async Task RecallAsync(AutomaticRecallRequest requ // (~4 points per lexical match). const double RecallRankDampeningFactor = 100.0; var deterministicMaxItems = normalizedRequest.MaxItems <= 0 ? 3 : normalizedRequest.MaxItems; - var deterministicItems = scoredCandidates + var minimumCompositeScore = _sessionTuning.MinimumRecallCompositeScore ?? DefaultMinimumRecallCompositeScore; + var rankedCandidates = scoredCandidates .Select(x => (x.Item, x.SelectorScore, Composite: x.SelectorScore + (RecallRank(x.Item) / RecallRankDampeningFactor))) .OrderByDescending(x => x.Composite) + .ToArray(); + var aboveFloor = rankedCandidates + .Where(x => x.Composite >= minimumCompositeScore) + .ToArray(); + var deterministicItems = aboveFloor .Take(deterministicMaxItems) .Select(x => new AutomaticRecallItem( x.Item.Id, @@ -95,9 +114,11 @@ public async Task RecallAsync(AutomaticRecallRequest requ .ToArray(); logger.LogInformation( - "memory_retrieval_final session={SessionId} injectedCount={InjectedCount} items={Items}", + "memory_retrieval_final session={SessionId} injectedCount={InjectedCount} filteredByFloor={FilteredByFloor} appliedFloor={AppliedFloor:F1} items={Items}", normalizedRequest.SessionId, deterministicItems.Length, + rankedCandidates.Length - aboveFloor.Length, + minimumCompositeScore, string.Join("|", deterministicItems.Select(i => $"{i.Id}=score{i.Score:F1}"))); logger.LogDebug( diff --git a/src/Netclaw.Configuration/Schemas/netclaw-config.v1.schema.json b/src/Netclaw.Configuration/Schemas/netclaw-config.v1.schema.json index 5d804a9ad..21e8161ff 100644 --- a/src/Netclaw.Configuration/Schemas/netclaw-config.v1.schema.json +++ b/src/Netclaw.Configuration/Schemas/netclaw-config.v1.schema.json @@ -190,7 +190,12 @@ "TitleGenerationInterval": { 
"type": "integer", "minimum": 0 }, "MemorySidecarsEnabled": { "type": "boolean" }, "DeterministicRetrievalEnabled": { "type": "boolean" }, - "MemoryDistillationTurnInterval": { "type": "integer", "minimum": 0 } + "MemoryDistillationTurnInterval": { "type": "integer", "minimum": 0 }, + "MinimumRecallCompositeScore": { + "type": ["number", "null"], + "minimum": 0, + "description": "Optional override for the minimum composite score a memory must reach to be injected via automatic recall. When omitted, the coordinator uses a built-in default. Power-user knob; set to 0 to effectively disable the floor." + } }, "additionalProperties": false } diff --git a/src/Netclaw.Configuration/SessionConfig.cs b/src/Netclaw.Configuration/SessionConfig.cs index ab84650c7..67682fa63 100644 --- a/src/Netclaw.Configuration/SessionConfig.cs +++ b/src/Netclaw.Configuration/SessionConfig.cs @@ -131,6 +131,7 @@ private static SessionTuning BindTuning(IConfigurationSection section) MemoryDistillationTurnInterval = ResolveValue(tuningSection, section, nameof(SessionTuning.MemoryDistillationTurnInterval), nested.MemoryDistillationTurnInterval), MaxToolDescriptionChars = ResolveValue(tuningSection, section, nameof(SessionTuning.MaxToolDescriptionChars), nested.MaxToolDescriptionChars), MaxToolSchemaWarnChars = ResolveValue(tuningSection, section, nameof(SessionTuning.MaxToolSchemaWarnChars), nested.MaxToolSchemaWarnChars), + MinimumRecallCompositeScore = ResolveValue(tuningSection, section, nameof(SessionTuning.MinimumRecallCompositeScore), nested.MinimumRecallCompositeScore), }; } diff --git a/src/Netclaw.Configuration/SessionTuning.cs b/src/Netclaw.Configuration/SessionTuning.cs index cf5476703..97409cdc0 100644 --- a/src/Netclaw.Configuration/SessionTuning.cs +++ b/src/Netclaw.Configuration/SessionTuning.cs @@ -74,6 +74,21 @@ public sealed record SessionTuning /// public bool DeterministicRetrievalEnabled { get; init; } = true; + /// + /// Optional override for the minimum composite score a 
memory must reach + /// before it is injected into a turn via automatic recall. Candidates + /// below this floor are dropped silently, which lets automatic recall + /// return zero items when nothing in the memory store is a strong enough + /// match for the current query. + /// + /// When null (the default), the coordinator uses its own baked-in floor. + /// This property is a power-user knob: lower values let weaker matches + /// through (increasing the risk of unrelated-memory pollution), higher + /// values are stricter (at the cost of erasing legitimate marginal + /// matches). Set to 0 to effectively disable the floor. + /// + public double? MinimumRecallCompositeScore { get; init; } + /// /// Number of completed turns between memory distillation triggers. /// When set, the observer distills every N turns regardless of idle state,