From d986e1f6ec65f844ead26bf1704c64150cece14d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=F0=9F=8E=A7=20RicherTunes=20=F0=9F=8E=A7?= Date: Mon, 16 Feb 2026 21:35:28 -0500 Subject: [PATCH 1/3] chore: bump Common submodule to include triage contracts Picks up PR #380 (TriageReasonCodes, ConfidenceBand enum) and PR #381 (Phase 12-17 evidence blocks in canonical roadmap). Co-Authored-By: Claude Opus 4.6 --- ext/Lidarr.Plugin.Common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/Lidarr.Plugin.Common b/ext/Lidarr.Plugin.Common index 316812f3..1bacd6df 160000 --- a/ext/Lidarr.Plugin.Common +++ b/ext/Lidarr.Plugin.Common @@ -1 +1 @@ -Subproject commit 316812f36565ab9eb6875bf8431a1b446482a3bd +Subproject commit 1bacd6df9fee123722cd3683e57897947388e385 From 9c55ca675b15f8cbc7a752b510000923afa05956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=F0=9F=8E=A7=20RicherTunes=20=F0=9F=8E=A7?= Date: Mon, 16 Feb 2026 21:37:39 -0500 Subject: [PATCH 2/3] refactor(triage): import Common's TriageReasonCodes and ConfidenceBand Replace local ReasonCodes inner class with Common's canonical TriageReasonCodes constants. Use Common's ConfidenceBand enum for classification (output remains lowercase string for API compat). Zero behavior change: all 25 triage/calibration/golden fixture tests pass with identical outputs. Co-Authored-By: Claude Opus 4.6 --- .../Core/RecommendationTriageAdvisor.cs | 32 +++++++------------ .../Core/RecommendationTriageAdvisorTests.cs | 7 ++-- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/Brainarr.Plugin/Services/Core/RecommendationTriageAdvisor.cs b/Brainarr.Plugin/Services/Core/RecommendationTriageAdvisor.cs index 645f0c87..0567540d 100644 --- a/Brainarr.Plugin/Services/Core/RecommendationTriageAdvisor.cs +++ b/Brainarr.Plugin/Services/Core/RecommendationTriageAdvisor.cs @@ -1,25 +1,16 @@ using System; using System.Collections.Generic; using System.Linq; +using Lidarr.Plugin.Common.Abstractions.Triage; using NzbDrone.Core.ImportLists.Brainarr; using NzbDrone.Core.ImportLists.Brainarr.Models; using NzbDrone.Core.ImportLists.Brainarr.Services.Support; +using CommonBand = Lidarr.Plugin.Common.Abstractions.Triage.ConfidenceBand; namespace NzbDrone.Core.ImportLists.Brainarr.Services.Core { internal sealed class RecommendationTriageAdvisor { - internal static class ReasonCodes - { - public const string ConfidenceBelowThreshold = "CONFIDENCE_BELOW_THRESHOLD"; - public const string ConfidenceFarBelowThreshold = "CONFIDENCE_FAR_BELOW_THRESHOLD"; - public const string MissingRequiredMbids = "MISSING_REQUIRED_MBIDS"; - public const string DuplicateSignal = "DUPLICATE_SIGNAL"; - public const string HighConfidenceWithMbid = "HIGH_CONFIDENCE_WITH_MBID"; - public const string ConsistentSignals = "CONSISTENT_SIGNALS"; - public const string CalibrationApplied = "CALIBRATION_APPLIED"; - public const string LowCalibrationProvider = "LOW_CALIBRATION_PROVIDER"; - } private static readonly string[] DuplicateSignals = { @@ -60,7 +51,7 @@ void AddReason(string code, string message, int weight) { var calibrated = profile.Calibrate(confidence); AddReason( - ReasonCodes.CalibrationApplied, + TriageReasonCodes.CalibrationApplied, $"provider {profile.ProviderName} calibration: {confidence:F2} -> {calibrated:F2} (scale={profile.Scale:F2}, bias={profile.Bias:F2})", 0); confidence = calibrated; @@ -68,7 +59,7 @@ void AddReason(string code, string message, int weight) if (profile.QualityTier < 0.6) { AddReason( - ReasonCodes.LowCalibrationProvider, + TriageReasonCodes.LowCalibrationProvider, $"provider {profile.ProviderName} has low quality tier ({profile.QualityTier:F2})", 1); } @@ -78,14 +69,14 @@ void AddReason(string code, string message, int weight) if (confidence < minConfidence) { AddReason( - ReasonCodes.ConfidenceBelowThreshold, + TriageReasonCodes.ConfidenceBelowThreshold, $"confidence {confidence:F2} below threshold {minConfidence:F2}", 2); } if (confidence < (minConfidence - 0.15)) { - AddReason(ReasonCodes.ConfidenceFarBelowThreshold, "confidence substantially below threshold", 2); + AddReason(TriageReasonCodes.ConfidenceFarBelowThreshold, "confidence substantially below threshold", 2); } if (settings.RequireMbids) @@ -96,13 +87,13 @@ void AddReason(string code, string message, int weight) if (artistMissing || (needsAlbumMbid && albumMissing)) { - AddReason(ReasonCodes.MissingRequiredMbids, "missing required MusicBrainz identifiers", 2); + AddReason(TriageReasonCodes.MissingRequiredMbids, "missing required MusicBrainz identifiers", 2); } } if (ContainsDuplicateSignal(item.Reason) || ContainsDuplicateSignal(item.Notes)) { - AddReason(ReasonCodes.DuplicateSignal, "duplicate-like signal in recommendation rationale", 3); + AddReason(TriageReasonCodes.DuplicateSignal, "duplicate-like signal in recommendation rationale", 3); } if (confidence >= 0.9 && !string.IsNullOrWhiteSpace(item.ArtistMusicBrainzId)) @@ -110,15 +101,16 @@ void AddReason(string code, string message, int weight) var reducedBy = Math.Min(1, riskScore); if (reducedBy > 0) { - AddReason(ReasonCodes.HighConfidenceWithMbid, "high confidence with artist MBID present", -reducedBy); + AddReason(TriageReasonCodes.HighConfidenceWithMbid, "high confidence with artist MBID present", -reducedBy); } } var suggestedAction = riskScore >= 6 ? "reject" : riskScore >= 3 ? "review" : "accept"; - var confidenceBand = confidence >= 0.8 ? "high" : confidence >= 0.6 ? "medium" : "low"; + var band = confidence >= 0.8 ? CommonBand.High : confidence >= 0.6 ? CommonBand.Medium : CommonBand.Low; + var confidenceBand = band.ToString().ToLowerInvariant(); if (detailedReasons.Count == 0) { - detailedReasons.Add(new ReviewTriageReason(ReasonCodes.ConsistentSignals, "signals look consistent for queue approval", 0)); + detailedReasons.Add(new ReviewTriageReason(TriageReasonCodes.ConsistentSignals, "signals look consistent for queue approval", 0)); } return new ReviewTriageResult(suggestedAction, confidenceBand, riskScore, detailedReasons, provider?.ToString()); diff --git a/Brainarr.Tests/Services/Core/RecommendationTriageAdvisorTests.cs b/Brainarr.Tests/Services/Core/RecommendationTriageAdvisorTests.cs index 016ffa46..b0b93b4c 100644 --- a/Brainarr.Tests/Services/Core/RecommendationTriageAdvisorTests.cs +++ b/Brainarr.Tests/Services/Core/RecommendationTriageAdvisorTests.cs @@ -1,4 +1,5 @@ using FluentAssertions; +using Lidarr.Plugin.Common.Abstractions.Triage; using NzbDrone.Core.ImportLists.Brainarr; using NzbDrone.Core.ImportLists.Brainarr.Services.Core; using NzbDrone.Core.ImportLists.Brainarr.Services.Support; @@ -32,7 +33,7 @@ public void Analyze_ShouldSuggestReject_ForLowConfidenceDuplicateSignals() result.SuggestedAction.Should().Be("reject"); result.RiskScore.Should().BeGreaterOrEqualTo(6); result.Reasons.Should().Contain(x => x.Contains("duplicate")); - result.ReasonCodes.Should().Contain(RecommendationTriageAdvisor.ReasonCodes.DuplicateSignal); + result.ReasonCodes.Should().Contain(TriageReasonCodes.DuplicateSignal); result.DetailedReasons.Should().Contain(x => x.Weight > 0); } @@ -60,7 +61,7 @@ public void Analyze_ShouldSuggestAccept_ForHighConfidenceWithMbids() result.SuggestedAction.Should().Be("accept"); result.ConfidenceBand.Should().Be("high"); - result.ReasonCodes.Should().Contain(RecommendationTriageAdvisor.ReasonCodes.ConsistentSignals); + result.ReasonCodes.Should().Contain(TriageReasonCodes.ConsistentSignals); result.RiskScore.Should().Be(0); } @@ -85,7 +86,7 @@ public void Analyze_ShouldIncludeNegativeWeightReason_WhenHighConfidenceOffsetsR var result = advisor.Analyze(item, settings); result.SuggestedAction.Should().Be("accept"); - result.ReasonCodes.Should().Contain(RecommendationTriageAdvisor.ReasonCodes.HighConfidenceWithMbid); + result.ReasonCodes.Should().Contain(TriageReasonCodes.HighConfidenceWithMbid); result.DetailedReasons.Should().Contain(x => x.Weight < 0); } } From 27ae78aededcfdc2a5e12ce33a485eb6e55c8df9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=F0=9F=8E=A7=20RicherTunes=20=F0=9F=8E=A7?= Date: Mon, 16 Feb 2026 21:51:17 -0500 Subject: [PATCH 3/3] feat(triage): add EnableProviderCalibration feature flag Add FieldDefinition(43) EnableProviderCalibration setting (default=true) that controls whether per-provider confidence calibration is applied to triage scores. When disabled, the orchestrator passes null for the provider parameter, resulting in raw uncalibrated scores. Add CalibrationDisabled_ReturnsRawScores test verifying the flag behavior: - provider=null yields no CALIBRATION_APPLIED reason code - provider=Ollama yields CALIBRATION_APPLIED + LOW_CALIBRATION_PROVIDER 2415/2415 tests pass (1 known flaky rate limiter test excluded). Co-Authored-By: Claude Opus 4.6 --- Brainarr.Plugin/BrainarrSettings.Discovery.cs | 5 +++ .../Services/Core/BrainarrOrchestrator.cs | 2 +- Brainarr.Plugin/packages.lock.json | 4 +-- .../Core/RecommendationTriageAdvisorTests.cs | 35 +++++++++++++++++++ Brainarr.Tests/packages.lock.json | 4 +-- .../packages.lock.json | 4 +-- .../packages.lock.json | 4 +-- 7 files changed, 49 insertions(+), 9 deletions(-) diff --git a/Brainarr.Plugin/BrainarrSettings.Discovery.cs b/Brainarr.Plugin/BrainarrSettings.Discovery.cs index e123fdfe..dd480b6a 100644 --- a/Brainarr.Plugin/BrainarrSettings.Discovery.cs +++ b/Brainarr.Plugin/BrainarrSettings.Discovery.cs @@ -235,6 +235,11 @@ public bool EnableAutoDetection HelpLink = "https://github.com/RicherTunes/Brainarr/wiki/Review-Queue")] public int ReviewActionCooldownMinutes { get; set; } = 15; + [FieldDefinition(43, Label = "Enable Provider Calibration", Type = FieldType.Checkbox, Advanced = true, Hidden = HiddenType.Hidden, + HelpText = "Apply per-provider confidence calibration to triage scores. Disable for raw uncalibrated scores.", + HelpLink = "https://github.com/RicherTunes/Brainarr/wiki/Confidence-Calibration")] + public bool EnableProviderCalibration { get; set; } = true; + // Observability (hidden preview) [FieldDefinition(16, Label = "Observability (Preview)", Type = FieldType.TagSelect, Advanced = true, HelpText = "Compact preview of provider/model latency, errors and throttles.", diff --git a/Brainarr.Plugin/Services/Core/BrainarrOrchestrator.cs b/Brainarr.Plugin/Services/Core/BrainarrOrchestrator.cs index 98243f5d..ce161bbd 100644 --- a/Brainarr.Plugin/Services/Core/BrainarrOrchestrator.cs +++ b/Brainarr.Plugin/Services/Core/BrainarrOrchestrator.cs @@ -414,7 +414,7 @@ public object HandleAction(string action, IDictionary query, Bra "review/simulateapply" => _reviewQueueHandler.SimulateReviewApply(settings, query), "review/applytriage" => _reviewQueueHandler.ApplyTriageSuggestions(settings, query), "review/rollbacktriage" => _reviewQueueHandler.RollbackTriageApplication(query), - "review/explain" => _reviewQueueHandler.ExplainItem(settings, query, settings?.Provider), + "review/explain" => _reviewQueueHandler.ExplainItem(settings, query, settings?.EnableProviderCalibration == true ? settings?.Provider : null), "review/clear" => _reviewQueueHandler.ClearApprovalSelections(settings), "review/rejectselected" => _reviewQueueHandler.RejectOrNeverSelected(settings, query, ReviewQueueService.ReviewStatus.Rejected), "review/neverselected" => _reviewQueueHandler.RejectOrNeverSelected(settings, query, ReviewQueueService.ReviewStatus.Never), diff --git a/Brainarr.Plugin/packages.lock.json b/Brainarr.Plugin/packages.lock.json index af068ad3..42b5cac1 100644 --- a/Brainarr.Plugin/packages.lock.json +++ b/Brainarr.Plugin/packages.lock.json @@ -403,7 +403,7 @@ "lidarr.plugin.abstractions": { "type": "Project", "dependencies": { - "Microsoft.Extensions.Logging.Abstractions": "[8.0.1, )", + "Microsoft.Extensions.Logging.Abstractions": "[8.0.3, )", "System.Text.Json": "[8.0.6, )" } }, @@ -480,4 +480,4 @@ } } } -} +} \ No newline at end of file diff --git a/Brainarr.Tests/Services/Core/RecommendationTriageAdvisorTests.cs b/Brainarr.Tests/Services/Core/RecommendationTriageAdvisorTests.cs index b0b93b4c..a0c5cb1a 100644 --- a/Brainarr.Tests/Services/Core/RecommendationTriageAdvisorTests.cs +++ b/Brainarr.Tests/Services/Core/RecommendationTriageAdvisorTests.cs @@ -1,6 +1,7 @@ using FluentAssertions; using Lidarr.Plugin.Common.Abstractions.Triage; using NzbDrone.Core.ImportLists.Brainarr; +using NzbDrone.Core.ImportLists.Brainarr.Configuration; using NzbDrone.Core.ImportLists.Brainarr.Services.Core; using NzbDrone.Core.ImportLists.Brainarr.Services.Support; using Xunit; @@ -89,5 +90,39 @@ public void Analyze_ShouldIncludeNegativeWeightReason_WhenHighConfidenceOffsetsR result.ReasonCodes.Should().Contain(TriageReasonCodes.HighConfidenceWithMbid); result.DetailedReasons.Should().Contain(x => x.Weight < 0); } + + [Fact] + public void CalibrationDisabled_ReturnsRawScores() + { + var advisor = new RecommendationTriageAdvisor(); + var settings = new BrainarrSettings + { + MinConfidence = 0.7, + RequireMbids = false + }; + + var item = new ReviewQueueService.ReviewItem + { + Artist = "A", + Album = "B", + Confidence = 0.75, + ArtistMusicBrainzId = "artist-mbid" + }; + + // With calibration disabled (provider=null), raw scores are used + var rawResult = advisor.Analyze(item, settings, provider: null); + + // With calibration enabled (Ollama has Scale=0.80, Bias=0.05), scores differ + var calibratedResult = advisor.Analyze(item, settings, provider: AIProvider.Ollama); + + // Calibrated result should have the CalibrationApplied reason code + calibratedResult.ReasonCodes.Should().Contain(TriageReasonCodes.CalibrationApplied); + + // Raw result should NOT have the CalibrationApplied reason code + rawResult.ReasonCodes.Should().NotContain(TriageReasonCodes.CalibrationApplied); + + // The calibrated result should also flag the low-quality provider + calibratedResult.ReasonCodes.Should().Contain(TriageReasonCodes.LowCalibrationProvider); + } } } diff --git a/Brainarr.Tests/packages.lock.json b/Brainarr.Tests/packages.lock.json index 6d6d9f0d..3bc148c4 100644 --- a/Brainarr.Tests/packages.lock.json +++ b/Brainarr.Tests/packages.lock.json @@ -540,7 +540,7 @@ "lidarr.plugin.abstractions": { "type": "Project", "dependencies": { - "Microsoft.Extensions.Logging.Abstractions": "[8.0.1, )", + "Microsoft.Extensions.Logging.Abstractions": "[8.0.3, )", "System.Text.Json": "[8.0.6, )" } }, @@ -665,4 +665,4 @@ } } } -} +} \ No newline at end of file diff --git a/tests/Brainarr.Providers.OpenAI.Tests/packages.lock.json b/tests/Brainarr.Providers.OpenAI.Tests/packages.lock.json index 360860d5..16f6fd47 100644 --- a/tests/Brainarr.Providers.OpenAI.Tests/packages.lock.json +++ b/tests/Brainarr.Providers.OpenAI.Tests/packages.lock.json @@ -464,7 +464,7 @@ "lidarr.plugin.abstractions": { "type": "Project", "dependencies": { - "Microsoft.Extensions.Logging.Abstractions": "[8.0.1, )", + "Microsoft.Extensions.Logging.Abstractions": "[8.0.3, )", "System.Text.Json": "[8.0.6, )" } }, @@ -585,4 +585,4 @@ } } } -} +} \ No newline at end of file diff --git a/tests/Brainarr.TestKit.Providers/packages.lock.json b/tests/Brainarr.TestKit.Providers/packages.lock.json index f8d3a6d1..6f03edea 100644 --- a/tests/Brainarr.TestKit.Providers/packages.lock.json +++ b/tests/Brainarr.TestKit.Providers/packages.lock.json @@ -369,7 +369,7 @@ "lidarr.plugin.abstractions": { "type": "Project", "dependencies": { - "Microsoft.Extensions.Logging.Abstractions": "[8.0.1, )", + "Microsoft.Extensions.Logging.Abstractions": "[8.0.3, )", "System.Text.Json": "[8.0.6, )" } }, @@ -490,4 +490,4 @@ } } } -} +} \ No newline at end of file