diff --git a/Brainarr.Plugin/BrainarrSettings.Discovery.cs b/Brainarr.Plugin/BrainarrSettings.Discovery.cs index e123fdfe..dd480b6a 100644 --- a/Brainarr.Plugin/BrainarrSettings.Discovery.cs +++ b/Brainarr.Plugin/BrainarrSettings.Discovery.cs @@ -235,6 +235,11 @@ public bool EnableAutoDetection HelpLink = "https://github.com/RicherTunes/Brainarr/wiki/Review-Queue")] public int ReviewActionCooldownMinutes { get; set; } = 15; + [FieldDefinition(43, Label = "Enable Provider Calibration", Type = FieldType.Checkbox, Advanced = true, Hidden = HiddenType.Hidden, + HelpText = "Apply per-provider confidence calibration to triage scores. Disable for raw uncalibrated scores.", + HelpLink = "https://github.com/RicherTunes/Brainarr/wiki/Confidence-Calibration")] + public bool EnableProviderCalibration { get; set; } = true; + // Observability (hidden preview) [FieldDefinition(16, Label = "Observability (Preview)", Type = FieldType.TagSelect, Advanced = true, HelpText = "Compact preview of provider/model latency, errors and throttles.", diff --git a/Brainarr.Plugin/Services/Core/BrainarrOrchestrator.cs b/Brainarr.Plugin/Services/Core/BrainarrOrchestrator.cs index 98243f5d..ce161bbd 100644 --- a/Brainarr.Plugin/Services/Core/BrainarrOrchestrator.cs +++ b/Brainarr.Plugin/Services/Core/BrainarrOrchestrator.cs @@ -414,7 +414,7 @@ public object HandleAction(string action, IDictionary query, Bra "review/simulateapply" => _reviewQueueHandler.SimulateReviewApply(settings, query), "review/applytriage" => _reviewQueueHandler.ApplyTriageSuggestions(settings, query), "review/rollbacktriage" => _reviewQueueHandler.RollbackTriageApplication(query), - "review/explain" => _reviewQueueHandler.ExplainItem(settings, query, settings?.Provider), + "review/explain" => _reviewQueueHandler.ExplainItem(settings, query, settings?.EnableProviderCalibration == true ? settings?.Provider : null), "review/clear" => _reviewQueueHandler.ClearApprovalSelections(settings), "review/rejectselected" => _reviewQueueHandler.RejectOrNeverSelected(settings, query, ReviewQueueService.ReviewStatus.Rejected), "review/neverselected" => _reviewQueueHandler.RejectOrNeverSelected(settings, query, ReviewQueueService.ReviewStatus.Never), diff --git a/Brainarr.Plugin/Services/Core/RecommendationTriageAdvisor.cs b/Brainarr.Plugin/Services/Core/RecommendationTriageAdvisor.cs index 645f0c87..0567540d 100644 --- a/Brainarr.Plugin/Services/Core/RecommendationTriageAdvisor.cs +++ b/Brainarr.Plugin/Services/Core/RecommendationTriageAdvisor.cs @@ -1,25 +1,16 @@ using System; using System.Collections.Generic; using System.Linq; +using Lidarr.Plugin.Common.Abstractions.Triage; using NzbDrone.Core.ImportLists.Brainarr; using NzbDrone.Core.ImportLists.Brainarr.Models; using NzbDrone.Core.ImportLists.Brainarr.Services.Support; +using CommonBand = Lidarr.Plugin.Common.Abstractions.Triage.ConfidenceBand; namespace NzbDrone.Core.ImportLists.Brainarr.Services.Core { internal sealed class RecommendationTriageAdvisor { - internal static class ReasonCodes - { - public const string ConfidenceBelowThreshold = "CONFIDENCE_BELOW_THRESHOLD"; - public const string ConfidenceFarBelowThreshold = "CONFIDENCE_FAR_BELOW_THRESHOLD"; - public const string MissingRequiredMbids = "MISSING_REQUIRED_MBIDS"; - public const string DuplicateSignal = "DUPLICATE_SIGNAL"; - public const string HighConfidenceWithMbid = "HIGH_CONFIDENCE_WITH_MBID"; - public const string ConsistentSignals = "CONSISTENT_SIGNALS"; - public const string CalibrationApplied = "CALIBRATION_APPLIED"; - public const string LowCalibrationProvider = "LOW_CALIBRATION_PROVIDER"; - } private static readonly string[] DuplicateSignals = { @@ -60,7 +51,7 @@ void AddReason(string code, string message, int weight) { var calibrated = profile.Calibrate(confidence); AddReason( - ReasonCodes.CalibrationApplied, + TriageReasonCodes.CalibrationApplied, $"provider {profile.ProviderName} calibration: {confidence:F2} -> {calibrated:F2} (scale={profile.Scale:F2}, bias={profile.Bias:F2})", 0); confidence = calibrated; @@ -68,7 +59,7 @@ void AddReason(string code, string message, int weight) if (profile.QualityTier < 0.6) { AddReason( - ReasonCodes.LowCalibrationProvider, + TriageReasonCodes.LowCalibrationProvider, $"provider {profile.ProviderName} has low quality tier ({profile.QualityTier:F2})", 1); } @@ -78,14 +69,14 @@ void AddReason(string code, string message, int weight) if (confidence < minConfidence) { AddReason( - ReasonCodes.ConfidenceBelowThreshold, + TriageReasonCodes.ConfidenceBelowThreshold, $"confidence {confidence:F2} below threshold {minConfidence:F2}", 2); } if (confidence < (minConfidence - 0.15)) { - AddReason(ReasonCodes.ConfidenceFarBelowThreshold, "confidence substantially below threshold", 2); + AddReason(TriageReasonCodes.ConfidenceFarBelowThreshold, "confidence substantially below threshold", 2); } if (settings.RequireMbids) @@ -96,13 +87,13 @@ void AddReason(string code, string message, int weight) if (artistMissing || (needsAlbumMbid && albumMissing)) { - AddReason(ReasonCodes.MissingRequiredMbids, "missing required MusicBrainz identifiers", 2); + AddReason(TriageReasonCodes.MissingRequiredMbids, "missing required MusicBrainz identifiers", 2); } } if (ContainsDuplicateSignal(item.Reason) || ContainsDuplicateSignal(item.Notes)) { - AddReason(ReasonCodes.DuplicateSignal, "duplicate-like signal in recommendation rationale", 3); + AddReason(TriageReasonCodes.DuplicateSignal, "duplicate-like signal in recommendation rationale", 3); } if (confidence >= 0.9 && !string.IsNullOrWhiteSpace(item.ArtistMusicBrainzId)) @@ -110,15 +101,16 @@ void AddReason(string code, string message, int weight) var reducedBy = Math.Min(1, riskScore); if (reducedBy > 0) { - AddReason(ReasonCodes.HighConfidenceWithMbid, "high confidence with artist MBID present", -reducedBy); + AddReason(TriageReasonCodes.HighConfidenceWithMbid, "high confidence with artist MBID present", -reducedBy); } } var suggestedAction = riskScore >= 6 ? "reject" : riskScore >= 3 ? "review" : "accept"; - var confidenceBand = confidence >= 0.8 ? "high" : confidence >= 0.6 ? "medium" : "low"; + var band = confidence >= 0.8 ? CommonBand.High : confidence >= 0.6 ? CommonBand.Medium : CommonBand.Low; + var confidenceBand = band.ToString().ToLowerInvariant(); if (detailedReasons.Count == 0) { - detailedReasons.Add(new ReviewTriageReason(ReasonCodes.ConsistentSignals, "signals look consistent for queue approval", 0)); + detailedReasons.Add(new ReviewTriageReason(TriageReasonCodes.ConsistentSignals, "signals look consistent for queue approval", 0)); } return new ReviewTriageResult(suggestedAction, confidenceBand, riskScore, detailedReasons, provider?.ToString()); diff --git a/Brainarr.Plugin/packages.lock.json b/Brainarr.Plugin/packages.lock.json index af068ad3..42b5cac1 100644 --- a/Brainarr.Plugin/packages.lock.json +++ b/Brainarr.Plugin/packages.lock.json @@ -403,7 +403,7 @@ "lidarr.plugin.abstractions": { "type": "Project", "dependencies": { - "Microsoft.Extensions.Logging.Abstractions": "[8.0.1, )", + "Microsoft.Extensions.Logging.Abstractions": "[8.0.3, )", "System.Text.Json": "[8.0.6, )" } }, @@ -480,4 +480,4 @@ } } } -} +} \ No newline at end of file diff --git a/Brainarr.Tests/Services/Core/RecommendationTriageAdvisorTests.cs b/Brainarr.Tests/Services/Core/RecommendationTriageAdvisorTests.cs index 016ffa46..a0c5cb1a 100644 --- a/Brainarr.Tests/Services/Core/RecommendationTriageAdvisorTests.cs +++ b/Brainarr.Tests/Services/Core/RecommendationTriageAdvisorTests.cs @@ -1,5 +1,7 @@ using FluentAssertions; +using Lidarr.Plugin.Common.Abstractions.Triage; using NzbDrone.Core.ImportLists.Brainarr; +using NzbDrone.Core.ImportLists.Brainarr.Configuration; using NzbDrone.Core.ImportLists.Brainarr.Services.Core; using NzbDrone.Core.ImportLists.Brainarr.Services.Support; using Xunit; @@ -32,7 +34,7 @@ public void Analyze_ShouldSuggestReject_ForLowConfidenceDuplicateSignals() result.SuggestedAction.Should().Be("reject"); result.RiskScore.Should().BeGreaterOrEqualTo(6); result.Reasons.Should().Contain(x => x.Contains("duplicate")); - result.ReasonCodes.Should().Contain(RecommendationTriageAdvisor.ReasonCodes.DuplicateSignal); + result.ReasonCodes.Should().Contain(TriageReasonCodes.DuplicateSignal); result.DetailedReasons.Should().Contain(x => x.Weight > 0); } @@ -60,7 +62,7 @@ public void Analyze_ShouldSuggestAccept_ForHighConfidenceWithMbids() result.SuggestedAction.Should().Be("accept"); result.ConfidenceBand.Should().Be("high"); - result.ReasonCodes.Should().Contain(RecommendationTriageAdvisor.ReasonCodes.ConsistentSignals); + result.ReasonCodes.Should().Contain(TriageReasonCodes.ConsistentSignals); result.RiskScore.Should().Be(0); } @@ -85,8 +87,42 @@ public void Analyze_ShouldIncludeNegativeWeightReason_WhenHighConfidenceOffsetsR var result = advisor.Analyze(item, settings); result.SuggestedAction.Should().Be("accept"); - result.ReasonCodes.Should().Contain(RecommendationTriageAdvisor.ReasonCodes.HighConfidenceWithMbid); + result.ReasonCodes.Should().Contain(TriageReasonCodes.HighConfidenceWithMbid); result.DetailedReasons.Should().Contain(x => x.Weight < 0); } + + [Fact] + public void CalibrationDisabled_ReturnsRawScores() + { + var advisor = new RecommendationTriageAdvisor(); + var settings = new BrainarrSettings + { + MinConfidence = 0.7, + RequireMbids = false + }; + + var item = new ReviewQueueService.ReviewItem + { + Artist = "A", + Album = "B", + Confidence = 0.75, + ArtistMusicBrainzId = "artist-mbid" + }; + + // With calibration disabled (provider=null), raw scores are used + var rawResult = advisor.Analyze(item, settings, provider: null); + + // With calibration enabled (Ollama has Scale=0.80, Bias=0.05), scores differ + var calibratedResult = advisor.Analyze(item, settings, provider: AIProvider.Ollama); + + // Calibrated result should have the CalibrationApplied reason code + calibratedResult.ReasonCodes.Should().Contain(TriageReasonCodes.CalibrationApplied); + + // Raw result should NOT have the CalibrationApplied reason code + rawResult.ReasonCodes.Should().NotContain(TriageReasonCodes.CalibrationApplied); + + // The calibrated result should also flag the low-quality provider + calibratedResult.ReasonCodes.Should().Contain(TriageReasonCodes.LowCalibrationProvider); + } } } diff --git a/Brainarr.Tests/packages.lock.json b/Brainarr.Tests/packages.lock.json index 6d6d9f0d..3bc148c4 100644 --- a/Brainarr.Tests/packages.lock.json +++ b/Brainarr.Tests/packages.lock.json @@ -540,7 +540,7 @@ "lidarr.plugin.abstractions": { "type": "Project", "dependencies": { - "Microsoft.Extensions.Logging.Abstractions": "[8.0.1, )", + "Microsoft.Extensions.Logging.Abstractions": "[8.0.3, )", "System.Text.Json": "[8.0.6, )" } }, @@ -665,4 +665,4 @@ } } } -} +} \ No newline at end of file diff --git a/ext/Lidarr.Plugin.Common b/ext/Lidarr.Plugin.Common index 316812f3..1bacd6df 160000 --- a/ext/Lidarr.Plugin.Common +++ b/ext/Lidarr.Plugin.Common @@ -1 +1 @@ -Subproject commit 316812f36565ab9eb6875bf8431a1b446482a3bd +Subproject commit 1bacd6df9fee123722cd3683e57897947388e385 diff --git a/tests/Brainarr.Providers.OpenAI.Tests/packages.lock.json b/tests/Brainarr.Providers.OpenAI.Tests/packages.lock.json index 360860d5..16f6fd47 100644 --- a/tests/Brainarr.Providers.OpenAI.Tests/packages.lock.json +++ b/tests/Brainarr.Providers.OpenAI.Tests/packages.lock.json @@ -464,7 +464,7 @@ "lidarr.plugin.abstractions": { "type": "Project", "dependencies": { - "Microsoft.Extensions.Logging.Abstractions": "[8.0.1, )", + "Microsoft.Extensions.Logging.Abstractions": "[8.0.3, )", "System.Text.Json": "[8.0.6, )" } }, @@ -585,4 +585,4 @@ } } } -} +} \ No newline at end of file diff --git a/tests/Brainarr.TestKit.Providers/packages.lock.json b/tests/Brainarr.TestKit.Providers/packages.lock.json index f8d3a6d1..6f03edea 100644 --- a/tests/Brainarr.TestKit.Providers/packages.lock.json +++ b/tests/Brainarr.TestKit.Providers/packages.lock.json @@ -369,7 +369,7 @@ "lidarr.plugin.abstractions": { "type": "Project", "dependencies": { - "Microsoft.Extensions.Logging.Abstractions": "[8.0.1, )", + "Microsoft.Extensions.Logging.Abstractions": "[8.0.3, )", "System.Text.Json": "[8.0.6, )" } }, @@ -490,4 +490,4 @@ } } } -} +} \ No newline at end of file