diff --git a/Brainarr.Tests/Services/Resilience/CircuitBreakerCharacterizationTests.cs b/Brainarr.Tests/Services/Resilience/CircuitBreakerCharacterizationTests.cs
new file mode 100644
index 00000000..d9969d0d
--- /dev/null
+++ b/Brainarr.Tests/Services/Resilience/CircuitBreakerCharacterizationTests.cs
@@ -0,0 +1,574 @@
+using System;
+using System.Net.Http;
+using System.Threading;
+using System.Threading.Tasks;
+using FluentAssertions;
+using NLog;
+using NzbDrone.Core.ImportLists.Brainarr.Services.Resilience;
+using Xunit;
+
+namespace Brainarr.Tests.Services.Resilience
+{
+ /// <summary>
+ /// Characterization tests that lock down the current circuit breaker behavior.
+ /// These tests document existing semantics to make WS4.2 migration decisions objective.
+ /// </summary>
+ [Trait("Category", "Unit")]
+ public sealed class CircuitBreakerCharacterizationTests
+ {
+ private static readonly Logger L = LogManager.GetCurrentClassLogger(); // Shared logger handed to every breaker under test; resolved once at type initialization.
+
+ [Fact]
+ public void Starts_Closed()
+ {
+ // A breaker built from the default options that has not executed anything yet.
+ var freshBreaker = new CircuitBreaker("ai:test:model", CircuitBreakerOptions.Default, L);
+ freshBreaker.State.Should().Be(CircuitState.Closed);
+ freshBreaker.ConsecutiveFailures.Should().Be(0);
+ freshBreaker.FailureRate.Should().Be(0);
+ }
+
+ [Fact]
+ public async Task Opens_After_Handled_Exception_And_Blocks_While_Open()
+ {
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 1,
+ FailureRateThreshold = 1.0,
+ BreakDuration = TimeSpan.FromMinutes(10),
+ HalfOpenSuccessThreshold = 1,
+ SamplingWindowSize = 10,
+ MinimumThroughput = 1
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+ // With FailureThreshold = 1, one TimeoutException is expected to surface to the caller and open the circuit.
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException("timeout"))));
+
+ cb.State.Should().Be(CircuitState.Open);
+ // NOTE(review): the open-circuit exception type is not visible from this file; narrow Exception to that type once confirmed.
+ await Assert.ThrowsAnyAsync<Exception>(async () =>
+ await cb.ExecuteAsync(() => Task.FromResult(42)));
+ }
+
+ [Fact]
+ public async Task HalfOpen_Success_Closes_When_BreakDuration_Elapsed()
+ {
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 1,
+ FailureRateThreshold = 1.0,
+ BreakDuration = TimeSpan.Zero,
+ HalfOpenSuccessThreshold = 1,
+ SamplingWindowSize = 10,
+ MinimumThroughput = 1
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+ // Trip the breaker with a single handled failure (FailureThreshold = 1).
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException("timeout"))));
+
+ cb.State.Should().Be(CircuitState.Open);
+ // BreakDuration is zero, so the next call is expected to run; with HalfOpenSuccessThreshold = 1 that one success should close the circuit.
+ var result = await cb.ExecuteAsync(() => Task.FromResult(42));
+ result.Should().Be(42);
+ cb.State.Should().Be(CircuitState.Closed);
+ }
+
+ [Fact]
+ public async Task ExecuteWithFallback_Returns_Fallback_When_Open_And_Does_Not_Invoke_Operation()
+ {
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 1,
+ FailureRateThreshold = 1.0,
+ BreakDuration = TimeSpan.FromMinutes(10),
+ HalfOpenSuccessThreshold = 1,
+ SamplingWindowSize = 10,
+ MinimumThroughput = 1
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+ // Open the circuit first; the 10-minute BreakDuration is meant to keep it open for the rest of the test.
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException("timeout"))));
+
+ var invoked = false;
+ var fallback = await cb.ExecuteWithFallbackAsync(
+ () =>
+ {
+ invoked = true;
+ return Task.FromResult(123);
+ },
+ fallbackValue: 7);
+ // While open, the fallback value is expected back and the operation delegate must not have run.
+ fallback.Should().Be(7);
+ invoked.Should().BeFalse();
+ }
+
+ [Fact]
+ public async Task CircuitOpened_And_Closed_Events_Fire()
+ {
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 1,
+ FailureRateThreshold = 1.0,
+ BreakDuration = TimeSpan.Zero,
+ HalfOpenSuccessThreshold = 1,
+ SamplingWindowSize = 10,
+ MinimumThroughput = 1
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+ // Capture the most recent args raised by each event; null means the event never fired.
+ CircuitBreakerEventArgs? opened = null;
+ CircuitBreakerEventArgs? closed = null;
+
+ cb.CircuitOpened += (_, args) => opened = args;
+ cb.CircuitClosed += (_, args) => closed = args;
+ // One handled failure (FailureThreshold = 1) is expected to open the circuit and raise CircuitOpened.
+ await Assert.ThrowsAsync<HttpRequestException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new HttpRequestException("network"))));
+
+ opened.Should().NotBeNull();
+ opened!.ResourceName.Should().Be("ai:test:model");
+ opened.State.Should().Be(CircuitState.Open);
+ // BreakDuration is zero, so this success is expected to run and close the circuit, raising CircuitClosed.
+ await cb.ExecuteAsync(() => Task.FromResult(1));
+
+ closed.Should().NotBeNull();
+ closed!.ResourceName.Should().Be("ai:test:model");
+ closed.State.Should().Be(CircuitState.Closed);
+ }
+
+ [Fact]
+ public async Task Reset_Closes_And_Clears_Statistics()
+ {
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 1,
+ FailureRateThreshold = 1.0,
+ BreakDuration = TimeSpan.FromMinutes(10),
+ HalfOpenSuccessThreshold = 1,
+ SamplingWindowSize = 10,
+ MinimumThroughput = 1
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+ // Open the circuit so that Reset() has recorded state to clear.
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException("timeout"))));
+
+ cb.State.Should().Be(CircuitState.Open);
+ cb.Reset();
+ // After Reset() the breaker is expected to be closed with its failure counter and operation count back at zero.
+ cb.State.Should().Be(CircuitState.Closed);
+ cb.ConsecutiveFailures.Should().Be(0);
+ cb.GetStatistics().TotalOperations.Should().Be(0);
+ }
+
+ #region Keying Scheme Tests
+
+ [Fact]
+ public void ResourceName_Uses_Keying_Format()
+ {
+ // Keys follow the "ai:{provider}:{modelId}" shape (per the original note, established in BreakerRegistry).
+ const string key = "ai:openai:gpt-4";
+ new CircuitBreaker(key, CircuitBreakerOptions.Default, L).ResourceName.Should().Be(key);
+ }
+
+ [Theory]
+ [InlineData("ai:anthropic:claude-3-opus")]
+ [InlineData("ai:ollama:llama2")]
+ [InlineData("ai:deepseek:deepseek-chat")]
+ public void ResourceName_Preserved_For_Any_Provider_Model_Combination(string resourceName)
+ {
+ // The breaker is expected to report exactly the name it was constructed with.
+ new CircuitBreaker(resourceName, CircuitBreakerOptions.Default, L).ResourceName.Should().Be(resourceName);
+ }
+
+ #endregion
+
+ #region Failure Classification Tests
+
+ [Fact]
+ public async Task TaskCanceledException_Is_Treated_As_Failure()
+ {
+ // CURRENT BEHAVIOR: TaskCanceledException trips the breaker.
+ // This may be surprising: cancellation is treated the same as any other handled failure.
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 1,
+ FailureRateThreshold = 1.0,
+ BreakDuration = TimeSpan.FromMinutes(10),
+ MinimumThroughput = 1
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+ // A single cancellation is expected to surface to the caller and count against FailureThreshold = 1.
+ await Assert.ThrowsAsync<TaskCanceledException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TaskCanceledException("cancelled"))));
+
+ cb.State.Should().Be(CircuitState.Open, "TaskCanceledException is treated as a failure");
+ cb.ConsecutiveFailures.Should().Be(1);
+ }
+
+ [Fact]
+ public async Task HttpRequestException_Is_Treated_As_Failure()
+ {
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 1,
+ FailureRateThreshold = 1.0,
+ BreakDuration = TimeSpan.FromMinutes(10),
+ MinimumThroughput = 1
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+ // A network-level HttpRequestException is expected to surface to the caller and be recorded as a failure.
+ await Assert.ThrowsAsync<HttpRequestException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new HttpRequestException("network error"))));
+
+ cb.State.Should().Be(CircuitState.Open);
+ cb.ConsecutiveFailures.Should().Be(1);
+ }
+
+ [Fact]
+ public async Task Client_Error_With_BadRequest_Does_Not_Trip_Breaker()
+ {
+ // CURRENT BEHAVIOR: an HttpRequestException whose message contains both "4" AND "Bad Request" is excluded.
+ // This is brittle string-based client error detection (should use status codes in future).
+ //
+ // NOTE: This test is intentionally message-coupled because production uses message-based
+ // detection (ex.Message.Contains("4") && ex.Message.Contains("Bad Request")). Do not
+ // "simplify" to status-code checks unless production is also refactored.
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 1,
+ FailureRateThreshold = 1.0,
+ BreakDuration = TimeSpan.FromMinutes(10),
+ MinimumThroughput = 1
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+
+ // HttpRequestException (normally a handled type) with "4" + "Bad Request" - excluded by string match.
+ var clientError = new HttpRequestException("400 Bad Request");
+ await Assert.ThrowsAsync<HttpRequestException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(clientError)));
+ // The exception still reaches the caller, but no failure is expected to be recorded.
+ cb.State.Should().Be(CircuitState.Closed, "Client errors excluded by string matching");
+ cb.ConsecutiveFailures.Should().Be(0);
+ }
+
+ [Fact]
+ public async Task Generic_Exception_Without_BadRequest_Does_Not_Trip_Breaker()
+ {
+ // Non-handled exceptions pass through without recording failure.
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 1,
+ FailureRateThreshold = 1.0,
+ BreakDuration = TimeSpan.FromMinutes(10),
+ MinimumThroughput = 1
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+ // InvalidOperationException stands in for an exception type the breaker does not handle.
+ var genericError = new InvalidOperationException("some logic error");
+ await Assert.ThrowsAsync<InvalidOperationException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(genericError)));
+
+ cb.State.Should().Be(CircuitState.Closed, "Non-handled exceptions don't trip breaker");
+ cb.ConsecutiveFailures.Should().Be(0, "Non-handled exceptions don't record as failures");
+ }
+
+ #endregion
+
+ #region Consecutive Failures Threshold Tests
+
+ [Fact]
+ public async Task Opens_After_Consecutive_Failures_Threshold()
+ {
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 5, // Default
+ FailureRateThreshold = 1.0, // Highest possible rate; MinimumThroughput below is what keeps rate-based opening out of play
+ BreakDuration = TimeSpan.FromMinutes(10),
+ MinimumThroughput = 100 // High minimum to prevent rate-based opening
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+
+ // 4 failures - should remain closed
+ for (int i = 0; i < 4; i++)
+ {
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException())));
+ }
+ cb.State.Should().Be(CircuitState.Closed);
+ cb.ConsecutiveFailures.Should().Be(4);
+
+ // 5th failure - opens the circuit
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException())));
+
+ cb.State.Should().Be(CircuitState.Open);
+ cb.ConsecutiveFailures.Should().Be(5);
+ }
+
+ [Fact]
+ public async Task Success_Resets_Consecutive_Failure_Counter()
+ {
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 5,
+ FailureRateThreshold = 1.0,
+ BreakDuration = TimeSpan.FromMinutes(10),
+ MinimumThroughput = 100
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+
+ // 3 failures: below FailureThreshold = 5, so the counter climbs without opening the circuit
+ for (int i = 0; i < 3; i++)
+ {
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException())));
+ }
+ cb.ConsecutiveFailures.Should().Be(3);
+
+ // 1 success resets counter
+ await cb.ExecuteAsync(() => Task.FromResult(42));
+ cb.ConsecutiveFailures.Should().Be(0);
+
+ // Circuit should still be closed
+ cb.State.Should().Be(CircuitState.Closed);
+ }
+
+ #endregion
+
+ #region Failure Rate Threshold Tests
+
+ [Fact]
+ public async Task Opens_When_Failure_Rate_Exceeds_Threshold_And_Minimum_Throughput_Met()
+ {
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 100, // High to prevent consecutive-based opening
+ FailureRateThreshold = 0.5, // 50%
+ BreakDuration = TimeSpan.FromMinutes(10),
+ SamplingWindowSize = 20,
+ MinimumThroughput = 10
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+
+ // 5 successes
+ for (int i = 0; i < 5; i++)
+ {
+ await cb.ExecuteAsync(() => Task.FromResult(i));
+ }
+ cb.State.Should().Be(CircuitState.Closed);
+
+ // 4 failures (4/9 = 44% < 50%, and 9 ops is still under MinimumThroughput = 10): circuit stays closed
+ for (int i = 0; i < 4; i++)
+ {
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException())));
+ }
+ cb.State.Should().Be(CircuitState.Closed, "9 ops at 44% failure rate - below threshold");
+
+ // 1 more failure (5/10 = 50%, meets threshold and minimum throughput)
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException())));
+
+ cb.State.Should().Be(CircuitState.Open, "10 ops at 50% failure rate - meets threshold");
+ cb.FailureRate.Should().BeApproximately(0.5, 0.01);
+ }
+
+ [Fact]
+ public async Task Does_Not_Open_On_High_Failure_Rate_Below_Minimum_Throughput()
+ {
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 100, // High to prevent consecutive-based opening
+ FailureRateThreshold = 0.5, // 50%
+ BreakDuration = TimeSpan.FromMinutes(10),
+ SamplingWindowSize = 20,
+ MinimumThroughput = 10
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+
+ // 1 success, 3 failures (75% failure rate but only 4 ops < 10 minimum)
+ await cb.ExecuteAsync(() => Task.FromResult(1));
+ for (int i = 0; i < 3; i++)
+ {
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException())));
+ }
+ // The rate is still reported, but it is not expected to open the circuit below MinimumThroughput.
+ cb.FailureRate.Should().BeApproximately(0.75, 0.01);
+ cb.State.Should().Be(CircuitState.Closed, "Below minimum throughput - rate-based opening disabled");
+ }
+
+ #endregion
+
+ #region Half-Open State Transition Tests
+
+ [Fact]
+ public async Task HalfOpen_Closes_After_Configured_Successes()
+ {
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 1,
+ FailureRateThreshold = 1.0,
+ BreakDuration = TimeSpan.Zero, // Immediate transition to half-open
+ HalfOpenSuccessThreshold = 3,
+ MinimumThroughput = 1
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+
+ // Open the circuit with one handled failure (FailureThreshold = 1)
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException())));
+ cb.State.Should().Be(CircuitState.Open);
+
+ // First success - transitions to half-open, stays half-open
+ await cb.ExecuteAsync(() => Task.FromResult(1));
+ cb.State.Should().Be(CircuitState.HalfOpen, "1 success in half-open, need 3 to close");
+
+ // Second success - still one short of HalfOpenSuccessThreshold = 3
+ await cb.ExecuteAsync(() => Task.FromResult(2));
+ cb.State.Should().Be(CircuitState.HalfOpen, "2 successes in half-open, need 3 to close");
+
+ // Third success - closes circuit
+ await cb.ExecuteAsync(() => Task.FromResult(3));
+ cb.State.Should().Be(CircuitState.Closed, "3 successes closes the circuit");
+ }
+
+ [Fact]
+ public async Task HalfOpen_Failure_Immediately_Reopens()
+ {
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 1,
+ FailureRateThreshold = 1.0,
+ BreakDuration = TimeSpan.Zero,
+ HalfOpenSuccessThreshold = 3,
+ MinimumThroughput = 1
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+
+ // Open the circuit with one handled failure (FailureThreshold = 1)
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException())));
+ cb.State.Should().Be(CircuitState.Open);
+
+ // 1 success to enter half-open (3 would be needed to close)
+ await cb.ExecuteAsync(() => Task.FromResult(1));
+ cb.State.Should().Be(CircuitState.HalfOpen);
+
+ // Failure in half-open immediately reopens
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException())));
+
+ cb.State.Should().Be(CircuitState.Open, "Any failure in half-open reopens the circuit");
+ }
+
+ #endregion
+
+ #region Windowing / CircularBuffer Tests
+
+ [Fact]
+ public async Task CircularBuffer_Wraps_And_Maintains_Accurate_FailureRate()
+ {
+ // This test verifies that failure rate is calculated over the sliding window,
+ // and that old operations get pushed out as new ones arrive.
+ var options = new CircuitBreakerOptions
+ {
+ FailureThreshold = 100, // High to prevent consecutive-based opening
+ FailureRateThreshold = 0.7, // 70% - higher threshold to observe window behavior
+ BreakDuration = TimeSpan.FromMinutes(10),
+ SamplingWindowSize = 5, // Small window for testing
+ MinimumThroughput = 3
+ };
+ var cb = new CircuitBreaker("ai:test:model", options, L);
+
+ // Phase 1: Fill buffer with 3 successes, 2 failures = 40% failure rate
+ // Window: [S, S, S, F, F]
+ for (int i = 0; i < 3; i++) { await cb.ExecuteAsync(() => Task.FromResult(i)); }
+ for (int i = 0; i < 2; i++)
+ {
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException())));
+ }
+
+ cb.FailureRate.Should().BeApproximately(0.4, 0.01);
+ cb.State.Should().Be(CircuitState.Closed);
+
+ // Phase 2: Add 1 more failure - pushes out oldest success
+ // Window: [S, S, F, F, F] = 60% failure rate (still below 70%)
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException())));
+
+ cb.FailureRate.Should().BeApproximately(0.6, 0.01);
+ cb.State.Should().Be(CircuitState.Closed, "60% is below 70% threshold");
+
+ // Phase 3: Add 1 more failure - pushes out another success
+ // Window: [S, F, F, F, F] = 80% failure rate (exceeds 70%)
+ await Assert.ThrowsAsync<TimeoutException>(async () =>
+ await cb.ExecuteAsync(() => Task.FromException<int>(new TimeoutException())));
+
+ cb.FailureRate.Should().BeGreaterThanOrEqualTo(0.7);
+ cb.State.Should().Be(CircuitState.Open, "Window wrapped, failure rate now exceeds threshold");
+ }
+
+ [Fact]
+ public void GetStatistics_Returns_Correct_Initial_State()
+ {
+ // Snapshot the statistics of a breaker that has not executed any operation yet.
+ var windowOnly = new CircuitBreakerOptions { SamplingWindowSize = 10 };
+ var snapshot = new CircuitBreaker("ai:test:model", windowOnly, L).GetStatistics();
+
+ snapshot.ResourceName.Should().Be("ai:test:model");
+ snapshot.State.Should().Be(CircuitState.Closed);
+ snapshot.TotalOperations.Should().Be(0);
+ snapshot.ConsecutiveFailures.Should().Be(0);
+ snapshot.FailureRate.Should().Be(0);
+ snapshot.NextHalfOpenAttempt.Should().BeNull();
+ }
+
+ #endregion
+
+ #region Configuration Constants Tests
+
+ [Fact]
+ public void Default_Options_Use_Brainarr_Constants()
+ {
+ // Pins every field of the Default preset so an accidental change shows up as a test failure.
+ var preset = CircuitBreakerOptions.Default;
+
+ // Expected values (per the original note, sourced from BrainarrConstants).
+ preset.FailureThreshold.Should().Be(5, "consecutive failures to open");
+ preset.FailureRateThreshold.Should().Be(0.5, "50% failure rate threshold");
+ preset.BreakDuration.Should().Be(TimeSpan.FromSeconds(30), "30 second open duration");
+ preset.HalfOpenSuccessThreshold.Should().Be(3, "3 successes to close from half-open");
+ preset.SamplingWindowSize.Should().Be(20, "20 operation sampling window");
+ preset.MinimumThroughput.Should().Be(10, "10 minimum operations for rate-based opening");
+ }
+
+ [Fact]
+ public void Aggressive_Options_Presets()
+ {
+ var preset = CircuitBreakerOptions.Aggressive;
+ // Pins the three fields this test covers for the Aggressive preset.
+ preset.FailureThreshold.Should().Be(3);
+ preset.FailureRateThreshold.Should().Be(0.3);
+ preset.BreakDuration.Should().Be(TimeSpan.FromMinutes(5));
+ }
+
+ [Fact]
+ public void Lenient_Options_Presets()
+ {
+ var preset = CircuitBreakerOptions.Lenient;
+ // Pins the three fields this test covers for the Lenient preset.
+ preset.FailureThreshold.Should().Be(10);
+ preset.FailureRateThreshold.Should().Be(0.75);
+ preset.BreakDuration.Should().Be(TimeSpan.FromSeconds(30));
+ }
+
+ #endregion
+ }
+}
+