diff --git a/src/coordination/azure/Akka.Coordination.Azure.Tests/AzureApiSpec.cs b/src/coordination/azure/Akka.Coordination.Azure.Tests/AzureApiSpec.cs
index 5b3c7f3b9..f9d5d6ddd 100644
--- a/src/coordination/azure/Akka.Coordination.Azure.Tests/AzureApiSpec.cs
+++ b/src/coordination/azure/Akka.Coordination.Azure.Tests/AzureApiSpec.cs
@@ -104,5 +104,55 @@ public async Task ShouldRemoveLease()
             var response = await _underTest.RemoveLease(LeaseName);
             response.Should().Be(Done.Instance);
         }
+
+        // Regression test for https://github.com/akkadotnet/Akka.Management/issues/3397
+        // A second AzureApiImpl instance (_initialized = false) encountering a container that
+        // already exists must handle the 409 ContainerAlreadyExists gracefully instead of
+        // throwing a LeaseException that propagates up to the Split Brain Resolver.
+        [Fact(DisplayName = "Should handle ContainerAlreadyExists when a second instance starts")]
+        public async Task ShouldHandleContainerAlreadyExists()
+        {
+            // First instance creates the container and lease blob
+            var firstLease = await _underTest.ReadOrCreateLeaseResource(LeaseName);
+            firstLease.Owner.Should().BeNull();
+
+            // Second instance has _initialized = false, so ContainerClient() will call
+            // CreateAsync() and receive a 409 ContainerAlreadyExists from Azure.
+            // Before the fix, this threw LeaseException and crashed the lease actor.
+            var secondInstance = new AzureApiImpl(Sys, _settings);
+            var secondLease = await secondInstance.ReadOrCreateLeaseResource(LeaseName);
+            secondLease.Owner.Should().BeNull();
+            secondLease.Version.Should().NotBeNull();
+        }
+
+        // Verifies that multiple independent AzureApiImpl instances can operate concurrently
+        // against the same container — the typical scenario in a multi-node Akka.NET cluster
+        // where each node creates its own AzureApiImpl.
+        [Fact(DisplayName = "Multiple instances should acquire different leases against same container")]
+        public async Task MultipleInstancesShouldAcquireDifferentLeases()
+        {
+            const string leaseName1 = "lease-multi-1";
+            const string leaseName2 = "lease-multi-2";
+            const string owner1 = "node-1";
+            const string owner2 = "node-2";
+
+            var instance1 = new AzureApiImpl(Sys, _settings);
+            var instance2 = new AzureApiImpl(Sys, _settings);
+
+            // Both instances create their leases (both will try CreateAsync on the container)
+            var lease1 = await instance1.ReadOrCreateLeaseResource(leaseName1);
+            var lease2 = await instance2.ReadOrCreateLeaseResource(leaseName2);
+
+            // Both should succeed — one creates the container, the other gets 409 and handles it
+            lease1.Owner.Should().BeNull();
+            lease2.Owner.Should().BeNull();
+
+            // Both instances should be able to update their respective leases
+            var update1 = await instance1.UpdateLeaseResource(leaseName1, owner1, lease1.Version, DateTimeOffset.UtcNow);
+            update1.Should().BeOfType<Right<LeaseResource, LeaseResource>>();
+
+            var update2 = await instance2.UpdateLeaseResource(leaseName2, owner2, lease2.Version, DateTimeOffset.UtcNow);
+            update2.Should().BeOfType<Right<LeaseResource, LeaseResource>>();
+        }
     }
 }
\ No newline at end of file
diff --git a/src/coordination/azure/Akka.Coordination.Azure/Internal/AzureApiImpl.cs b/src/coordination/azure/Akka.Coordination.Azure/Internal/AzureApiImpl.cs
index c18ec76f4..c71634ae8 100644
--- a/src/coordination/azure/Akka.Coordination.Azure/Internal/AzureApiImpl.cs
+++ b/src/coordination/azure/Akka.Coordination.Azure/Internal/AzureApiImpl.cs
@@ -21,6 +21,24 @@
 
 namespace Akka.Coordination.Azure.Internal
 {
+    /// <summary>
+    /// Thrown when Azure Blob container initialization fails.
+    /// Used to distinguish container-level errors from blob-level
+    /// so that callers can retry appropriately.
+    /// </summary>
+    internal sealed class ContainerInitializationException : Exception
+    {
+        public int StatusCode { get; }
+        public string? ErrorCode { get; }
+
+        public ContainerInitializationException(string message, int statusCode, string? errorCode, Exception innerException)
+            : base(message, innerException)
+        {
+            StatusCode = statusCode;
+            ErrorCode = errorCode;
+        }
+    }
+
     internal sealed class AzureApiImpl: IAzureApi
     {
         private readonly AzureLeaseSettings _settings;
@@ -73,10 +91,45 @@ private async Task ContainerClient()
 
             var client = serviceClient.GetBlobContainerClient(_settings.ContainerName);
 
-            // Make sure that `CreateIfNotExistsAsync()` only get called once for every AzureApi instance
+            // Ensure container exists. Only attempted once per AzureApiImpl instance.
+            //
+            // Uses CreateAsync() instead of CreateIfNotExistsAsync() because the latter has known
+            // Azure SDK bugs where it still throws RequestFailedException(409).
+            // See: https://github.com/Azure/azure-sdk-for-net/issues/28549
             if (!_initialized)
             {
-                await client.CreateIfNotExistsAsync();
+                try
+                {
+                    await client.CreateAsync();
+                }
+                catch (RequestFailedException ex)
+                {
+                    switch ((HttpStatusCode)ex.Status)
+                    {
+                        case HttpStatusCode.Conflict when ex.ErrorCode == "ContainerAlreadyExists":
+                            // Benign — container already exists from a previous run or another node.
+                            _log.Debug("Container '{0}' already exists", _settings.ContainerName);
+                            break;
+
+                        case HttpStatusCode.Conflict:
+                            // ContainerBeingDeleted or other transient 409 — retriable
+                            throw new ContainerInitializationException(
+                                $"Container '{_settings.ContainerName}' creation conflict: {ex.ErrorCode}",
+                                ex.Status, ex.ErrorCode, ex);
+
+                        case HttpStatusCode.Forbidden:
+                        case HttpStatusCode.Unauthorized:
+                            throw new ContainerInitializationException(
+                                $"Not authorized to create container '{_settings.ContainerName}': [{ex.ErrorCode}]",
+                                ex.Status, ex.ErrorCode, ex);
+
+                        default:
+                            // Other errors (429, 500, 503, etc.)
+                            throw new ContainerInitializationException(
+                                $"Container '{_settings.ContainerName}' creation failed with status {ex.Status}: {ex.ErrorCode}",
+                                ex.Status, ex.ErrorCode, ex);
+                    }
+                }
                 _initialized = true;
             }
 
@@ -166,6 +219,11 @@ public async Task> UpdateLeaseResource(
                 _log.Debug("Lease resource {0} created", leaseName);
                 return ToLeaseResource(leaseBody, operationResponse);
             }
+            catch (ContainerInitializationException e)
+            {
+                _log.Warning(e, "Container initialization failed while creating lease {0}: {1}", leaseName, e.Message);
+                return null;
+            }
             catch (RequestFailedException e)
             {
                 switch ((HttpStatusCode)e.Status)
@@ -212,6 +270,11 @@ private async Task LeaseResourceExists(string leaseName)
                 var response = await blobClient.ExistsAsync(cts.Token);
                 return response.Value;
             }
+            catch (ContainerInitializationException e)
+            {
+                _log.Warning(e, "Container initialization failed while checking lease {0} existence: {1}", leaseName, e.Message);
+                return false;
+            }
             catch (RequestFailedException e)
             {
                 throw (HttpStatusCode)e.Status switch
@@ -251,6 +314,11 @@ private async Task LeaseResourceExists(string leaseName)
                 _log.Debug("Resource {0} exists: {1}", leaseName, lease);
                 return lease;
             }
+            catch (ContainerInitializationException e)
+            {
+                _log.Warning(e, "Container initialization failed while retrieving lease {0}: {1}", leaseName, e.Message);
+                return null;
+            }
             catch (RequestFailedException e)
             {
                 switch ((HttpStatusCode) e.Status)