diff --git a/src/Aspire.Hosting/Dcp/ApplicationExecutor.cs b/src/Aspire.Hosting/Dcp/ApplicationExecutor.cs index e8e3bc44e50..b536ffba12d 100644 --- a/src/Aspire.Hosting/Dcp/ApplicationExecutor.cs +++ b/src/Aspire.Hosting/Dcp/ApplicationExecutor.cs @@ -2088,14 +2088,16 @@ async Task StartExecutableOrContainerAsync(T resource) where T : CustomResour // Ensure resource is deleted. DeleteAsync returns before the resource is completely deleted so we must poll // to discover when it is safe to recreate the resource. This is required because the resources share the same name. + // Deleting a resource might take a while (more than 10 seconds), because DCP tries to gracefully shut it down first + // before resorting to more extreme measures. if (!resourceNotFound) { var ensureDeleteRetryStrategy = new RetryStrategyOptions() { - BackoffType = DelayBackoffType.Linear, - MaxDelay = TimeSpan.FromSeconds(0.5), + BackoffType = DelayBackoffType.Exponential, + Delay = TimeSpan.FromMilliseconds(200), UseJitter = true, - MaxRetryAttempts = 5, + MaxRetryAttempts = 6, // Cumulative time for all attempts amounts to about 12 seconds ShouldHandle = new PredicateBuilder().Handle(), OnRetry = (retry) => { diff --git a/src/Aspire.Hosting/Dcp/KubernetesService.cs b/src/Aspire.Hosting/Dcp/KubernetesService.cs index 44797d80699..072d31053b6 100644 --- a/src/Aspire.Hosting/Dcp/KubernetesService.cs +++ b/src/Aspire.Hosting/Dcp/KubernetesService.cs @@ -6,6 +6,7 @@ using System.Runtime.CompilerServices; using Aspire.Hosting.Dcp.Model; using k8s; +using k8s.Autorest; using k8s.Exceptions; using k8s.Models; using Microsoft.Extensions.Logging; @@ -90,6 +91,7 @@ public Task GetAsync(string name, string? namespaceParameter = null, Cance return KubernetesJson.Deserialize(response.Body.ToString()); }, + RetryOnConnectivityAndConflictErrors, cancellationToken); } @@ -122,6 +124,7 @@ public Task CreateAsync(T obj, CancellationToken cancellationToken = defau return KubernetesJson.Deserialize(response.Body.ToString()); }, + RetryOnConnectivityErrors, cancellationToken); } @@ -156,6 +159,7 @@ public Task PatchAsync(T obj, V1Patch patch, CancellationToken cancellatio return KubernetesJson.Deserialize(response.Body.ToString()); }, + RetryOnConnectivityErrors, cancellationToken); } @@ -185,6 +189,7 @@ public Task> ListAsync(string? namespaceParameter = null, Cancellatio return KubernetesJson.Deserialize>(response.Body.ToString()).Items; }, + RetryOnConnectivityAndConflictErrors, cancellationToken); } @@ -216,6 +221,7 @@ public Task DeleteAsync(string name, string? namespaceParameter = null, Ca return KubernetesJson.Deserialize(response.Body.ToString()); }, + RetryOnConnectivityAndConflictErrors, cancellationToken); } @@ -248,6 +254,7 @@ public Task DeleteAsync(string name, string? namespaceParameter = null, Ca return responseTask.WatchAsync(null, cancellationToken); }, + RetryOnConnectivityAndConflictErrors, cancellationToken).ConfigureAwait(false); await foreach (var item in result.ConfigureAwait(false)) @@ -290,6 +297,7 @@ public Task GetLogStreamAsync( return response.Body; }, + RetryOnConnectivityAndConflictErrors, cancellationToken ); } @@ -315,12 +323,14 @@ private Task ExecuteWithRetry( DcpApiOperationType operationType, string resourceType, Func operation, + Func isRetryable, CancellationToken cancellationToken) { return ExecuteWithRetry( operationType, resourceType, (DcpKubernetesClient kubernetes) => Task.FromResult(operation(kubernetes)), + isRetryable, cancellationToken); } @@ -328,6 +338,7 @@ private async Task ExecuteWithRetry( DcpApiOperationType operationType, string resourceType, Func> operation, + Func isRetryable, CancellationToken cancellationToken) { var currentTimestamp = DateTime.UtcNow; @@ -344,7 +355,7 @@ private async Task ExecuteWithRetry( await EnsureKubernetesAsync(cancellationToken).ConfigureAwait(false); return await operation(_kubernetes!).ConfigureAwait(false); } - catch (Exception e) when (IsRetryable(e)) + catch (Exception e) when (isRetryable(e)) { if (DateTime.UtcNow.Subtract(currentTimestamp) > MaxRetryDuration) { @@ -364,7 +375,11 @@ private async Task ExecuteWithRetry( } } - private static bool IsRetryable(Exception ex) => ex is HttpRequestException || ex is KubeConfigException; + private static bool RetryOnConnectivityErrors(Exception ex) => ex is HttpRequestException || ex is KubeConfigException; + private static bool RetryOnConnectivityAndConflictErrors(Exception ex) => + ex is HttpRequestException || + ex is KubeConfigException || + (ex is HttpOperationException hoe && hoe.Response.StatusCode == System.Net.HttpStatusCode.Conflict); private ResiliencePipeline GetReadKubeconfigResiliencePipeline() {