From 9af809d1702c481b7ba131a69dd0c9ca3f9da3aa Mon Sep 17 00:00:00 2001 From: Neil Tripician Date: Tue, 28 Apr 2026 11:50:49 -0700 Subject: [PATCH] HttpTimeoutPolicy: Fixes aggressive 500ms first-attempt timeout in HttpTimeoutPolicyControlPlaneRetriableHotPath Raises the first-attempt timeout for HttpTimeoutPolicyControlPlaneRetriableHotPath from 500ms to 1s, aligning with the precedent set by HttpTimeoutPolicyForThinClient (#5496) and HttpTimeoutPolicyForPartitionFailover (#5484). The original 500ms value was too aggressive for .NET 10's HttpConnectionPool behavior and any environment with moderate network latency, producing spurious TaskCanceledExceptions that the SDK then retried successfully but at the cost of wasted work and noisy customer telemetry. The 5s and 65s tail attempts are preserved to keep the existing retry budget for genuinely slow control-plane operations. Fixes #5642 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../HttpTimeoutPolicyControlPlaneRetriableHotPath.cs | 7 ++++++- .../CosmosHttpClientCoreTests.cs | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRetriableHotPath.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRetriableHotPath.cs index 8143b49015..ad4dbeb8d0 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRetriableHotPath.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRetriableHotPath.cs @@ -20,9 +20,14 @@ private HttpTimeoutPolicyControlPlaneRetriableHotPath(bool shouldThrow503OnTimeo this.shouldThrow503OnTimeout = shouldThrow503OnTimeout; } + // The first-attempt timeout was raised from 500ms to 1s to align with HttpTimeoutPolicyForThinClient + // (see issue #5642). 
The original 500ms value caused spurious TaskCanceledException retries on + // .NET 10 (due to changes in HttpConnectionPool behavior) and in any environment with moderate network + // latency. The 5s and 65s tail attempts are preserved to keep the existing retry budget for slow + // control-plane operations. private readonly IReadOnlyList<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)> TimeoutsAndDelays = new List<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)>() { - (TimeSpan.FromSeconds(.5), TimeSpan.Zero), + (TimeSpan.FromSeconds(1), TimeSpan.Zero), (TimeSpan.FromSeconds(5), TimeSpan.FromSeconds(1)), (TimeSpan.FromSeconds(65), TimeSpan.Zero), }; diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs index 187df15b9c..c411ef70e0 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs @@ -75,7 +75,7 @@ public async Task RetryTransientIssuesTestAsync() }}, {HttpTimeoutPolicyControlPlaneRetriableHotPath.Instance, new List<TimeSpan>() { - TimeSpan.FromSeconds(1), + TimeSpan.FromSeconds(2), TimeSpan.FromSeconds(6), TimeSpan.FromSeconds(66) }},