@@ -50,12 +50,14 @@ internal class GatewayAddressCache : IAddressCache, IDisposable
50
50
private readonly ICosmosAuthorizationTokenProvider tokenProvider ;
51
51
private readonly bool enableTcpConnectionEndpointRediscovery ;
52
52
53
+ private readonly SemaphoreSlim semaphore ;
53
54
private readonly CosmosHttpClient httpClient ;
54
55
private readonly bool isReplicaAddressValidationEnabled ;
55
56
56
57
private Tuple < PartitionKeyRangeIdentity , PartitionAddressInformation > masterPartitionAddressCache ;
57
58
private DateTime suboptimalMasterPartitionTimestamp ;
58
59
private bool disposedValue ;
60
+ private bool validateUnknownReplicas ;
59
61
private IOpenConnectionsHandler openConnectionsHandler ;
60
62
61
63
public GatewayAddressCache (
@@ -90,8 +92,10 @@ public GatewayAddressCache(
90
92
Constants . Properties . Protocol ,
91
93
GatewayAddressCache . ProtocolString ( this . protocol ) ) ;
92
94
95
+ this . semaphore = new SemaphoreSlim ( 1 , 1 ) ;
93
96
this . openConnectionsHandler = openConnectionsHandler ;
94
97
this . isReplicaAddressValidationEnabled = replicaAddressValidationEnabled ;
98
+ this . validateUnknownReplicas = false ;
95
99
}
96
100
97
101
public Uri ServiceEndpoint => this . serviceEndpoint ;
@@ -120,6 +124,14 @@ public async Task OpenConnectionsAsync(
120
124
List < Task > tasks = new ( ) ;
121
125
int batchSize = GatewayAddressCache . DefaultBatchSize ;
122
126
127
+ // By design, the Unknown replicas are validated only when the following two conditions are met:
128
+ // 1) The CosmosClient is initiated using the CreateAndInitializeAsync() flow.
129
+ // 2) The advanced replica selection feature is enabled.
130
+ if ( shouldOpenRntbdChannels )
131
+ {
132
+ this . validateUnknownReplicas = true ;
133
+ }
134
+
123
135
#if ! ( NETSTANDARD15 || NETSTANDARD16 )
124
136
#if NETSTANDARD20
125
137
// GetEntryAssembly returns null when loaded from native netstandard2.0
@@ -302,11 +314,12 @@ public async Task<PartitionAddressInformation> TryGetAddressesAsync(
302
314
. ReplicaTransportAddressUris
303
315
. Any ( x => x . ShouldRefreshHealthStatus ( ) ) )
304
316
{
305
- Task refreshAddressesInBackgroundTask = Task . Run ( async ( ) =>
317
+ bool slimAcquired = await this . semaphore . WaitAsync ( 0 ) ;
318
+ try
306
319
{
307
- try
320
+ if ( slimAcquired )
308
321
{
309
- await this . serverPartitionAddressCache . RefreshAsync (
322
+ this . serverPartitionAddressCache . Refresh (
310
323
key : partitionKeyRangeIdentity ,
311
324
singleValueInitFunc : ( currentCachedValue ) => this . GetAddressesForRangeIdAsync (
312
325
request ,
@@ -315,14 +328,21 @@ await this.serverPartitionAddressCache.RefreshAsync(
315
328
partitionKeyRangeIdentity . PartitionKeyRangeId ,
316
329
forceRefresh : true ) ) ;
317
330
}
318
- catch ( Exception ex )
331
+ else
319
332
{
320
- DefaultTrace . TraceWarning ( "Failed to refresh addresses in the background for the collection rid: {0} with exception : {1}. '{2}'" ,
333
+ DefaultTrace . TraceVerbose ( "Failed to refresh addresses in the background for the collection rid: {0}, partition key range id : {1}, because the semaphore is already acquired . '{2}'" ,
321
334
partitionKeyRangeIdentity . CollectionRid ,
322
- ex ,
335
+ partitionKeyRangeIdentity . PartitionKeyRangeId ,
323
336
System . Diagnostics . Trace . CorrelationManager . ActivityId ) ;
324
337
}
325
- } ) ;
338
+ }
339
+ finally
340
+ {
341
+ if ( slimAcquired )
342
+ {
343
+ this . semaphore . Release ( ) ;
344
+ }
345
+ }
326
346
}
327
347
328
348
return addresses ;
@@ -1008,18 +1028,26 @@ private static PartitionAddressInformation MergeAddresses(
1008
1028
/// Returns a list of <see cref="TransportAddressUri"/> needed to validate their health status. Validating
1009
1029
/// a uri is done by opening Rntbd connection to the backend replica, which is a costly operation by nature. Therefore
1010
1030
/// validating both Unhealthy and Unknown replicas at the same time could impose a high CPU utilization. To avoid this
1011
- /// situation, the RntbdOpenConnectionHandler has good concurrency control mechanism to open the connections gracefully/>.
1031
+ /// situation, the RntbdOpenConnectionHandler has a good concurrency control mechanism to open the connections gracefully.
1032
+ /// By default, this method only returns the Unhealthy replicas that require validation of their connectivity status. The
1033
+ /// Unknown replicas are validated only when the CosmosClient is initiated using the CreateAndInitializeAsync() flow.
1012
1034
/// </summary>
1013
1035
/// <param name="transportAddresses">A read only list of <see cref="TransportAddressUri"/>s.</param>
1014
1036
/// <returns>A list of <see cref="TransportAddressUri"/> that needs to validate their status.</returns>
1015
1037
private IEnumerable < TransportAddressUri > GetAddressesNeededToValidateStatus (
1016
1038
IReadOnlyList < TransportAddressUri > transportAddresses )
1017
1039
{
1018
- return transportAddresses
1019
- . Where ( address => address
1040
+ return this . validateUnknownReplicas
1041
+ ? transportAddresses
1042
+ . Where ( address => address
1043
+ . GetCurrentHealthState ( )
1044
+ . GetHealthStatus ( ) is
1045
+ TransportAddressHealthState . HealthStatus . UnhealthyPending or
1046
+ TransportAddressHealthState . HealthStatus . Unknown )
1047
+ : transportAddresses
1048
+ . Where ( address => address
1020
1049
. GetCurrentHealthState ( )
1021
1050
. GetHealthStatus ( ) is
1022
- TransportAddressHealthState . HealthStatus . Unknown or
1023
1051
TransportAddressHealthState . HealthStatus . UnhealthyPending ) ;
1024
1052
}
1025
1053
0 commit comments