diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java index ce0e9db24425..d71a415b2370 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java @@ -36,6 +36,7 @@ import com.codahale.metrics.jvm.MemoryUsageGaugeSet; import io.micrometer.core.instrument.MeterRegistry; import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; import org.mpierce.metrics.reservoir.hdrhistogram.HdrHistogramResetOnSnapshotReservoir; import org.reactivestreams.Subscription; import org.slf4j.Logger; @@ -88,6 +89,16 @@ abstract class AsyncBenchmark { logger = LoggerFactory.getLogger(this.getClass()); configuration = cfg; + if (configuration.isPartitionLevelCircuitBreakerEnabled()) { + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); + } + CosmosClientBuilder cosmosClientBuilder = new CosmosClientBuilder() .endpoint(cfg.getServiceEndpoint()) .key(cfg.getMasterKey()) @@ -138,8 +149,11 @@ abstract class AsyncBenchmark { cosmosClientBuilder = cosmosClientBuilder.gatewayMode(gatewayConnectionConfig); } - CosmosClient syncClient = cosmosClientBuilder.buildClient(); cosmosClient = cosmosClientBuilder.buildAsyncClient(); + CosmosClient syncClient = cosmosClientBuilder + .endpoint(StringUtils.isNotEmpty(configuration.getServiceEndpointForRunResultsUploadAccount()) ? 
configuration.getServiceEndpointForRunResultsUploadAccount() : configuration.getServiceEndpoint()) + .key(StringUtils.isNotEmpty(configuration.getMasterKeyForRunResultsUploadAccount()) ? configuration.getMasterKeyForRunResultsUploadAccount() : configuration.getMasterKey()) + .buildClient(); try { cosmosAsyncDatabase = cosmosClient.getDatabase(this.configuration.getDatabaseId()); @@ -168,6 +182,17 @@ abstract class AsyncBenchmark { ).block(); cosmosAsyncContainer = cosmosAsyncDatabase.getContainer(this.configuration.getCollectionId()); + + // add some delay to allow container to be created across multiple regions + // container creation across regions is an async operation + // without the delay a container may not be available to process reads / writes + + try { + Thread.sleep(30_000); + } catch (Exception exception) { + throw new RuntimeException(exception); + } + logger.info("Collection {} is created for this test", this.configuration.getCollectionId()); collectionCreated = true; } else { diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/Configuration.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/Configuration.java index b3292b6340e2..0de2957d4385 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/Configuration.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/Configuration.java @@ -47,6 +47,12 @@ public class Configuration { @Parameter(names = "-masterKey", description = "Master Key") private String masterKey; + @Parameter(names = "-serviceEndpointForResultsUploadAccount", description = "Service Endpoint for run results upload account") + private String serviceEndpointForRunResultsUploadAccount; + + @Parameter(names = "-masterKeyForResultsUploadAccount", description = "Master Key for run results upload account") + private String masterKeyForRunResultsUploadAccount; + @Parameter(names = "-databaseId", description = 
"Database ID") private String databaseId; @@ -137,6 +143,9 @@ public class Configuration { @Parameter(names = "-isRegionScopedSessionContainerEnabled", description = "A flag to denote whether region scoped session container is enabled") private String isRegionScopedSessionContainerEnabled = String.valueOf(false); + @Parameter(names = "isPartitionLevelCircuitBreakerEnabled", description = "A flag to denote whether partition level circuit breaker is enabled.") + private String isPartitionLevelCircuitBreakerEnabled = String.valueOf(true); + @Parameter(names = "-operation", description = "Type of Workload:\n" + "\tReadThroughput- run a READ workload that prints only throughput *\n" + "\tReadThroughputWithMultipleClients - run a READ workload that prints throughput and latency for multiple client read.*\n" @@ -397,6 +406,14 @@ public String getMasterKey() { return masterKey; } + public String getServiceEndpointForRunResultsUploadAccount() { + return serviceEndpointForRunResultsUploadAccount; + } + + public String getMasterKeyForRunResultsUploadAccount() { + return masterKeyForRunResultsUploadAccount; + } + public String getApplicationName() { return applicationName; } @@ -639,6 +656,10 @@ public boolean isRegionScopedSessionContainerEnabled() { return Boolean.parseBoolean(isRegionScopedSessionContainerEnabled); } + public boolean isPartitionLevelCircuitBreakerEnabled() { + return Boolean.parseBoolean(isPartitionLevelCircuitBreakerEnabled); + } + public void tryGetValuesFromSystem() { serviceEndpoint = StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("SERVICE_END_POINT")), serviceEndpoint); diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java index 3c48c5821e03..79faed4b831c 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java +++ 
b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java @@ -32,6 +32,7 @@ import com.codahale.metrics.jvm.MemoryUsageGaugeSet; import io.micrometer.core.instrument.MeterRegistry; import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; import org.mpierce.metrics.reservoir.hdrhistogram.HdrHistogramResetOnSnapshotReservoir; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -114,6 +115,16 @@ public T apply(T o, Throwable throwable) { configuration = cfg; logger = LoggerFactory.getLogger(this.getClass()); + if (configuration.isPartitionLevelCircuitBreakerEnabled()) { + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); + } + CosmosClientBuilder cosmosClientBuilder = new CosmosClientBuilder() .endpoint(cfg.getServiceEndpoint()) .preferredRegions(cfg.getPreferredRegionsList()) @@ -147,6 +158,11 @@ public T apply(T o, Throwable throwable) { } cosmosClient = cosmosClientBuilder.buildClient(); + CosmosClient syncClient = cosmosClientBuilder + .endpoint(StringUtils.isNotEmpty(configuration.getServiceEndpointForRunResultsUploadAccount()) ? configuration.getServiceEndpointForRunResultsUploadAccount() : configuration.getServiceEndpoint()) + .key(StringUtils.isNotEmpty(configuration.getMasterKeyForRunResultsUploadAccount()) ? 
configuration.getMasterKeyForRunResultsUploadAccount() : configuration.getMasterKey()) + .buildClient(); + try { cosmosDatabase = cosmosClient.getDatabase(this.configuration.getDatabaseId()); cosmosDatabase.read(); @@ -171,6 +187,16 @@ public T apply(T o, Throwable throwable) { ThroughputProperties.createManualThroughput(this.configuration.getThroughput())); cosmosContainer = cosmosDatabase.getContainer(this.configuration.getCollectionId()); logger.info("Collection {} is created for this test", this.configuration.getCollectionId()); + + // add some delay to allow container to be created across multiple regions + // container creation across regions is an async operation + // without the delay a container may not be available to process reads / writes + try { + Thread.sleep(30_000); + } catch (Exception exception) { + throw new RuntimeException(exception); + } + collectionCreated = true; } else { throw e; @@ -236,7 +262,7 @@ public T apply(T o, Throwable throwable) { resultReporter = CosmosTotalResultReporter .forRegistry( metricsRegistry, - cosmosClient.getDatabase(configuration.getResultUploadDatabase()).getContainer(configuration.getResultUploadContainer()), + syncClient.getDatabase(configuration.getResultUploadDatabase()).getContainer(configuration.getResultUploadContainer()), configuration) .convertRatesTo(TimeUnit.SECONDS) .convertDurationsTo(TimeUnit.MILLISECONDS).build(); diff --git a/sdk/cosmos/azure-cosmos-tests/pom.xml b/sdk/cosmos/azure-cosmos-tests/pom.xml index 429ba031cc35..b73644a4eb73 100644 --- a/sdk/cosmos/azure-cosmos-tests/pom.xml +++ b/sdk/cosmos/azure-cosmos-tests/pom.xml @@ -465,6 +465,69 @@ Licensed under the MIT License. 
+ + + circuit-breaker-read-all-read-many + + circuit-breaker-read-all-read-many + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/circuit-breaker-read-all-read-many-testng.xml + + + + + + + + + circuit-breaker-misc-direct + + circuit-breaker-misc-direct + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/circuit-breaker-misc-direct-testng.xml + + + + + + + + + circuit-breaker-misc-gateway + + circuit-breaker-misc-gateway + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/circuit-breaker-misc-gateway-testng.xml + + + + + + flaky-multi-master diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java new file mode 100644 index 000000000000..d76c5449fd14 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -0,0 +1,947 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.cosmos; + +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.MetadataDiagnosticsContext; +import com.azure.cosmos.implementation.OperationType; +import com.azure.cosmos.implementation.PartitionKeyRange; +import com.azure.cosmos.implementation.PointOperationContextForCircuitBreaker; +import com.azure.cosmos.implementation.ResourceType; +import com.azure.cosmos.implementation.RxDocumentServiceRequest; +import com.azure.cosmos.implementation.SerializationDiagnosticsContext; +import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.LocationHealthStatus; +import com.azure.cosmos.implementation.circuitBreaker.LocationSpecificHealthContext; +import com.azure.cosmos.implementation.circuitBreaker.PartitionKeyRangeWrapper; +import com.azure.cosmos.implementation.guava25.collect.ImmutableList; +import org.apache.commons.lang3.tuple.Pair; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.lang.reflect.Field; +import java.net.URI; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; + +import static com.azure.cosmos.implementation.TestUtils.mockDiagnosticsClientContext; +import static org.assertj.core.api.Assertions.assertThat; + +public class GlobalPartitionEndpointManagerForCircuitBreakerTests { + + private static final Logger logger = 
LoggerFactory.getLogger(GlobalPartitionEndpointManagerForCircuitBreakerTests.class); + private final static Pair LocationEastUsEndpointToLocationPair = Pair.of(createUrl("https://contoso-east-us.documents.azure.com"), "eastus"); + private final static Pair LocationEastUs2EndpointToLocationPair = Pair.of(createUrl("https://contoso-east-us-2.documents.azure.com"), "eastus2"); + private final static Pair LocationCentralUsEndpointToLocationPair = Pair.of(createUrl("https://contoso-central-us.documents.azure.com"), "centralus"); + + private static final boolean READ_OPERATION_TRUE = true; + + private GlobalEndpointManager globalEndpointManagerMock; + + @BeforeClass(groups = {"unit"}) + public void beforeClass() { + this.globalEndpointManagerMock = Mockito.mock(GlobalEndpointManager.class); + + Mockito + .when(this.globalEndpointManagerMock.getRegionName(LocationEastUsEndpointToLocationPair.getKey(), OperationType.Read)) + .thenReturn(LocationEastUsEndpointToLocationPair.getRight()); + Mockito + .when(this.globalEndpointManagerMock.getRegionName(LocationEastUsEndpointToLocationPair.getKey(), OperationType.Create)) + .thenReturn(LocationEastUsEndpointToLocationPair.getRight()); + Mockito + .when(this.globalEndpointManagerMock.getRegionName(LocationCentralUsEndpointToLocationPair.getKey(), OperationType.Read)) + .thenReturn(LocationCentralUsEndpointToLocationPair.getRight()); + Mockito + .when(this.globalEndpointManagerMock.getRegionName(LocationCentralUsEndpointToLocationPair.getKey(), OperationType.Create)) + .thenReturn(LocationCentralUsEndpointToLocationPair.getRight()); + Mockito + .when(this.globalEndpointManagerMock.getRegionName(LocationEastUs2EndpointToLocationPair.getKey(), OperationType.Read)) + .thenReturn(LocationEastUs2EndpointToLocationPair.getRight()); + Mockito + .when(this.globalEndpointManagerMock.getRegionName(LocationEastUs2EndpointToLocationPair.getKey(), OperationType.Create)) + .thenReturn(LocationEastUs2EndpointToLocationPair.getRight()); + } + + 
@DataProvider(name = "partitionLevelCircuitBreakerConfigs") + public Object[][] partitionLevelCircuitBreakerConfigs() { + return new Object[][]{ + new Object[]{ + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}", + READ_OPERATION_TRUE + }, + new Object[]{ + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}", + !READ_OPERATION_TRUE + } + }; + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + + RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( + readOperationTrue ? 
OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + collectionResourceId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationSuccessForPartitionKeyRange(request); + + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); + + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); + + LocationSpecificHealthContext locationSpecificHealthContext + = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); + 
+ assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(Pair::getLeft) + .collect(Collectors.toList()); + + RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( + readOperationTrue ? 
OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + collectionResourceId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + Mockito.when(this.globalEndpointManagerMock.getReadEndpoints()).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getWriteEndpoints()).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); + + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); + + 
ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); + + LocationSpecificHealthContext locationSpecificHealthContext + = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyWithFailuresToUnavailableStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(Pair::getLeft) + .collect(Collectors.toList()); + + RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( + readOperationTrue ? 
OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + collectionResourceId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + int exceptionCountToHandle + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); + } + + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) 
partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); + + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); + + LocationSpecificHealthContext locationSpecificHealthContext + = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordUnavailableToHealthyTentativeStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + globalPartitionEndpointManagerForCircuitBreaker.init(); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(Pair::getLeft) + 
.collect(Collectors.toList()); + + RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + collectionResourceId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + int exceptionCountToHandle + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); + } + + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + 
locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); + + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); + + LocationSpecificHealthContext locationSpecificHealthContext + = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); + + try { + Thread.sleep(65_000); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + + locationSpecificHealthContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyTentativeToHealthyStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + 
GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + globalPartitionEndpointManagerForCircuitBreaker.init(); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(Pair::getLeft) + .collect(Collectors.toList()); + + RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + collectionResourceId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + int exceptionCountToHandle + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); + } + + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, 
"PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); + + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); + + LocationSpecificHealthContext locationSpecificHealthContext + = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); + + try { + Thread.sleep(90_000); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + + locationSpecificHealthContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); + + int successCountToUpgradeStatus = 
globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getMinimumSuccessCountForStatusUpgrade(LocationHealthStatus.HealthyTentative, readOperationTrue); + + for (int i = 1; i <= successCountToUpgradeStatus + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationSuccessForPartitionKeyRange(request); + } + + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyTentativeToUnavailableTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + globalPartitionEndpointManagerForCircuitBreaker.init(); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(Pair::getLeft) + .collect(Collectors.toList()); + + RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( + readOperationTrue ? 
OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + collectionResourceId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + int exceptionCountToHandle + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); + } + + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) 
partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); + + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); + + LocationSpecificHealthContext locationSpecificHealthContext + = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); + + try { + Thread.sleep(65_000); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + + exceptionCountToHandle = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyTentative, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); + } + + locationSpecificHealthContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void allRegionsUnavailableHandling(String 
partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + globalPartitionEndpointManagerForCircuitBreaker.init(); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(Pair::getLeft) + .collect(Collectors.toList()); + + RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + collectionResourceId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + int exceptionCountToHandle + = globalPartitionEndpointManagerForCircuitBreaker + .getConsecutiveExceptionBasedCircuitBreaker() + .getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, 
LocationEastUs2EndpointToLocationPair.getKey()); + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUsEndpointToLocationPair.getKey()); + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationCentralUsEndpointToLocationPair.getKey()); + } + + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); + + assertThat(partitionAndLocationSpecificUnavailabilityInfo).isNull(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(String 
partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchFieldException, IllegalAccessException { + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId1 = "dbs/db1/colls/coll1"; + String collectionResourceId2 = "dbs/db1/colls/coll2"; + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(Pair::getLeft) + .collect(Collectors.toList()); + + RxDocumentServiceRequest request1 = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId1, + pkRangeId, + collectionResourceId1, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + RxDocumentServiceRequest request2 = constructRxDocumentServiceRequestInstance( + readOperationTrue ? 
OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId2, + pkRangeId, + collectionResourceId2, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + int exceptionCountToHandle + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request1, LocationEastUs2EndpointToLocationPair.getKey()); + } + + globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(request2); + + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + 
locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + Object partitionLevelLocationUnavailabilityInfoSnapshotForColl1 + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper( + new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId1)); + + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartitionForColl1 + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionLevelLocationUnavailabilityInfoSnapshotForColl1); + + Object partitionLevelLocationUnavailabilityInfoSnapshotForColl2 + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper( + new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId2)); + + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartitionForColl2 + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionLevelLocationUnavailabilityInfoSnapshotForColl2); + + LocationSpecificHealthContext locationSpecificHealthContext1 + = locationEndpointToLocationSpecificContextForPartitionForColl1.get(LocationEastUs2EndpointToLocationPair.getKey()); + + LocationSpecificHealthContext locationSpecificHealthContext2 + = locationEndpointToLocationSpecificContextForPartitionForColl2.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificHealthContext1.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificHealthContext1.isExceptionThresholdBreached()).isTrue(); + + assertThat(locationSpecificHealthContext2.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificHealthContext2.isExceptionThresholdBreached()).isFalse(); + + 
System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void allRegionsUnavailableHandlingWithMultiThreading(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + int threadPoolSizeForExecutors = 4; + + ScheduledThreadPoolExecutor executorForEastUs = new ScheduledThreadPoolExecutor(threadPoolSizeForExecutors); + executorForEastUs.setRemoveOnCancelPolicy(true); + executorForEastUs.setExecuteExistingDelayedTasksAfterShutdownPolicy(false); + + ScheduledThreadPoolExecutor executorForCentralUs = new ScheduledThreadPoolExecutor(threadPoolSizeForExecutors); + executorForCentralUs.setRemoveOnCancelPolicy(true); + executorForCentralUs.setExecuteExistingDelayedTasksAfterShutdownPolicy(false); + + ScheduledThreadPoolExecutor executorForEastUs2 = new ScheduledThreadPoolExecutor(threadPoolSizeForExecutors); + executorForEastUs2.setRemoveOnCancelPolicy(true); + executorForEastUs2.setExecuteExistingDelayedTasksAfterShutdownPolicy(false); + + List> scheduledFutures = new ArrayList<>(); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + PartitionKeyRange partitionKeyRange = new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive); + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(Pair::getLeft) + .collect(Collectors.toList()); + + Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + 
Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + RxDocumentServiceRequest requestCentralUs = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + collectionResourceId, + minInclusive, + maxExclusive, + LocationCentralUsEndpointToLocationPair.getKey()); + + RxDocumentServiceRequest requestEastUs = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + collectionResourceId, + minInclusive, + maxExclusive, + LocationEastUsEndpointToLocationPair.getKey()); + + RxDocumentServiceRequest requestEastUs2 = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + collectionResourceId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + int exceptionCountToHandle = globalPartitionEndpointManagerForCircuitBreaker + .getConsecutiveExceptionBasedCircuitBreaker() + .getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle * 10; i++) { + + ScheduledFuture scheduledFutureForEastUs = executorForEastUs.schedule( + () -> validateAllRegionsAreNotUnavailableAfterExceptionInLocation( + globalPartitionEndpointManagerForCircuitBreaker, + requestEastUs, + LocationEastUsEndpointToLocationPair.getKey(), + collectionResourceId, + partitionKeyRange, + applicableReadWriteEndpoints), + 1, + 
TimeUnit.MILLISECONDS); + + ScheduledFuture scheduledFutureForCentralUs = executorForCentralUs.schedule( + () -> validateAllRegionsAreNotUnavailableAfterExceptionInLocation( + globalPartitionEndpointManagerForCircuitBreaker, + requestCentralUs, + LocationCentralUsEndpointToLocationPair.getKey(), + collectionResourceId, + partitionKeyRange, + applicableReadWriteEndpoints), + 1, + TimeUnit.MILLISECONDS); + + ScheduledFuture scheduledFutureForEastUs2 = executorForEastUs2.schedule( + () -> validateAllRegionsAreNotUnavailableAfterExceptionInLocation( + globalPartitionEndpointManagerForCircuitBreaker, + requestEastUs2, + LocationEastUs2EndpointToLocationPair.getKey(), + collectionResourceId, + partitionKeyRange, + applicableReadWriteEndpoints), + 1, + TimeUnit.MILLISECONDS); + + scheduledFutures.add(scheduledFutureForEastUs); + scheduledFutures.add(scheduledFutureForCentralUs); + scheduledFutures.add(scheduledFutureForEastUs2); + } + + while (true) { + + boolean areTasksStillRunning = false; + + for (ScheduledFuture scheduledFuture : scheduledFutures) { + if (!scheduledFuture.isDone()) { + areTasksStillRunning = true; + break; + } + } + + if (!areTasksStillRunning) { + break; + } + } + + executorForEastUs.shutdown(); + executorForCentralUs.shutdown(); + executorForEastUs2.shutdown(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + private static void validateAllRegionsAreNotUnavailableAfterExceptionInLocation( + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker, + RxDocumentServiceRequest request, + URI locationWithFailure, + String collectionResourceId, + PartitionKeyRange partitionKeyRange, + List applicableReadWriteLocations) { + + logger.warn("Handling exception for {}", locationWithFailure.getPath()); + globalPartitionEndpointManagerForCircuitBreaker.handleLocationExceptionForPartitionKeyRange(request, locationWithFailure); + + List unavailableRegions + = 
globalPartitionEndpointManagerForCircuitBreaker.getUnavailableRegionsForPartitionKeyRange(collectionResourceId, partitionKeyRange, request.getOperationType()); + + logger.info("Assert that all regions are not Unavailable!"); + assertThat(unavailableRegions.size()).isLessThan(applicableReadWriteLocations.size()); + } + + private RxDocumentServiceRequest constructRxDocumentServiceRequestInstance( + OperationType operationType, + ResourceType resourceType, + String collectionResourceId, + String partitionKeyRangeId, + String collectionLink, + String minInclusive, + String maxExclusive, + URI locationEndpointToRoute) { + + RxDocumentServiceRequest request = RxDocumentServiceRequest.create( + mockDiagnosticsClientContext(), + operationType, + resourceType); + + request.setResourceId(collectionResourceId); + + request.requestContext.resolvedPartitionKeyRange = new PartitionKeyRange(partitionKeyRangeId, minInclusive, maxExclusive); + request.requestContext.locationEndpointToRoute = locationEndpointToRoute; + request.requestContext.setExcludeRegions(Collections.emptyList()); + request.requestContext.setPointOperationContext( + new PointOperationContextForCircuitBreaker( + new AtomicBoolean(false), + false, + collectionLink, + new MetadataDiagnosticsContext(), + new SerializationDiagnosticsContext())); + + return request; + } + + private static URI createUrl(String url) { + try { + return new URI(url); + } catch (Exception e) { + throw new IllegalArgumentException(e); + } + } + + private static Class getClassBySimpleName(Class[] classes, String classSimpleName) { + for (Class clazz : classes) { + if (clazz.getSimpleName().equals(classSimpleName)) { + return clazz; + } + } + + logger.warn("Class with simple name {} does not exist!", classSimpleName); + return null; + } +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java index 
44d0a06f2c2c..7cefbadb33b4 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java @@ -131,10 +131,10 @@ public class MaxRetryCountTests extends TestSuiteBase { assertThat(subStatusCode).isEqualTo(HttpConstants.SubStatusCodes.SERVER_GENERATED_408); }; - private final static BiConsumer validateStatusCodeIsTimeout = + private final static BiConsumer validateStatusCodeIsTransitTimeout = (statusCode, subStatusCode) -> { assertThat(statusCode).isEqualTo(HttpConstants.StatusCodes.REQUEST_TIMEOUT); - assertThat(subStatusCode).isEqualTo(HttpConstants.SubStatusCodes.UNKNOWN); + assertThat(subStatusCode).isEqualTo(HttpConstants.SubStatusCodes.TRANSIT_TIMEOUT); }; private final static BiConsumer validateStatusCodeIsTransitTimeoutGenerated503ForWrite = @@ -944,7 +944,7 @@ public Object[][] testConfigs_readMaxRetryCount_transitTimeout() { notSpecifiedWhetherIdempotentWriteRetriesAreEnabled, sameDocumentIdJustCreated, injectTransitTimeoutIntoAllRegions.apply(minNetworkRequestTimeoutDuration), - validateStatusCodeIsTimeout, // when idempotent write is disabled, SDK will not retry for write operation, 408 will be bubbled up + validateStatusCodeIsTransitTimeout, // when idempotent write is disabled, SDK will not retry for write operation, 408 will be bubbled up (TriConsumer)(requestCount, consistencyLevel, operationType) -> assertThat(requestCount).isLessThanOrEqualTo( expectedMaxNumberOfRetriesForTransientTimeout( diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java new file mode 100644 index 000000000000..1015adf98ffd --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java @@ -0,0 +1,3523 @@ +// Copyright (c) Microsoft 
Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos; + +import com.azure.cosmos.faultinjection.FaultInjectionTestBase; +import com.azure.cosmos.implementation.ConnectionPolicy; +import com.azure.cosmos.implementation.DatabaseAccount; +import com.azure.cosmos.implementation.DatabaseAccountLocation; +import com.azure.cosmos.implementation.DocumentCollection; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.HttpConstants; +import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.implementation.PartitionKeyRange; +import com.azure.cosmos.implementation.RxDocumentClientImpl; +import com.azure.cosmos.implementation.TestConfigurations; +import com.azure.cosmos.implementation.Utils; +import com.azure.cosmos.implementation.caches.RxCollectionCache; +import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; +import com.azure.cosmos.implementation.circuitBreaker.ConsecutiveExceptionBasedCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.LocationHealthStatus; +import com.azure.cosmos.implementation.circuitBreaker.LocationSpecificHealthContext; +import com.azure.cosmos.implementation.circuitBreaker.PartitionKeyRangeWrapper; +import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; +import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; +import com.azure.cosmos.implementation.feedranges.FeedRangePartitionKeyImpl; +import com.azure.cosmos.implementation.guava25.base.Function; +import com.azure.cosmos.models.CosmosBatch; +import com.azure.cosmos.models.CosmosBatchResponse; +import com.azure.cosmos.models.CosmosChangeFeedRequestOptions; +import com.azure.cosmos.models.CosmosItemIdentity; +import com.azure.cosmos.models.CosmosItemRequestOptions; +import 
com.azure.cosmos.models.CosmosItemResponse; +import com.azure.cosmos.models.CosmosPatchItemRequestOptions; +import com.azure.cosmos.models.CosmosPatchOperations; +import com.azure.cosmos.models.CosmosQueryRequestOptions; +import com.azure.cosmos.models.CosmosReadManyRequestOptions; +import com.azure.cosmos.models.FeedRange; +import com.azure.cosmos.models.FeedResponse; +import com.azure.cosmos.models.PartitionKey; +import com.azure.cosmos.test.faultinjection.CosmosFaultInjectionHelper; +import com.azure.cosmos.test.faultinjection.FaultInjectionCondition; +import com.azure.cosmos.test.faultinjection.FaultInjectionConditionBuilder; +import com.azure.cosmos.test.faultinjection.FaultInjectionConnectionType; +import com.azure.cosmos.test.faultinjection.FaultInjectionEndpointBuilder; +import com.azure.cosmos.test.faultinjection.FaultInjectionOperationType; +import com.azure.cosmos.test.faultinjection.FaultInjectionResultBuilders; +import com.azure.cosmos.test.faultinjection.FaultInjectionRule; +import com.azure.cosmos.test.faultinjection.FaultInjectionRuleBuilder; +import com.azure.cosmos.test.faultinjection.FaultInjectionServerErrorResult; +import com.azure.cosmos.test.faultinjection.FaultInjectionServerErrorType; +import org.testng.SkipException; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Factory; +import org.testng.annotations.Test; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +import java.lang.reflect.Field; +import java.net.URI; +import java.time.Duration; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Consumer; + +import static 
org.assertj.core.api.Assertions.assertThat; +import static org.testng.Assert.fail; + +/* Test class whose validator lambdas and fault-injection rule builders below are handed to the test data providers as expectations. */ public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { + + private static final ImplementationBridgeHelpers.CosmosAsyncContainerHelper.CosmosAsyncContainerAccessor containerAccessor + = ImplementationBridgeHelpers.CosmosAsyncContainerHelper.getCosmosAsyncContainerAccessor(); + /* Region names taken from the account's region map; assigned in beforeClass(). */ private List writeRegions; + + /* Uses a one-day end-to-end limit, i.e. practically no end-to-end timeout for a test run. */ private static final CosmosEndToEndOperationLatencyPolicyConfig NO_END_TO_END_TIMEOUT + = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofDays(1)).build(); + + /* Three-second end-to-end limit combined with a ThresholdBasedAvailabilityStrategy. */ private static final CosmosEndToEndOperationLatencyPolicyConfig THREE_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY + = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(3)) + .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) + .build(); + + /* Three-second end-to-end limit with no availability strategy configured. */ private static final CosmosEndToEndOperationLatencyPolicyConfig THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY + = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(3)) + .build(); + + /* Asserts exactly one contacted region, equal to the lower-cased first preferred region. */ Consumer validateDiagnosticsContextHasFirstPreferredRegionOnly = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isEqualTo(1); + assertThat(ctx.getContactedRegionNames().stream().iterator().next()).isEqualTo(this.firstPreferredRegion.toLowerCase(Locale.ROOT)); + }; + + /* Asserts exactly one contacted region, equal to the lower-cased second preferred region. */ Consumer validateDiagnosticsContextHasSecondPreferredRegionOnly = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isEqualTo(1); + assertThat(ctx.getContactedRegionNames().stream().iterator().next()).isEqualTo(this.secondPreferredRegion.toLowerCase(Locale.ROOT)); + }; + + /* Asserts exactly two contacted regions that include both the first and the second preferred region (lower-cased). */ Consumer validateDiagnosticsContextHasFirstAndSecondPreferredRegions = (ctx) -> { + assertThat(ctx).isNotNull(); +
assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isEqualTo(2); + assertThat(ctx.getContactedRegionNames()).contains(this.firstPreferredRegion.toLowerCase(Locale.ROOT)); + assertThat(ctx.getContactedRegionNames()).contains(this.secondPreferredRegion.toLowerCase(Locale.ROOT)); + }; + + /* Asserts exactly two contacted regions without checking which ones. */ Consumer validateDiagnosticsContextHasAnyTwoPreferredRegions = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isEqualTo(2); + }; + + /* Asserts that no more than two regions were contacted. */ Consumer validateDiagnosticsContextHasAtMostTwoPreferredRegions = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isLessThanOrEqualTo(2); + }; + + /* Asserts that at most one region was contacted. NOTE(review): the check is size <= 1, so an empty set also passes despite the name - confirm this is intended. */ Consumer validateDiagnosticsContextHasOnePreferredRegion = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isLessThanOrEqualTo(1); + }; + + /* Asserts the contacted regions have the same count as writeRegions and contain every write region (lower-cased). */ Consumer validateDiagnosticsContextHasAllRegions = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isEqualTo(this.writeRegions.size()); + + for (String region : this.writeRegions) { + assertThat(ctx.getContactedRegionNames()).contains(region.toLowerCase(Locale.ROOT)); + } + }; + + /* Asserts no exception was captured and, for whichever response is present, a success status: OK or NOT_MODIFIED for feed responses, OK through NO_CONTENT for item and batch responses. */ Consumer> validateResponseHasSuccess = (responseWrapper) -> { + + assertThat(responseWrapper.cosmosException).isNull(); + + if (responseWrapper.feedResponse != null) { + assertThat(responseWrapper.feedResponse.getCosmosDiagnostics()).isNotNull(); + assertThat(responseWrapper.feedResponse.getCosmosDiagnostics().getDiagnosticsContext()).isNotNull(); + + CosmosDiagnosticsContext diagnosticsContext = responseWrapper.feedResponse.getCosmosDiagnostics().getDiagnosticsContext(); + + assertThat(diagnosticsContext.getStatusCode() ==
HttpConstants.StatusCodes.OK || diagnosticsContext.getStatusCode() == HttpConstants.StatusCodes.NOT_MODIFIED).isTrue(); + } else if (responseWrapper.cosmosItemResponse != null) { + assertThat(responseWrapper.cosmosItemResponse.getDiagnostics()).isNotNull(); + assertThat(responseWrapper.cosmosItemResponse.getDiagnostics().getDiagnosticsContext()).isNotNull(); + + CosmosDiagnosticsContext diagnosticsContext = responseWrapper.cosmosItemResponse.getDiagnostics().getDiagnosticsContext(); + + assertThat(HttpConstants.StatusCodes.OK <= diagnosticsContext.getStatusCode() && diagnosticsContext.getStatusCode() <= HttpConstants.StatusCodes.NO_CONTENT).isTrue(); + } else if (responseWrapper.batchResponse != null) { + assertThat(responseWrapper.batchResponse.getDiagnostics()).isNotNull(); + assertThat(responseWrapper.batchResponse.getDiagnostics().getDiagnosticsContext()).isNotNull(); + + CosmosDiagnosticsContext diagnosticsContext = responseWrapper.batchResponse.getDiagnostics().getDiagnosticsContext(); + + assertThat(HttpConstants.StatusCodes.OK <= diagnosticsContext.getStatusCode() && diagnosticsContext.getStatusCode() <= HttpConstants.StatusCodes.NO_CONTENT).isTrue(); + } + }; + + /* Asserts a captured exception with status REQUEST_TIMEOUT and sub-status CLIENT_OPERATION_TIMEOUT. */ Consumer> validateResponseHasOperationCancelledException = (responseWrapper) -> { + assertThat(responseWrapper.cosmosException).isNotNull(); + assertThat(responseWrapper.cosmosException.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.REQUEST_TIMEOUT); + assertThat(responseWrapper.cosmosException.getSubStatusCode()).isEqualTo(HttpConstants.SubStatusCodes.CLIENT_OPERATION_TIMEOUT); + }; + + /* Asserts a captured exception with status INTERNAL_SERVER_ERROR. */ Consumer> validateResponseHasInternalServerError = (responseWrapper) -> { + assertThat(responseWrapper.cosmosException).isNotNull(); + assertThat(responseWrapper.cosmosException.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.INTERNAL_SERVER_ERROR); + }; + + /* Asserts a captured exception with status SERVICE_UNAVAILABLE. */ Consumer> validateResponseHasServiceUnavailableError = (responseWrapper) -> { + assertThat(responseWrapper.cosmosException).isNotNull(); +
assertThat(responseWrapper.cosmosException.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.SERVICE_UNAVAILABLE); + }; + + /* Asserts a captured exception with status REQUEST_TIMEOUT whose sub-status is anything other than CLIENT_OPERATION_TIMEOUT. */ Consumer> validateResponseHasRequestTimeoutException = (responseWrapper) -> { + assertThat(responseWrapper.cosmosException).isNotNull(); + assertThat(responseWrapper.cosmosException.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.REQUEST_TIMEOUT); + assertThat(responseWrapper.cosmosException.getSubStatusCode()).isNotEqualTo(HttpConstants.SubStatusCodes.CLIENT_OPERATION_TIMEOUT); + }; + + /* The fields below hold method references to this class's static fault-injection rule builders so they can be passed around as values. */ private final Function> buildServiceUnavailableFaultInjectionRules + = PartitionLevelCircuitBreakerTests::buildServiceUnavailableFaultInjectionRules; + + private final Function> buildServerGeneratedGoneErrorFaultInjectionRules + = PartitionLevelCircuitBreakerTests::buildServerGeneratedGoneErrorFaultInjectionRules; + + private final Function> buildTooManyRequestsErrorFaultInjectionRules + = PartitionLevelCircuitBreakerTests::buildTooManyRequestsErrorFaultInjectionRules; + + private final Function> buildReadWriteSessionNotAvailableFaultInjectionRules + = PartitionLevelCircuitBreakerTests::buildReadWriteSessionNotAvailableFaultInjectionRules; + + private final Function> buildTransitTimeoutFaultInjectionRules + = PartitionLevelCircuitBreakerTests::buildTransitTimeoutFaultInjectionRules; + + private final Function> buildInternalServerErrorFaultInjectionRules + = PartitionLevelCircuitBreakerTests::buildInternalServerErrorFaultInjectionRules; + + private final Function> buildRetryWithFaultInjectionRules + = PartitionLevelCircuitBreakerTests::buildRetryWithFaultInjectionRules; + + /* Named null: no region switch hint is supplied. */ private static final CosmosRegionSwitchHint NO_REGION_SWITCH_HINT = null; + + /* Named true; test configs negate it to request that non-idempotent write retries be off. */ private static final Boolean NON_IDEMPOTENT_WRITE_RETRIES_ENABLED = true; + + /* The three connection-mode sets start empty and are filled in beforeClass(). */ private static final Set ALL_CONNECTION_MODES_INCLUDED = new HashSet<>(); + + private static final Set ONLY_DIRECT_MODE = new HashSet<>(); + + private static final Set ONLY_GATEWAY_MODE = new HashSet<>(); + + private String
firstPreferredRegion = null; + + private String secondPreferredRegion = null; + + private String sharedAsyncDatabaseId = null; + + private String sharedMultiPartitionAsyncContainerIdWhereIdIsPartitionKey = null; + + private String sharedMultiPartitionAsyncContainerIdWhereMyPkIsPartitionKey = null; + + private String singlePartitionAsyncContainerId = null; + + /** + * Creates one test instance per client builder supplied by the + * "clientBuildersWithDirectTcpSession" data provider. + * + * @param cosmosClientBuilder builder forwarded to the base test class + */ + @Factory(dataProvider = "clientBuildersWithDirectTcpSession") + public PartitionLevelCircuitBreakerTests(CosmosClientBuilder cosmosClientBuilder) { + super(cosmosClientBuilder); + } + + /** + * One-time setup: reads the account's region map into {@code writeRegions}, records the ids of the + * shared database and containers, creates a container with a random id partitioned on "/id", + * and fills the connection-mode sets. + * + * @throws RuntimeException if the thread is interrupted during the post-creation wait + */ + @BeforeClass(groups = {"circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many"}) + public void beforeClass() { + try (CosmosAsyncClient testClient = getClientBuilder().buildAsyncClient()) { + RxDocumentClientImpl documentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(testClient); + GlobalEndpointManager globalEndpointManager = documentClient.getGlobalEndpointManager(); + + DatabaseAccount databaseAccount = globalEndpointManager.getLatestDatabaseAccount(); + this.writeRegions = new ArrayList<>(this.getRegionMap(databaseAccount, true).keySet()); + + CosmosAsyncDatabase sharedAsyncDatabase = getSharedCosmosDatabase(testClient); + CosmosAsyncContainer sharedMultiPartitionCosmosContainerWithIdAsPartitionKey = getSharedMultiPartitionCosmosContainerWithIdAsPartitionKey(testClient); + CosmosAsyncContainer sharedAsyncMultiPartitionContainerWithMyPkAsPartitionKey = getSharedMultiPartitionCosmosContainer(testClient); + + this.sharedAsyncDatabaseId = sharedAsyncDatabase.getId(); + this.sharedMultiPartitionAsyncContainerIdWhereIdIsPartitionKey = sharedMultiPartitionCosmosContainerWithIdAsPartitionKey.getId(); + this.sharedMultiPartitionAsyncContainerIdWhereMyPkIsPartitionKey = sharedAsyncMultiPartitionContainerWithMyPkAsPartitionKey.getId(); + + this.singlePartitionAsyncContainerId = UUID.randomUUID().toString(); +
sharedAsyncDatabase.createContainerIfNotExists(this.singlePartitionAsyncContainerId, "/id").block(); + + // the sets are static, so repeated setup runs only re-add the same values + ALL_CONNECTION_MODES_INCLUDED.add(ConnectionMode.DIRECT); + ALL_CONNECTION_MODES_INCLUDED.add(ConnectionMode.GATEWAY); + ONLY_DIRECT_MODE.add(ConnectionMode.DIRECT); + ONLY_GATEWAY_MODE.add(ConnectionMode.GATEWAY); + + // presumably gives the freshly created container time to become available - TODO confirm + try { + Thread.sleep(3000); + } catch (InterruptedException ex) { + // re-assert the interrupt flag so callers further up can still observe the interruption + Thread.currentThread().interrupt(); + throw new RuntimeException(ex); + } + + } finally { + logger.debug("beforeClass executed..."); + } + } + + @DataProvider(name = "miscellaneousOpTestConfigsDirect") + public Object[][] miscellaneousOpTestConfigsDirect() { + + // General testing flow: + // Below tests choose a fault type to inject, regions to inject the fault in + // and the operation type for which the fault is injected. The idea is to assert + // what happens when faults are being injected - should an exception bubble up + // in the process [or] should the operation succeed, region contacted when circuit + // breaking has kicked in and region contacted when region + partition combination is + // being marked back as UnhealthyTentative (eligible to accept requests) + return new Object[][]{ + // Server-generated 503 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited.
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + validateResponseHasSuccess, + validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for UPSERT_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.UPSERT_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for REPLACE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for DELETE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for PATCH_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but include + // the second preferred region when the first preferred region has been short-circuited. + // For queries which require a QueryPlan, the first preferred region is contacted (not a data plane request + // which will hit a data partition so is not eligible for circuit breaking). 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for BATCH_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.BATCH_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for READ_FEED_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 410 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildServerGeneratedGoneErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 410 injected into first preferred region for UPSERT_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.UPSERT_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildServerGeneratedGoneErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 410 injected into first preferred region for REPLACE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildServerGeneratedGoneErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 410 injected into first preferred region for DELETE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildServerGeneratedGoneErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 410 injected into first preferred region for PATCH_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildServerGeneratedGoneErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 410 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildServerGeneratedGoneErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Server-generated 410 injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. Even + // when short-circuiting of first preferred region has kicked in, the first preferred region is contacted + // to fetch the QueryPlan. 
+ new Object[]{ + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildServerGeneratedGoneErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Response-delay injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException (since end-to-end timeout is configured) + // and only to succeed when moved over to the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withResponseDelay(Duration.ofSeconds(6)), + this.buildTransitTimeoutFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Response-delay injected into first preferred region for REPLACE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException (since end-to-end timeout is configured) + // and only to succeed when moved over to the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withResponseDelay(Duration.ofSeconds(6)), + this.buildTransitTimeoutFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Response-delay injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit RequestTimeoutException (due to network request timeout of 5s kicking in) + // and because NonIdempotentWriteRetryPolicy isn't enabled + // and only to succeed when moved over to the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with response delay in first preferred region and with no end-to-end operation timeout configured.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(80)) + .withResponseDelay(Duration.ofSeconds(10)), + this.buildTransitTimeoutFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasRequestTimeoutException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // Response-delay injected into first preferred region for REPLACE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit RequestTimeoutException (due to network request timeout of 5s kicking in) + // and because NonIdempotentWriteRetryPolicy isn't enabled + // and only to succeed when moved over to the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with response delay in first preferred region and with no end-to-end operation timeout configured.", FaultInjectionOperationType.REPLACE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(80)) + .withResponseDelay(Duration.ofSeconds(10)), + this.buildTransitTimeoutFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasRequestTimeoutException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, +// 500 (internal server error) injected into first preferred region for READ_ITEM operation +// injected into all replicas of the faulty EPK range. +// Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation +// should see a success from the second preferred region. 
+ { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 500 (internal server error) injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. 
+ { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildInternalServerErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 500 (internal server error) injected into first preferred region for READ_FEED_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. 
+ { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 500 (internal server error) injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. Although, after short-circuiting, a query operation + // will see request for QueryPlan from the short-circuited region. 
+ { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 429 (too many requests) injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 429 (too many requests) injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 429 (too many requests) injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + // QUERY_ITEM operation will see requests hit even for short-circuited region for fetching the QueryPlan. 
+ { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 404/1002 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ { + String.format("Test with faulty %s with read session not available in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 404/1002 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ { + String.format("Test with faulty %s with write session not available error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 449 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ { + String.format("Test with faulty %s with retry with service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildRetryWithFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 449 injected into first preferred region for REPLACE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ { + String.format("Test with faulty %s with retry with service error in the first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildRetryWithFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 429 (too many requests) injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) + // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. 
+ new Object[]{ + String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 429 (too many requests) injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) + // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. 
+ new Object[]{ + String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 429 (too many requests) injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) + // and will have two regions contacted post circuit breaking (one for QueryPlan and the other for the data plane request). 
+ new Object[]{ + String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 500 injected into all regions for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see InternalServerError in all regions + // and will have one region contacted post circuit breaking (one for QueryPlan and the other for the data plane request). 
+ new Object[]{ + String.format("Test with faulty %s with internal server error in all preferred regions.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAtMostTwoPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + this.writeRegions.size(), + 40, + 15 + }, + // 500 injected into all regions for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see InternalServerError in all regions + // and will contact one region post circuit breaking. 
+ new Object[]{ + String.format("Test with faulty %s with internal server error in all preferred regions.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + this.writeRegions.size(), + 40, + 15 + }, + // 500 injected into all regions for UPSERT_ITEM operation + // injected into all replicas of the faulty EPK range (effectively the primary since it is an upsert (write) operation). + // Expectation is for the operation to see InternalServerError in all regions + // and will contact one region post circuit breaking. 
+ new Object[]{ + String.format("Test with faulty %s with internal server error in all preferred regions.", FaultInjectionOperationType.UPSERT_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(6), + this.buildInternalServerErrorFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + this.writeRegions.size(), + 25, + 15 + } + }; + } + + @DataProvider(name = "miscellaneousOpTestConfigsGateway") + public Object[][] miscellaneousOpTestConfigsGateway() { + + // General testing flow: + // Below tests choose a fault type to inject, regions to inject the fault in + // and the operation type for which the fault is injected. The idea is to assert + // what happens when faults are being injected - should an exception bubble up + // in the process [or] should the operation succeed, region contacted when circuit + // breaking has kicked in and region contacted when region + partition combination is + // being marked back as UnhealthyTentative (eligible to accept requests) + return new Object[][]{ + // Server-generated 503 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + validateResponseHasSuccess, + validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for UPSERT_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.UPSERT_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for REPLACE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for DELETE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for PATCH_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but include + // the second preferred region when the first preferred region has been short-circuited. + // For queries which require a QueryPlan, the first preferred region is contacted (not a data plane request + // which will hit a data partition so is not eligible for circuit breaking). 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for BATCH_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.BATCH_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // Server-generated 503 injected into first preferred region for READ_FEED_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, +// 500 (internal server error) injected into first preferred region for READ_ITEM operation +// injected into all replicas of the faulty EPK range. +// Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation +// should see a success from the second preferred region. 
+ { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // 500 (internal server error) injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. 
+ { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildInternalServerErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // 500 (internal server error) injected into first preferred region for READ_FEED_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. 
+ { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // 500 (internal server error) injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. Although, after short-circuiting, a query operation + // will see request for QueryPlan from the short-circuited region. 
+ { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // 429 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // 429 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // 429 injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + // QUERY_ITEM operation will see requests hit even for short-circuited region for fetching the QueryPlan. 
+ { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // 500 injected into all regions for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see InternalServerError in all regions + // and will have one region contacted post circuit breaking (one for QueryPlan and the other for the data plane request). 
+ new Object[]{ + String.format("Test with faulty %s with internal server error in all preferred regions.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAtMostTwoPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + this.writeRegions.size(), + 40, + 15 + }, + // 500 injected into all regions for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see InternalServerError in all regions + // and will contact one region contacted post circuit breaking. 
+ new Object[]{ + String.format("Test with faulty %s with internal server error in all preferred regions.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + this.writeRegions.size(), + 40, + 15 + }, + // 500 injected into all regions for UPSERT_ITEM operation + // injected into all replicas of the faulty EPK range (effectively the primary since it is an upsert (write) operation). + // Expectation is for the operation to see InternalServerError in all regions + // and will contact one region contacted post circuit breaking. 
+ new Object[]{ + String.format("Test with faulty %s with internal server error in all preferred regions.", FaultInjectionOperationType.UPSERT_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(6), + this.buildInternalServerErrorFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + this.writeRegions.size(), + 25, + 15 + } + }; + } + + /** + * Data provider {@code readManyTestConfigs}: fault-injection scenarios for the read-many operation. + * <p> + * Every row shares one executor, which calls {@code readMany} with the item identities and request options + * carried by the params wrapper, blocks for the result, and wraps either the response or a caught + * {@code CosmosException} in a {@code ResponseWrapper}; any other exception is rethrown. + * All rows inject their fault against {@code FaultInjectionOperationType.QUERY_ITEM}. + * <p> + * NOTE(review): rows are presumably bound positionally to a test method outside this view - confirm + * the column order there before editing a row. + * + * @return one {@code Object[]} per scenario + */ + @DataProvider(name = "readManyTestConfigs") + public Object[][] readManyTestConfigs() { + + Function> executeReadManyOperation = (paramsWrapper) -> { + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + List itemIdentities = paramsWrapper.itemIdentitiesForReadManyOperation; + CosmosReadManyRequestOptions readManyRequestOptions = paramsWrapper.readManyRequestOptions; + + try { + + FeedResponse response = asyncContainer.readMany( + itemIdentities, + readManyRequestOptions, + TestObject.class) + .block(); + + return new ResponseWrapper<>(response); + } catch (Exception ex) { + + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + + return new Object[][]{ + // Server-generated 503 injected into first preferred region for read many operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. 
+ { + "Test read many operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(11) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildServiceUnavailableFaultInjectionRules, + executeReadManyOperation, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 + }, + // Internal server error injected into first preferred region for read many operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit InternalServerError and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + { + "Test read many operation injected with internal server error injected in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(11) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildInternalServerErrorFaultInjectionRules, + executeReadManyOperation, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 + }, + // Server-generated 410 injected into first preferred region for read many operation + // injected into all replicas of the faulty EPK range. 
+ // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + { + "Test read many operation injected with server-generated gone in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildServerGeneratedGoneErrorFaultInjectionRules, + executeReadManyOperation, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 429 injected into first preferred region for read many operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ { + "Test read many operation injected with too many requests error in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildTooManyRequestsErrorFaultInjectionRules, + executeReadManyOperation, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 + }, + // 404/1002 injected into first preferred region for read many operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ { + "Test read many operation injected with read session not available error in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + executeReadManyOperation, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 500 injected into all regions for read many operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit 500 (InternalServerError) and only to succeed when + // fault injection has hit its injection limits. Also, the success is + // from the first preferred region. 
+ { + "Test read many operation injected with internal server error in all preferred regions.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(11) + .withFaultInjectionApplicableRegions(this.writeRegions), + this.buildInternalServerErrorFaultInjectionRules, + executeReadManyOperation, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED, + this.writeRegions.size(), + 40, + 15 + }, + // 429 injected into first preferred region for read many operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) + // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. 
+ new Object[]{ + "Test faulty read many operation with too many requests error in first preferred region with threshold-based availability strategy enabled.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + executeReadManyOperation, + THREE_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 + } + }; + } + + /** + * Data provider {@code readAllTestConfigs}: fault-injection scenarios for the read-all operation. + * <p> + * Every row shares one executor, which calls {@code readAllItems} for the partition key and query options + * carried by the params wrapper, takes only the first page ({@code byPage().next()}), blocks for it, and wraps + * either that page or a caught {@code CosmosException} in a {@code ResponseWrapper}; any other exception is rethrown. + * All rows inject their fault against {@code FaultInjectionOperationType.QUERY_ITEM}. + * <p> + * NOTE(review): rows are presumably bound positionally to a test method outside this view - confirm + * the column order there before editing a row. + * + * @return one {@code Object[]} per scenario + */ + @DataProvider(name = "readAllTestConfigs") + public Object[][] readAllTestConfigs() { + + Function> executeReadAllOperation = (paramsWrapper) -> { + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + PartitionKey partitionKey = paramsWrapper.partitionKeyForReadAllOperation; + CosmosQueryRequestOptions queryRequestOptions = paramsWrapper.queryRequestOptions; + + try { + + FeedResponse response = asyncContainer.readAllItems( + partitionKey, + queryRequestOptions, + TestObject.class) + .byPage() + .next() + .block(); + + return new ResponseWrapper<>(response); + } catch (Exception ex) { + + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + + return new Object[][]{ + // Server-generated 503 injected into first preferred region for read all operation + // injected into all replicas of the faulty EPK range. 
+ // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. + { + "Test read all operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(11) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildServiceUnavailableFaultInjectionRules, + executeReadAllOperation, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 + }, + // Internal server error injected into first preferred region for read all operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit InternalServerError and bubble it from the first preferred region + // and only to succeed when moved over to the second preferred region when the first preferred region has been short-circuited. 
+ { + "Test read all operation injected with internal server error injected in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(11) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildInternalServerErrorFaultInjectionRules, + executeReadAllOperation, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 + }, + // 410 injected into first preferred region for read all operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ { + "Test read all operation injected with server-generated GONE in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildServerGeneratedGoneErrorFaultInjectionRules, + executeReadAllOperation, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 429 injected into first preferred region for read all operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ { + "Test read all operation injected with too many requests error in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildTooManyRequestsErrorFaultInjectionRules, + executeReadAllOperation, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 + }, + // 404/1002 injected into first preferred region for read all operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. 
+ { + "Test read all operation injected with read/write session not available error in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + executeReadAllOperation, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 500 injected into all regions for read all operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit 500 (InternalServerError) and only to succeed when + // fault injection has hit its injection limits. Also, the success is + // from the first preferred region. 
+ { + "Test read all operation injected with internal server error in all preferred regions.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(11) + .withFaultInjectionApplicableRegions(this.writeRegions), + this.buildInternalServerErrorFaultInjectionRules, + executeReadAllOperation, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED, + this.writeRegions.size(), + 40, + 15 + }, + // 429 injected into first preferred region for read all operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) + // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. 
+ new Object[]{ + "Test faulty read all operation with too many requests error in first preferred region with threshold-based availability strategy enabled.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + executeReadAllOperation, + THREE_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 + } + }; + } + + @Test(groups = {"circuit-breaker-misc-direct"}, dataProvider = "miscellaneousOpTestConfigsDirect", timeOut = 4 * TIMEOUT) + public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsDirect( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, + CosmosRegionSwitchHint regionSwitchHint, + Boolean nonIdempotentWriteRetriesEnabled, + Consumer> validateResponseInPresenceOfFaults, + Consumer> validateResponseInAbsenceOfFaults, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + Set allowedConnectionModes, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { + + executeMiscOperationHitsTerminalExceptionAcrossKRegions( + testId, + faultInjectionRuleParamsWrapper, + generateFaultInjectionRules, + 
e2eLatencyPolicyCfg, + regionSwitchHint, + nonIdempotentWriteRetriesEnabled, + validateResponseInPresenceOfFaults, + validateResponseInAbsenceOfFaults, + validateRegionsContactedWhenShortCircuitingHasKickedIn, + validateRegionsContactedWhenExceptionBubblesUp, + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + allowedConnectionModes, + expectedRegionCountWithFailures, + operationIterationCountInFailureFlow, + operationIterationCountInRecoveryFlow); + } + + @Test(groups = {"circuit-breaker-misc-gateway"}, dataProvider = "miscellaneousOpTestConfigsGateway", timeOut = 4 * TIMEOUT) + public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsGateway( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, + CosmosRegionSwitchHint regionSwitchHint, + Boolean nonIdempotentWriteRetriesEnabled, + Consumer> validateResponseInPresenceOfFaults, + Consumer> validateResponseInAbsenceOfFaults, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + Set allowedConnectionModes, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { + + executeMiscOperationHitsTerminalExceptionAcrossKRegions( + testId, + faultInjectionRuleParamsWrapper, + generateFaultInjectionRules, + e2eLatencyPolicyCfg, + regionSwitchHint, + nonIdempotentWriteRetriesEnabled, + validateResponseInPresenceOfFaults, + validateResponseInAbsenceOfFaults, + validateRegionsContactedWhenShortCircuitingHasKickedIn, + validateRegionsContactedWhenExceptionBubblesUp, + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + allowedConnectionModes, + expectedRegionCountWithFailures, 
+ operationIterationCountInFailureFlow, + operationIterationCountInRecoveryFlow); + } + + private void executeMiscOperationHitsTerminalExceptionAcrossKRegions( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, + CosmosRegionSwitchHint regionSwitchHint, + Boolean nonIdempotentWriteRetriesEnabled, + Consumer> validateResponseInPresenceOfFaults, + Consumer> validateResponseInAbsenceOfFaults, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + Set allowedConnectionModes, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { + + List preferredRegions = this.writeRegions; + + this.firstPreferredRegion = preferredRegions.get(0); + this.secondPreferredRegion = preferredRegions.get(1); + + OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (!allowedConnectionModes.contains(connectionPolicy.getConnectionMode())) { + throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode())); + } + + CosmosAsyncClient asyncClient = null; + FaultInjectionOperationType faultInjectionOperationType = faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(); + faultInjectionRuleParamsWrapper.withFaultInjectionConnectionType(evaluateFaultInjectionConnectionType(connectionPolicy.getConnectionMode())); + + try { + + asyncClient = clientBuilder.buildAsyncClient(); + + 
operationInvocationParamsWrapper.itemCountToBootstrapContainerFrom = resolveTestObjectCountToBootstrapFrom(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(), 15); + int testObjCountToBootstrapFrom = operationInvocationParamsWrapper.itemCountToBootstrapContainerFrom; + + operationInvocationParamsWrapper.containerIdToTarget = resolveContainerIdByFaultInjectionOperationType(faultInjectionOperationType); + + validateNonEmptyString(operationInvocationParamsWrapper.containerIdToTarget); + CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); + + List testObjects = new ArrayList<>(); + + for (int i = 1; i <= testObjCountToBootstrapFrom; i++) { + TestObject testObject = TestObject.create(); + testObjects.add(testObject); + asyncContainer.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); + } + + FeedRange faultyFeedRange; + + if (testObjects.size() != 1) { + faultyFeedRange = FeedRange.forFullRange(); + } else { + faultyFeedRange = FeedRange.forLogicalPartition(new PartitionKey(testObjects.get(0).getId())); + } + + operationInvocationParamsWrapper.faultyFeedRange = faultyFeedRange; + operationInvocationParamsWrapper.testObjectsForDataPlaneOperationToWorkWith = testObjects; + + } catch (Exception ex) { + logger.error("Test failed with ex :", ex); + fail(String.format("Test %s failed in bootstrap stage.", testId)); + } finally { + safeClose(asyncClient); + } + + Function> executeDataPlaneOperation + = resolveDataPlaneOperation(faultInjectionOperationType); + + operationInvocationParamsWrapper.itemRequestOptions = new CosmosItemRequestOptions(); + + if (e2eLatencyPolicyCfg != null) { + operationInvocationParamsWrapper.patchItemRequestOptions = new CosmosPatchItemRequestOptions() + .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + + operationInvocationParamsWrapper.queryRequestOptions = 
new CosmosQueryRequestOptions() + .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + + operationInvocationParamsWrapper.itemRequestOptions + .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + } + + if (nonIdempotentWriteRetriesEnabled) { + operationInvocationParamsWrapper.itemRequestOptions + .setNonIdempotentWriteRetryPolicy(true, true); + } + + execute( + testId, + faultInjectionRuleParamsWrapper, + operationInvocationParamsWrapper, + generateFaultInjectionRules, + executeDataPlaneOperation, + regionSwitchHint, + validateResponseInPresenceOfFaults, + validateResponseInAbsenceOfFaults, + validateRegionsContactedWhenShortCircuitingHasKickedIn, + validateRegionsContactedWhenExceptionBubblesUp, + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + expectedRegionCountWithFailures, + operationIterationCountInFailureFlow, + operationIterationCountInRecoveryFlow); + } + + @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "readManyTestConfigs", timeOut = 4 * TIMEOUT) + public void readManyOperationHitsTerminalExceptionAcrossKRegions( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + Function> executeDataPlaneOperation, + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, + CosmosRegionSwitchHint regionSwitchHint, + Consumer> validateResponseInPresenceOfFaults, + Consumer> validateResponseInAbsenceOfFaults, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + Set allowedConnectionModes, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { + + List preferredRegions = this.writeRegions; + + this.firstPreferredRegion = this.writeRegions.get(0); + 
this.secondPreferredRegion = this.writeRegions.get(1);
+
+        CosmosAsyncClient asyncClient = null;
+
+        OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper();
+        operationInvocationParamsWrapper.queryType = QueryType.READ_MANY;
+
+        CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions);
+
+        ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder);
+
+        // Skip (rather than fail) when the client's connection mode is not one this scenario targets.
+        if (!allowedConnectionModes.contains(connectionPolicy.getConnectionMode())) {
+            throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode()));
+        }
+
+        faultInjectionRuleParamsWrapper.withFaultInjectionConnectionType(evaluateFaultInjectionConnectionType(connectionPolicy.getConnectionMode()));
+
+        try {
+
+            asyncClient = clientBuilder.buildAsyncClient();
+
+            operationInvocationParamsWrapper.containerIdToTarget = this.sharedMultiPartitionAsyncContainerIdWhereMyPkIsPartitionKey;
+
+            CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget);
+
+            List feedRanges = asyncContainer.getFeedRanges().block();
+
+            // AssertJ only attaches a description that is set BEFORE the assertion runs;
+            // as(...) placed after isNotNull()/isNotEmpty() is silently ignored on failure.
+            assertThat(feedRanges).as("feedRanges is not expected to be null!").isNotNull();
+            assertThat(feedRanges).as("feedRanges is not expected to be empty!").isNotEmpty();
+
+            Map> partitionKeyToItemIdentityList = new HashMap<>();
+            List partitionKeys = new ArrayList<>();
+
+            // One random partition key value per feed range, each seeded with 10 documents.
+            for (FeedRange ignored : feedRanges) {
+                String pkForFeedRange = UUID.randomUUID().toString();
+
+                partitionKeys.add(pkForFeedRange);
+                partitionKeyToItemIdentityList.put(pkForFeedRange, new ArrayList<>());
+
+                for (int i = 0; i < 10; i++) {
+                    TestObject testObject = TestObject.create(pkForFeedRange);
+
+                    partitionKeyToItemIdentityList.get(pkForFeedRange).add(new CosmosItemIdentity(new PartitionKey(pkForFeedRange), testObject.getId()));
+
asyncContainer.createItem(testObject, new PartitionKey(testObject.getMypk()), new CosmosItemRequestOptions()).block(); + } + } + + CosmosReadManyRequestOptions readManyRequestOptions = new CosmosReadManyRequestOptions(); + + if (e2eLatencyPolicyCfg != null) { + readManyRequestOptions.setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + } + + operationInvocationParamsWrapper.readManyRequestOptions = readManyRequestOptions; + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(asyncContainer); + + PartitionKey faultyPartitionKey = new PartitionKey(partitionKeys.get(0)); + FeedRange faultyFeedRange = FeedRange.forLogicalPartition(faultyPartitionKey); + + operationInvocationParamsWrapper.faultyFeedRange = faultyFeedRange; + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(faultyFeedRange); + + operationInvocationParamsWrapper.itemIdentitiesForReadManyOperation = partitionKeyToItemIdentityList.get(partitionKeys.get(0)); + } catch (Exception ex) { + logger.error("Test failed with ex :", ex); + fail(String.format("Test %s failed in bootstrap stage.", testId)); + } finally { + safeClose(asyncClient); + } + + execute( + testId, + faultInjectionRuleParamsWrapper, + operationInvocationParamsWrapper, + generateFaultInjectionRules, + executeDataPlaneOperation, + regionSwitchHint, + validateResponseInPresenceOfFaults, + validateResponseInAbsenceOfFaults, + validateRegionsContactedWhenShortCircuitingHasKickedIn, + validateRegionsContactedWhenExceptionBubblesUp, + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + expectedRegionCountWithFailures, + operationIterationCountInFailureFlow, + operationIterationCountInRecoveryFlow); + } + + @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "readAllTestConfigs", timeOut = 4 * TIMEOUT) + public void readAllOperationHitsTerminalExceptionAcrossKRegions( + String testId, + FaultInjectionRuleParamsWrapper 
faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + Function> executeDataPlaneOperation, + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, + CosmosRegionSwitchHint regionSwitchHint, + Consumer> validateResponseInPresenceOfFaults, + Consumer> validateResponseInAbsenceOfFaults, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + Set allowedConnectionModes, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { + + CosmosAsyncClient asyncClient = null; + + OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); + operationInvocationParamsWrapper.queryType = QueryType.READ_ALL; + + List preferredRegions = this.writeRegions; + + this.firstPreferredRegion = preferredRegions.get(0); + this.secondPreferredRegion = preferredRegions.get(1); + + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (!allowedConnectionModes.contains(connectionPolicy.getConnectionMode())) { + throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode())); + } + + faultInjectionRuleParamsWrapper.withFaultInjectionConnectionType(evaluateFaultInjectionConnectionType(connectionPolicy.getConnectionMode())); + + try { + + asyncClient = clientBuilder.buildAsyncClient(); + + operationInvocationParamsWrapper.containerIdToTarget = this.sharedMultiPartitionAsyncContainerIdWhereMyPkIsPartitionKey; + + CosmosAsyncContainer asyncContainer = 
asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget);
+            // Clear the target container before seeding it for this run.
+            deleteAllDocuments(asyncContainer);
+
+            List feedRanges = asyncContainer.getFeedRanges().block();
+
+            // AssertJ only attaches a description that is set BEFORE the assertion runs;
+            // as(...) placed after isNotNull()/isNotEmpty() is silently ignored on failure.
+            assertThat(feedRanges).as("feedRanges is not expected to be null!").isNotNull();
+            assertThat(feedRanges).as("feedRanges is not expected to be empty!").isNotEmpty();
+
+            Map> partitionKeyToItemIdentityList = new HashMap<>();
+            List partitionKeys = new ArrayList<>();
+
+            // One random partition key value per feed range, each seeded with 10 documents.
+            for (FeedRange ignored : feedRanges) {
+                String pkForFeedRange = UUID.randomUUID().toString();
+
+                partitionKeys.add(pkForFeedRange);
+                partitionKeyToItemIdentityList.put(pkForFeedRange, new ArrayList<>());
+
+                for (int i = 0; i < 10; i++) {
+                    TestObject testObject = TestObject.create(pkForFeedRange);
+
+                    partitionKeyToItemIdentityList.get(pkForFeedRange).add(new CosmosItemIdentity(new PartitionKey(pkForFeedRange), testObject.getId()));
+                    asyncContainer.createItem(testObject, new PartitionKey(testObject.getMypk()), new CosmosItemRequestOptions()).block();
+                }
+            }
+
+            CosmosQueryRequestOptions queryRequestOptions = new CosmosQueryRequestOptions();
+
+            if (e2eLatencyPolicyCfg != null) {
+                queryRequestOptions.setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg);
+            }
+
+            faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(asyncContainer);
+
+            // Faults are scoped to the logical partition of the first generated partition key;
+            // the same key is recorded as the partition key for the read-all operation.
+            PartitionKey faultyPartitionKey = new PartitionKey(partitionKeys.get(0));
+            FeedRange faultyFeedRange = FeedRange.forLogicalPartition(faultyPartitionKey);
+
+            operationInvocationParamsWrapper.faultyFeedRange = faultyFeedRange;
+            operationInvocationParamsWrapper.partitionKeyForReadAllOperation = faultyPartitionKey;
+            operationInvocationParamsWrapper.queryRequestOptions = queryRequestOptions;
+
+            faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(faultyFeedRange);
+        } catch (Exception ex) {
+            logger.error("Test failed with ex :", ex);
+            fail(String.format("Test %s failed in bootstrap stage.",
testId)); + } finally { + safeClose(asyncClient); + } + + execute( + testId, + faultInjectionRuleParamsWrapper, + operationInvocationParamsWrapper, + generateFaultInjectionRules, + executeDataPlaneOperation, + regionSwitchHint, + validateResponseInPresenceOfFaults, + validateResponseInAbsenceOfFaults, + validateRegionsContactedWhenShortCircuitingHasKickedIn, + validateRegionsContactedWhenExceptionBubblesUp, + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + expectedRegionCountWithFailures, + operationIterationCountInFailureFlow, + operationIterationCountInRecoveryFlow); + } + + private void execute( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + OperationInvocationParamsWrapper operationInvocationParamsWrapper, + Function> generateFaultInjectionRules, + Function> executeDataPlaneOperation, + CosmosRegionSwitchHint regionSwitchHint, + Consumer> validateResponseInPresenceOfFailures, + Consumer> validateResponseInAbsenceOfFailures, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { + + logger.info("Checking circuit breaking behavior for test type {}", testId); + + List preferredRegions = this.writeRegions; + + this.firstPreferredRegion = preferredRegions.get(0); + this.secondPreferredRegion = preferredRegions.get(1); + + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + 
"\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); + + if (regionSwitchHint != null) { + clientBuilder = clientBuilder + .sessionRetryOptions(new SessionRetryOptionsBuilder().regionSwitchHint(regionSwitchHint).build()); + } + + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + + validateNonEmptyString(this.sharedAsyncDatabaseId); + CosmosAsyncDatabase database = client.getDatabase(this.sharedAsyncDatabaseId); + + CosmosAsyncContainer container; + + try { + + validateNonEmptyString(operationInvocationParamsWrapper.containerIdToTarget); + container = database.getContainer(operationInvocationParamsWrapper.containerIdToTarget); + + RxDocumentClientImpl documentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(client); + + RxCollectionCache collectionCache = ReflectionUtils.getClientCollectionCache(documentClient); + RxPartitionKeyRangeCache partitionKeyRangeCache = ReflectionUtils.getPartitionKeyRangeCache(documentClient); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = documentClient.getGlobalPartitionEndpointManagerForCircuitBreaker(); + + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + 
locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(operationInvocationParamsWrapper.faultyFeedRange); + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(container); + + Utils.ValueHolder faultyFeedRangeEpkImpl = new Utils.ValueHolder<>(); + Utils.ValueHolder faultyFeedRangePartitionKeyImpl = new Utils.ValueHolder<>(); + Utils.ValueHolder> faultyPartitionKeyRanges = new Utils.ValueHolder<>(); + Utils.ValueHolder faultyDocumentCollection = new Utils.ValueHolder<>(); + + assertThat(operationInvocationParamsWrapper.faultyFeedRange).isNotNull().as("Argument 'operationInvocationParamsWrapper.faultyFeedRange' cannot be null!"); + + if (operationInvocationParamsWrapper.faultyFeedRange instanceof FeedRangeEpkImpl) { + + faultyFeedRangeEpkImpl.v = (FeedRangeEpkImpl) operationInvocationParamsWrapper.faultyFeedRange; + + collectionCache.resolveByNameAsync(null, containerAccessor.getLinkWithoutTrailingSlash(container), null) + .flatMap(collection -> { + faultyDocumentCollection.v = collection; + return partitionKeyRangeCache.tryGetOverlappingRangesAsync(null, collection.getResourceId(), faultyFeedRangeEpkImpl.v.getRange(), true, null); + }) + .flatMap(listValueHolder -> { + faultyPartitionKeyRanges.v = listValueHolder.v; + return Mono.just(listValueHolder); + }).block(); + } else if (operationInvocationParamsWrapper.faultyFeedRange instanceof FeedRangePartitionKeyImpl) { + + faultyFeedRangePartitionKeyImpl.v = (FeedRangePartitionKeyImpl) operationInvocationParamsWrapper.faultyFeedRange; + + collectionCache.resolveByNameAsync(null, containerAccessor.getLinkWithoutTrailingSlash(container), null) + .flatMap(collection -> { + 
faultyDocumentCollection.v = collection; + return partitionKeyRangeCache.tryGetOverlappingRangesAsync(null, collection.getResourceId(), faultyFeedRangePartitionKeyImpl.v.getEffectiveRange(collection.getPartitionKey()), true, null); + }) + .flatMap(listValueHolder -> { + faultyPartitionKeyRanges.v = listValueHolder.v; + return Mono.just(listValueHolder); + }).block(); + } else { + fail("Argument 'operationInvocationParamsWrapper.faultyFeedRange' has to be a sub-type of FeedRangeEpkImpl or FeedRangePartitionKeyImpl!"); + } + + validateNonEmptyList(faultyPartitionKeyRanges.v); + assertThat(faultyDocumentCollection.v).isNotNull(); + + List faultInjectionRules = generateFaultInjectionRules.apply(faultInjectionRuleParamsWrapper); + + if (faultInjectionRules != null && !faultInjectionRules.isEmpty()) { + + operationInvocationParamsWrapper.asyncContainer = container; + operationInvocationParamsWrapper.feedRangeToDrainForChangeFeed = operationInvocationParamsWrapper.faultyFeedRange; + operationInvocationParamsWrapper.feedRangeForQuery = operationInvocationParamsWrapper.faultyFeedRange; + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(faultInjectionRuleParamsWrapper.getFaultInjectionApplicableAsyncContainer(), faultInjectionRules) + .block(); + + boolean hasReachedCircuitBreakingThreshold = false; + int executionCountAfterCircuitBreakingThresholdBreached = 0; + + List testObjects = operationInvocationParamsWrapper.testObjectsForDataPlaneOperationToWorkWith; + PartitionKeyRangeWrapper partitionKeyRangeWrapper + = new PartitionKeyRangeWrapper(faultyPartitionKeyRanges.v.get(0), faultyDocumentCollection.v.getResourceId()); + + for (int i = 1; i <= operationIterationCountInFailureFlow; i++) { + + if (!(operationInvocationParamsWrapper.queryType == QueryType.READ_MANY || operationInvocationParamsWrapper.queryType == QueryType.READ_ALL)) { + operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? 
null : testObjects.get(i % testObjects.size()); + } else if (operationInvocationParamsWrapper.queryType == QueryType.READ_MANY) { + validateNonEmptyList(operationInvocationParamsWrapper.itemIdentitiesForReadManyOperation); + } + + ResponseWrapper response = executeDataPlaneOperation.apply(operationInvocationParamsWrapper); + + ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker(); + + int expectedCircuitBreakingThreshold + = doesOperationHaveWriteSemantics(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()) ? + consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, false) : + consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, true); + + if (!hasReachedCircuitBreakingThreshold) { + + hasReachedCircuitBreakingThreshold = expectedCircuitBreakingThreshold == + getEstimatedFailureCountSeenPerRegionPerPartitionKeyRange( + partitionKeyRangeWrapper, + partitionKeyRangeToLocationSpecificUnavailabilityInfo, + locationEndpointToLocationSpecificContextForPartitionField, + expectedCircuitBreakingThreshold, + expectedRegionCountWithFailures); + validateResponseInPresenceOfFailures.accept(response); + } else { + executionCountAfterCircuitBreakingThresholdBreached++; + } + + if (executionCountAfterCircuitBreakingThresholdBreached > 1) { + validateResponseInAbsenceOfFailures.accept(response); + } + + if (response.cosmosItemResponse != null) { + assertThat(response.cosmosItemResponse).isNotNull(); + assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull(); + + if (executionCountAfterCircuitBreakingThresholdBreached > 1) { + validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext()); + } + } else if (response.feedResponse != 
null) { + assertThat(response.feedResponse).isNotNull(); + assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull(); + + if (executionCountAfterCircuitBreakingThresholdBreached > 1) { + validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext()); + } + } else if (response.cosmosException != null) { + assertThat(response.cosmosException).isNotNull(); + assertThat(response.cosmosException.getDiagnostics()).isNotNull(); + + if (!hasReachedCircuitBreakingThreshold) { + CosmosDiagnosticsContext ctx = response.cosmosException.getDiagnostics().getDiagnosticsContext(); + + validateRegionsContactedWhenExceptionBubblesUp.accept(ctx); + } + } else if (response.batchResponse != null) { + assertThat(response.batchResponse).isNotNull(); + assertThat(response.batchResponse.getDiagnostics()).isNotNull(); + + if (executionCountAfterCircuitBreakingThresholdBreached > 1) { + validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.batchResponse.getDiagnostics().getDiagnosticsContext()); + } + } + } + + logger.info("Sleep for 90 seconds to allow Unavailable partitions to be HealthyTentative"); + Thread.sleep(90_000); + + for (int i = operationIterationCountInFailureFlow + 1; i <= operationIterationCountInFailureFlow + operationIterationCountInRecoveryFlow; i++) { + + if (!(operationInvocationParamsWrapper.queryType == QueryType.READ_MANY || operationInvocationParamsWrapper.queryType == QueryType.READ_ALL)) { + operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? 
null : testObjects.get(i % testObjects.size());
+                    } else if (operationInvocationParamsWrapper.queryType == QueryType.READ_MANY) {
+                        validateNonEmptyList(operationInvocationParamsWrapper.itemIdentitiesForReadManyOperation);
+                    }
+
+                    // Recovery flow: every response is validated with the fault-free validator.
+                    ResponseWrapper response = executeDataPlaneOperation.apply(operationInvocationParamsWrapper);
+                    validateResponseInAbsenceOfFailures.accept(response);
+
+                    // Exactly one response slot is inspected; the region validator runs on its diagnostics context.
+                    if (response.cosmosItemResponse != null) {
+                        assertThat(response.cosmosItemResponse).isNotNull();
+                        assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull();
+
+                        validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext());
+                    } else if (response.feedResponse != null) {
+                        assertThat(response.feedResponse).isNotNull();
+                        assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull();
+
+                        validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext());
+                    } else if (response.cosmosException != null) {
+                        assertThat(response.cosmosException).isNotNull();
+                        assertThat(response.cosmosException.getDiagnostics()).isNotNull();
+
+                        // For an exception the contacted regions are only logged, not validated.
+                        response.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach(
+                            regionContacted -> logger.info("Region contacted : {}", regionContacted)
+                        );
+                    } else if (response.batchResponse != null) {
+                        assertThat(response.batchResponse).isNotNull();
+                        assertThat(response.batchResponse.getDiagnostics()).isNotNull();
+
+                        validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.batchResponse.getDiagnostics().getDiagnosticsContext());
+                    }
+                }
+            }
+        } catch (InterruptedException ex) {
+            // Restore the interrupt status before failing so code further up the stack can
+            // still observe that this thread was interrupted.
+            Thread.currentThread().interrupt();
+            fail("InterruptedException should not have been thrown!");
+        } catch (Exception ex) {
+            logger.error("Exception thrown :", ex);
+            fail("Test should have passed!");
+        } finally {
+
System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + safeClose(client); + } + } + + private static int resolveTestObjectCountToBootstrapFrom(FaultInjectionOperationType faultInjectionOperationType, int opCount) { + switch (faultInjectionOperationType) { + case READ_ITEM: + case UPSERT_ITEM: + case REPLACE_ITEM: + case QUERY_ITEM: + case PATCH_ITEM: + case READ_FEED_ITEM: + return 1; + case DELETE_ITEM: + return 2 * opCount; + case CREATE_ITEM: + case BATCH_ITEM: + return 0; + default: + throw new UnsupportedOperationException(String.format("Operation of type : %s is not supported", faultInjectionOperationType)); + } + } + + private static Function> resolveDataPlaneOperation(FaultInjectionOperationType faultInjectionOperationType) { + + switch (faultInjectionOperationType) { + case READ_ITEM: + return (paramsWrapper) -> { + + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + TestObject createdTestObject = paramsWrapper.createdTestObject; + CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; + + try { + + CosmosItemResponse readItemResponse = asyncContainer.readItem( + createdTestObject.getId(), + new PartitionKey(createdTestObject.getId()), + itemRequestOptions, + TestObject.class) + .block(); + + return new ResponseWrapper<>(readItemResponse); + } catch (Exception ex) { + + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + case UPSERT_ITEM: + return (paramsWrapper) -> { + + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + TestObject createdTestObject = paramsWrapper.createdTestObject; + CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; + + try { + + CosmosItemResponse upsertItemResponse = asyncContainer.upsertItem( + createdTestObject, + new PartitionKey(createdTestObject.getId()), + itemRequestOptions) + 
.block(); + + return new ResponseWrapper<>(upsertItemResponse); + } catch (Exception ex) { + + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + case CREATE_ITEM: + return (paramsWrapper) -> { + + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + TestObject createdTestObject = TestObject.create(); + CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; + + try { + + CosmosItemResponse createItemResponse = asyncContainer.createItem( + createdTestObject, + new PartitionKey(createdTestObject.getId()), + itemRequestOptions) + .block(); + + return new ResponseWrapper<>(createItemResponse); + } catch (Exception ex) { + + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + case DELETE_ITEM: + return (paramsWrapper) -> { + + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + TestObject createdTestObject = paramsWrapper.createdTestObject; + CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; + + try { + + CosmosItemResponse deleteItemResponse = asyncContainer.deleteItem( + createdTestObject.getId(), + new PartitionKey(createdTestObject.getId()), + itemRequestOptions) + .block(); + + return new ResponseWrapper<>(deleteItemResponse); + } catch (Exception ex) { + + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + case PATCH_ITEM: + return (paramsWrapper) -> { + + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + TestObject createdTestObject = paramsWrapper.createdTestObject; + CosmosPatchItemRequestOptions patchItemRequestOptions = 
(CosmosPatchItemRequestOptions) paramsWrapper.patchItemRequestOptions; + + CosmosPatchOperations patchOperations = CosmosPatchOperations.create().add("/number", 555); + + try { + + CosmosItemResponse patchItemResponse = asyncContainer.patchItem( + createdTestObject.getId(), + new PartitionKey(createdTestObject.getId()), + patchOperations, + patchItemRequestOptions, + TestObject.class) + .block(); + + return new ResponseWrapper<>(patchItemResponse); + } catch (Exception ex) { + + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + case QUERY_ITEM: + return (paramsWrapper) -> { + + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + CosmosQueryRequestOptions queryRequestOptions = paramsWrapper.queryRequestOptions; + queryRequestOptions = queryRequestOptions.setFeedRange(paramsWrapper.feedRangeForQuery); + + try { + + FeedResponse queryItemResponse = asyncContainer.queryItems( + "SELECT * FROM C", + queryRequestOptions, + TestObject.class) + .byPage() + .blockLast(); + + return new ResponseWrapper<>(queryItemResponse); + } catch (Exception ex) { + + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + case REPLACE_ITEM: + return (paramsWrapper) -> { + + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + TestObject createdTestObject = paramsWrapper.createdTestObject; + CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; + + try { + + CosmosItemResponse deleteItemResponse = asyncContainer.replaceItem( + createdTestObject, + createdTestObject.getId(), + new PartitionKey(createdTestObject.getId()), + itemRequestOptions) + .block(); + + return new ResponseWrapper<>(deleteItemResponse); + } catch (Exception ex) { + + if (ex instanceof 
CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + case BATCH_ITEM: + return (paramsWrapper) -> { + + TestObject testObject = TestObject.create(); + CosmosBatch batch = CosmosBatch.createCosmosBatch(new PartitionKey(testObject.getId())); + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + + batch.createItemOperation(testObject); + batch.readItemOperation(testObject.getId()); + + try { + CosmosBatchResponse batchResponse = asyncContainer.executeCosmosBatch(batch).block(); + return new ResponseWrapper<>(batchResponse); + } catch (Exception ex) { + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + case READ_FEED_ITEM: + return (paramsWrapper) -> { + + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + + try { + + FeedResponse feedResponseFromChangeFeed = asyncContainer.queryChangeFeed( + CosmosChangeFeedRequestOptions.createForProcessingFromBeginning(paramsWrapper.feedRangeToDrainForChangeFeed), + TestObject.class) + .byPage() + .blockLast(); + + return new ResponseWrapper<>(feedResponseFromChangeFeed); + } catch (Exception ex) { + + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + default: + throw new UnsupportedOperationException(String.format("Operation of type : %s is not supported", faultInjectionOperationType)); + } + } + + private String resolveContainerIdByFaultInjectionOperationType(FaultInjectionOperationType faultInjectionOperationType) { + switch (faultInjectionOperationType) { + case READ_ITEM: + case UPSERT_ITEM: + case REPLACE_ITEM: + case QUERY_ITEM: + case PATCH_ITEM: + return 
this.sharedMultiPartitionAsyncContainerIdWhereIdIsPartitionKey; + case DELETE_ITEM: + case CREATE_ITEM: + case BATCH_ITEM: + case READ_FEED_ITEM: + return this.singlePartitionAsyncContainerId; + default: + throw new UnsupportedOperationException(String.format("Operation of type : %s is not supported", faultInjectionOperationType)); + } + } + + @AfterClass(groups = {"circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many"}) + public void afterClass() { + CosmosClientBuilder clientBuilder = new CosmosClientBuilder() + .endpoint(TestConfigurations.HOST) + .key(TestConfigurations.MASTER_KEY) + .contentResponseOnWriteEnabled(true) + .directMode(); + + CosmosAsyncClient dummyClient = null; + if (this.sharedAsyncDatabaseId != null) { + try { + + dummyClient = clientBuilder.buildAsyncClient(); + + CosmosAsyncDatabase sharedAsyncDatabase = dummyClient + .getDatabase(this.sharedAsyncDatabaseId); + CosmosAsyncContainer singlePartitionAsyncContainer = + sharedAsyncDatabase.getContainer(this.singlePartitionAsyncContainerId); + + safeDeleteCollection(singlePartitionAsyncContainer); + } finally { + safeClose(dummyClient); + } + } + } + + private static class ResponseWrapper { + + private final CosmosItemResponse cosmosItemResponse; + private final CosmosException cosmosException; + private final FeedResponse feedResponse; + private final CosmosBatchResponse batchResponse; + + ResponseWrapper(FeedResponse feedResponse) { + this.feedResponse = feedResponse; + this.cosmosException = null; + this.cosmosItemResponse = null; + this.batchResponse = null; + } + + ResponseWrapper(CosmosItemResponse cosmosItemResponse) { + this.cosmosItemResponse = cosmosItemResponse; + this.cosmosException = null; + this.feedResponse = null; + this.batchResponse = null; + } + + ResponseWrapper(CosmosException cosmosException) { + this.cosmosException = cosmosException; + this.cosmosItemResponse = null; + this.feedResponse = null; + this.batchResponse = null; + } 
+ + ResponseWrapper(CosmosBatchResponse batchResponse) { + this.cosmosException = null; + this.cosmosItemResponse = null; + this.feedResponse = null; + this.batchResponse = batchResponse; + } + } + + private static class OperationInvocationParamsWrapper { + public CosmosAsyncContainer asyncContainer; + public TestObject createdTestObject; + public CosmosItemRequestOptions itemRequestOptions; + public CosmosQueryRequestOptions queryRequestOptions; + public CosmosReadManyRequestOptions readManyRequestOptions; + public CosmosItemRequestOptions patchItemRequestOptions; + public FeedRange feedRangeToDrainForChangeFeed; + public FeedRange feedRangeForQuery; + public List itemIdentitiesForReadManyOperation; + public PartitionKey partitionKeyForReadAllOperation; + public String containerIdToTarget; + public int itemCountToBootstrapContainerFrom; + public FeedRange faultyFeedRange; + public List testObjectsForDataPlaneOperationToWorkWith; + public QueryType queryType; + } + + private static class FaultInjectionRuleParamsWrapper { + + private CosmosAsyncContainer faultInjectionApplicableAsyncContainer; + private Integer hitLimit; + private Duration responseDelay; + private Duration faultInjectionDuration; + private List faultInjectionApplicableRegions; + private FeedRange faultInjectionApplicableFeedRange; + private FaultInjectionOperationType faultInjectionOperationType; + private FaultInjectionConnectionType faultInjectionConnectionType; + + public CosmosAsyncContainer getFaultInjectionApplicableAsyncContainer() { + return faultInjectionApplicableAsyncContainer; + } + + public FaultInjectionRuleParamsWrapper withFaultInjectionApplicableAsyncContainer(CosmosAsyncContainer faultInjectionApplicableAsyncContainer) { + this.faultInjectionApplicableAsyncContainer = faultInjectionApplicableAsyncContainer; + return this; + } + + public Integer getHitLimit() { + return hitLimit; + } + + public FaultInjectionRuleParamsWrapper withHitLimit(Integer hitLimit) { + this.hitLimit = 
hitLimit; + return this; + } + + public Duration getResponseDelay() { + return responseDelay; + } + + public FaultInjectionRuleParamsWrapper withResponseDelay(Duration responseDelay) { + this.responseDelay = responseDelay; + return this; + } + + public Duration getFaultInjectionDuration() { + return faultInjectionDuration; + } + + public FaultInjectionRuleParamsWrapper withFaultInjectionDuration(Duration faultInjectionDuration) { + this.faultInjectionDuration = faultInjectionDuration; + return this; + } + + public List getFaultInjectionApplicableRegions() { + return faultInjectionApplicableRegions; + } + + public FaultInjectionRuleParamsWrapper withFaultInjectionApplicableRegions(List faultInjectionApplicableRegions) { + this.faultInjectionApplicableRegions = faultInjectionApplicableRegions; + return this; + } + + public FeedRange getFaultInjectionApplicableFeedRange() { + return faultInjectionApplicableFeedRange; + } + + public FaultInjectionRuleParamsWrapper withFaultInjectionApplicableFeedRange(FeedRange faultInjectionApplicableFeedRange) { + this.faultInjectionApplicableFeedRange = faultInjectionApplicableFeedRange; + return this; + } + + public FaultInjectionOperationType getFaultInjectionOperationType() { + return faultInjectionOperationType; + } + + public FaultInjectionRuleParamsWrapper withFaultInjectionOperationType(FaultInjectionOperationType faultInjectionOperationType) { + this.faultInjectionOperationType = faultInjectionOperationType; + return this; + } + + public FaultInjectionConnectionType getFaultInjectionConnectionType() { + return faultInjectionConnectionType; + } + + public FaultInjectionRuleParamsWrapper withFaultInjectionConnectionType(FaultInjectionConnectionType faultInjectionConnectionType) { + this.faultInjectionConnectionType = faultInjectionConnectionType; + return this; + } + } + + private static Map getRegionMap(DatabaseAccount databaseAccount, boolean writeOnly) { + Iterator locationIterator = + writeOnly ? 
databaseAccount.getWritableLocations().iterator() : databaseAccount.getReadableLocations().iterator(); + Map regionMap = new ConcurrentHashMap<>(); + + while (locationIterator.hasNext()) { + DatabaseAccountLocation accountLocation = locationIterator.next(); + regionMap.put(accountLocation.getName(), accountLocation.getEndpoint()); + } + + return regionMap; + } + + private static List buildServiceUnavailableFaultInjectionRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + + List faultInjectionRules = new ArrayList<>(); + + for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(paramsWrapper.getFaultInjectionOperationType()) + .connectionType(paramsWrapper.getFaultInjectionConnectionType()) + .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) + .region(applicableRegion) + .build(); + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) + .build(); + + FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .hitLimit(paramsWrapper.getHitLimit()) + .build(); + + faultInjectionRules.add(faultInjectionRule); + } + + return faultInjectionRules; + } + + private static List buildServerGeneratedGoneErrorFaultInjectionRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.GONE) + .build(); + + List faultInjectionRules = new ArrayList<>(); + + for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { + + FaultInjectionCondition faultInjectionCondition 
= new FaultInjectionConditionBuilder() + .operationType(paramsWrapper.getFaultInjectionOperationType()) + .connectionType(paramsWrapper.getFaultInjectionConnectionType()) + .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) + .region(applicableRegion) + .build(); + + FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("gone-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .duration(paramsWrapper.getFaultInjectionDuration()) + .build(); + + faultInjectionRules.add(faultInjectionRule); + } + + return faultInjectionRules; + } + + private static List buildTransitTimeoutFaultInjectionRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.RESPONSE_DELAY) + .delay(paramsWrapper.getResponseDelay()) + .suppressServiceRequests(false) + .build(); + + List faultInjectionRules = new ArrayList<>(); + + for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(paramsWrapper.getFaultInjectionOperationType()) + .connectionType(paramsWrapper.getFaultInjectionConnectionType()) + .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) + .region(applicableRegion) + .build(); + + FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("response-delay-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .duration(paramsWrapper.getFaultInjectionDuration()) + .build(); + + faultInjectionRules.add(faultInjectionRule); + } + + return faultInjectionRules; + } + + private static List 
buildReadWriteSessionNotAvailableFaultInjectionRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.READ_SESSION_NOT_AVAILABLE) + .build(); + + List faultInjectionRules = new ArrayList<>(); + + for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(paramsWrapper.getFaultInjectionOperationType()) + .connectionType(paramsWrapper.getFaultInjectionConnectionType()) + .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) + .region(applicableRegion) + .build(); + + FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("read-session-not-available-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .duration(paramsWrapper.getFaultInjectionDuration()) + .build(); + + faultInjectionRules.add(faultInjectionRule); + } + + return faultInjectionRules; + } + + private static List buildTooManyRequestsErrorFaultInjectionRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.TOO_MANY_REQUEST) + .build(); + + List faultInjectionRules = new ArrayList<>(); + + for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(paramsWrapper.getFaultInjectionOperationType()) + .connectionType(paramsWrapper.getFaultInjectionConnectionType()) + .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) + .region(applicableRegion) + .build(); + + 
FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("too-many-requests-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .duration(paramsWrapper.getFaultInjectionDuration()) + .build(); + + faultInjectionRules.add(faultInjectionRule); + } + + return faultInjectionRules; + } + + private static List buildInternalServerErrorFaultInjectionRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.INTERNAL_SERVER_ERROR) + .build(); + + List faultInjectionRules = new ArrayList<>(); + + for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(paramsWrapper.getFaultInjectionOperationType()) + .connectionType(paramsWrapper.getFaultInjectionConnectionType()) + .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) + .region(applicableRegion) + .build(); + + FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("internal-server-error-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .hitLimit(paramsWrapper.getHitLimit()) + .build(); + + faultInjectionRules.add(faultInjectionRule); + } + + return faultInjectionRules; + } + + private static List buildRetryWithFaultInjectionRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.RETRY_WITH) + .build(); + + List faultInjectionRules = new ArrayList<>(); + + for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { + + FaultInjectionCondition faultInjectionCondition = new 
FaultInjectionConditionBuilder() + .operationType(paramsWrapper.getFaultInjectionOperationType()) + .connectionType(paramsWrapper.getFaultInjectionConnectionType()) + .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) + .region(applicableRegion) + .build(); + + FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("retry-with-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .duration(paramsWrapper.getFaultInjectionDuration()) + .build(); + + faultInjectionRules.add(faultInjectionRule); + } + + return faultInjectionRules; + } + + private static boolean doesOperationHaveWriteSemantics(FaultInjectionOperationType faultInjectionOperationType) { + switch (faultInjectionOperationType) { + + case DELETE_ITEM: + case PATCH_ITEM: + case UPSERT_ITEM: + case BATCH_ITEM: + case REPLACE_ITEM: + case CREATE_ITEM: + return true; + case READ_ITEM: + case QUERY_ITEM: + case READ_FEED_ITEM: + return false; + default: + throw new IllegalArgumentException("Unsupported operation type : " + faultInjectionOperationType); + } + } + + private static void validateNonEmptyString(String input) { + assertThat(input).isNotNull(); + assertThat(input).isNotEmpty(); + } + + private static void validateNonEmptyList(List list) { + assertThat(list).isNotNull(); + assertThat(list).isNotEmpty(); + } + + private static void deleteAllDocuments(CosmosAsyncContainer asyncContainer) { + asyncContainer + .queryItems("SELECT * FROM C", TestObject.class) + .collectList() + .flatMapMany(Flux::fromIterable) + .flatMap(testObject -> asyncContainer.deleteItem(testObject.getId(), new PartitionKey(testObject.getMypk()))) + .blockLast(); + } + + private static Class getClassBySimpleName(Class[] classes, String classSimpleName) { + for (Class clazz : classes) { + if (clazz.getSimpleName().equals(classSimpleName)) { + return clazz; + } + } + + logger.warn("Class with simple name {} does 
not exist!", classSimpleName); + return null; + }
+
+    /**
+     * Estimates the average number of failures recorded per region for one partition key range,
+     * using the circuit breaker's internal per-location health contexts read through reflection.
+     *
+     * <p>A location whose status is {@code Unavailable} contributes
+     * {@code allowedExceptionCountToMaintainHealthyWithFailuresStatus}; any other location contributes
+     * its read plus write exception counts. The sum is divided by {@code expectedRegionCountWithFailures}.
+     *
+     * @param partitionKeyRangeWrapper key identifying the partition key range to inspect
+     * @param partitionKeyRangeToLocationSpecificUnavailabilityInfo map holding the per-partition unavailability info
+     * @param locationEndpointToLocationSpecificContextForPartitionField reflected field on the unavailability
+     *        info that holds the per-location health contexts
+     * @param allowedExceptionCountToMaintainHealthyWithFailuresStatus count attributed to an unavailable location
+     * @param expectedRegionCountWithFailures divisor; the division is done in floating point, so a value of
+     *        {@code 0} yields infinity rather than an exception
+     * @return {@code 0} when the partition has no entry or no location has a non-zero exception count,
+     *         otherwise the estimated per-region failure count
+     * @throws IllegalAccessException if the reflected field cannot be read
+     */
+    private static double getEstimatedFailureCountSeenPerRegionPerPartitionKeyRange(
+        PartitionKeyRangeWrapper partitionKeyRangeWrapper,
+        ConcurrentHashMap<?, ?> partitionKeyRangeToLocationSpecificUnavailabilityInfo,
+        Field locationEndpointToLocationSpecificContextForPartitionField,
+        int allowedExceptionCountToMaintainHealthyWithFailuresStatus,
+        int expectedRegionCountWithFailures) throws IllegalAccessException {
+
+        Object partitionAndLocationSpecificUnavailabilityInfo
+            = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper);
+
+        if (partitionAndLocationSpecificUnavailabilityInfo == null) {
+            return 0d;
+        }
+
+        // Field.get returns Object; the value type is what the loop below relies on, the key type is not used.
+        @SuppressWarnings("unchecked")
+        ConcurrentHashMap<?, LocationSpecificHealthContext> locationEndpointToLocationSpecificContextForPartition
+            = (ConcurrentHashMap<?, LocationSpecificHealthContext>) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo);
+
+        int count = 0;
+        boolean failuresExist = false;
+
+        for (LocationSpecificHealthContext locationSpecificHealthContext : locationEndpointToLocationSpecificContextForPartition.values()) {
+
+            if (locationSpecificHealthContext.getLocationHealthStatus() == LocationHealthStatus.Unavailable) {
+                count += allowedExceptionCountToMaintainHealthyWithFailuresStatus;
+            } else {
+                count += locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() + locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking();
+            }
+
+            // An unavailable location only counts as a failure if its exception counters are non-zero.
+            if (locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() + locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking() > 0) {
+                failuresExist = true;
+            }
+        }
+
+        if (failuresExist) {
+            return ((double) count) / ((double) expectedRegionCountWithFailures);
+        }
+
+        return 0d;
+    }
+
+ private static FaultInjectionConnectionType evaluateFaultInjectionConnectionType(ConnectionMode connectionMode) { + + if (connectionMode == ConnectionMode.DIRECT) { + return FaultInjectionConnectionType.DIRECT; + } else if 
(connectionMode == ConnectionMode.GATEWAY) { + return FaultInjectionConnectionType.GATEWAY; + } + + throw new IllegalArgumentException("Unsupported connection mode : " + connectionMode); + } + + private enum QueryType { + READ_MANY, READ_ALL + } +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java index 9329fb405a2d..c3e720a90d95 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java @@ -11,6 +11,7 @@ import com.azure.cosmos.CosmosRegionSwitchHint; import com.azure.cosmos.SessionRetryOptions; import com.azure.cosmos.SessionRetryOptionsBuilder; +import com.azure.cosmos.implementation.circuitBreaker.PartitionLevelCircuitBreakerConfig; import com.azure.cosmos.implementation.directconnectivity.RntbdTransportClient; import com.azure.cosmos.implementation.guava25.collect.ImmutableList; import com.azure.cosmos.implementation.http.HttpClientConfig; @@ -73,7 +74,8 @@ public Object[][] clientCfgProvider() { aggressiveWarmUpDuration1, proactiveConnectionRegionCount1, cosmosContainerIdentities, - false // is region scoped session capturing enabled + false, // is region scoped session capturing enabled + false // is partition-level circuit breaking enabled }, { new CosmosContainerProactiveInitConfigBuilder(cosmosContainerIdentities) @@ -83,7 +85,8 @@ public Object[][] clientCfgProvider() { aggressiveWarmUpDuration2, proactiveConnectionRegionCount2, cosmosContainerIdentities, - true + true, // is region scoped session capturing enabled + false // is partition-level circuit breaking enabled }, { new CosmosContainerProactiveInitConfigBuilder(cosmosContainerIdentities) @@ -92,7 +95,8 @@ public Object[][] clientCfgProvider() 
{ null, proactiveConnectionRegionCount3, cosmosContainerIdentities, - false + false, // is region scoped session capturing enabled + true // is partition-level circuit breaking enabled } }; } @@ -243,7 +247,8 @@ public void full( Duration aggressiveWarmupDuration, int proactiveConnectionRegionCount, List cosmosContainerIdentities, - boolean isRegionScopedSessionCapturingEnabled) throws Exception { + boolean isRegionScopedSessionCapturingEnabled, + boolean isPartitionLevelCircuitBreakerEnabled) throws Exception { DiagnosticsClientContext clientContext = Mockito.mock(DiagnosticsClientContext.class); System.setProperty("COSMOS.REPLICA_ADDRESS_VALIDATION_ENABLED", "false"); @@ -277,6 +282,19 @@ public void full( diagnosticsClientConfig.withRegionScopedSessionContainerOptions(regionScopedSessionContainer); } + if (isPartitionLevelCircuitBreakerEnabled) { + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); + + PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig = Configs.getPartitionLevelCircuitBreakerConfig(); + diagnosticsClientConfig.withPartitionLevelCircuitBreakerConfig(partitionLevelCircuitBreakerConfig); + } + Mockito.doReturn(diagnosticsClientConfig).when(clientContext).getConfig(); StringWriter jsonWriter = new StringWriter(); @@ -301,11 +319,18 @@ public void full( assertThat(objectNode.get("regionScopedSessionCfg")).isNull(); } + if (isPartitionLevelCircuitBreakerEnabled) { + assertThat(objectNode.get("partitionLevelCircuitBreakerCfg").asText()).isEqualTo("(cb: true, type: CONSECUTIVE_EXCEPTION_COUNT_BASED, rexcntt: 10, wexcntt: 5)"); + } else { + assertThat(objectNode.get("partitionLevelCircuitBreakerCfg")).isNull(); + } + String expectedProactiveInitConfigString = 
reconstructProactiveInitConfigString(cosmosContainerIdentities, aggressiveWarmupDuration, proactiveConnectionRegionCount); assertThat(objectNode.get("proactiveInitCfg").asText()).isEqualTo(expectedProactiveInitConfigString); System.clearProperty("COSMOS.REPLICA_ADDRESS_VALIDATION_ENABLED"); + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } @Test(groups = {"unit"}, dataProvider = "sessionRetryOptionsConfigProvider") diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java index e2242f9fef4e..2e4ae2c9f956 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java @@ -6,6 +6,7 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.CosmosException; import com.azure.cosmos.ThrottlingRetryOptions; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.directconnectivity.ChannelAcquisitionException; import io.netty.handler.timeout.ReadTimeoutException; import io.reactivex.subscribers.TestSubscriber; @@ -63,9 +64,10 @@ public static Object[][] tcpNetworkFailureOnWriteArgProvider() { public void networkFailureOnRead() throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); 
Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null, globalPartitionEndpointManager); Exception exception = new SocketException("Dummy SocketException"); CosmosException cosmosException = BridgeInternal.createCosmosException(null, HttpConstants.StatusCodes.SERVICE_UNAVAILABLE, exception); @@ -102,6 +104,8 @@ public void shouldRetryOnGatewayTimeout( boolean shouldCrossRegionRetry) throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(true)); ClientRetryPolicy clientRetryPolicy = @@ -110,7 +114,8 @@ public void shouldRetryOnGatewayTimeout( endpointManager, true, throttlingRetryOptions, - null); + null, + globalPartitionEndpointManager); Exception exception = ReadTimeoutException.INSTANCE; CosmosException cosmosException = BridgeInternal.createCosmosException(null, HttpConstants.StatusCodes.REQUEST_TIMEOUT, exception); @@ -142,10 +147,12 @@ public void shouldRetryOnGatewayTimeout( public void tcpNetworkFailureOnRead() throws Exception { ThrottlingRetryOptions retryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + 
GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); Mockito.doReturn(2).when(endpointManager).getPreferredLocationCount(); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null, globalPartitionEndpointManager); Exception exception = ReadTimeoutException.INSTANCE; GoneException goneException = new GoneException(exception); @@ -188,9 +195,11 @@ public void tcpNetworkFailureOnRead() throws Exception { public void networkFailureOnWrite() throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null, globalPartitionEndpointManager); Exception exception = new SocketException("Dummy SocketException");; 
CosmosException cosmosException = BridgeInternal.createCosmosException(null, HttpConstants.StatusCodes.SERVICE_UNAVAILABLE, exception); @@ -221,10 +230,12 @@ public void tcpNetworkFailureOnWrite( boolean shouldRetry) throws Exception { ThrottlingRetryOptions retryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); Mockito.doReturn(2).when(endpointManager).getPreferredLocationCount(); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null, globalPartitionEndpointManager); //Non retribale exception for write GoneException goneException = new GoneException(exception); @@ -279,9 +290,11 @@ public void tcpNetworkFailureOnWrite( public void networkFailureOnUpsert() throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); - ClientRetryPolicy clientRetryPolicy = new 
ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null, globalPartitionEndpointManager); Exception exception = new SocketException("Dummy SocketException"); CosmosException cosmosException = BridgeInternal.createCosmosException(null, HttpConstants.StatusCodes.SERVICE_UNAVAILABLE, exception); @@ -310,10 +323,12 @@ public void networkFailureOnUpsert() throws Exception { public void tcpNetworkFailureOnUpsert() throws Exception { ThrottlingRetryOptions retryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); Mockito.doReturn(2).when(endpointManager).getPreferredLocationCount(); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null, globalPartitionEndpointManager); Exception exception = new SocketException("Dummy SocketException"); GoneException goneException = new GoneException(exception); @@ -344,9 +359,11 @@ public void tcpNetworkFailureOnUpsert() throws Exception { public void networkFailureOnDelete() throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + 
GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null, globalPartitionEndpointManager); Exception exception = ReadTimeoutException.INSTANCE; CosmosException cosmosException = BridgeInternal.createCosmosException( @@ -376,10 +393,12 @@ public void networkFailureOnDelete() throws Exception { public void tcpNetworkFailureOnDelete() throws Exception { ThrottlingRetryOptions retryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); Mockito.doReturn(2).when(endpointManager).getPreferredLocationCount(); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null, globalPartitionEndpointManager); Exception exception = ReadTimeoutException.INSTANCE; GoneException 
goneException = new GoneException(exception); @@ -410,9 +429,10 @@ public void tcpNetworkFailureOnDelete() throws Exception { public void onBeforeSendRequestNotInvoked() { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null, globalPartitionEndpointManager); Exception exception = ReadTimeoutException.INSTANCE; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java index b3027ce8782e..7dc4619fee59 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java @@ -4,21 +4,20 @@ package com.azure.cosmos.implementation; +import com.azure.cosmos.BridgeInternal; +import com.azure.cosmos.ConsistencyLevel; import com.azure.cosmos.CosmosItemSerializer; import com.azure.cosmos.DirectConnectionConfig; import com.azure.cosmos.GatewayConnectionConfig; import com.azure.cosmos.implementation.apachecommons.collections.map.UnmodifiableMap; -import com.azure.cosmos.BridgeInternal; -import com.azure.cosmos.ConsistencyLevel; import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; +import 
com.azure.cosmos.implementation.directconnectivity.WFConstants; +import com.azure.cosmos.implementation.routing.PartitionKeyInternalHelper; +import com.azure.cosmos.implementation.routing.Range; import com.azure.cosmos.models.CosmosClientTelemetryConfig; -import com.azure.cosmos.models.ModelBridgeInternal; import com.azure.cosmos.models.PartitionKey; import com.azure.cosmos.models.PartitionKeyDefinition; import com.azure.cosmos.models.PartitionKind; -import com.azure.cosmos.implementation.directconnectivity.WFConstants; -import com.azure.cosmos.implementation.routing.PartitionKeyInternalHelper; -import com.azure.cosmos.implementation.routing.Range; import org.apache.commons.lang3.StringUtils; import org.assertj.core.api.Assertions; import org.testng.SkipException; @@ -147,10 +146,10 @@ void validateStrongConsistencyOnAsyncReplication(boolean useGateway) throws Inte Document documentDefinition = getDocumentDefinition(); Document document = createDocument(this.writeClient, createdDatabase.getId(), createdCollection.getId(), documentDefinition); - validateStrongConsistency(document); + validateStrongConsistency(document, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); } - void validateStrongConsistency(Document documentToWorkWith) throws InterruptedException { + void validateStrongConsistency(Document documentToWorkWith, String collectionLink) throws InterruptedException { int numberOfTestIteration = 5; Document writeDocument = documentToWorkWith; while (numberOfTestIteration-- > 0) { diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java index dc21e6116ef5..5a352678a334 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java +++ 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java @@ -4,8 +4,8 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.implementation.caches.RxClientCollectionCache; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.directconnectivity.WFConstants; -import com.azure.cosmos.models.ModelBridgeInternal; import io.netty.handler.timeout.ReadTimeoutException; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -23,9 +23,10 @@ public class RenameCollectionAwareClientRetryPolicyTest { @Test(groups = "unit", timeOut = TIMEOUT) public void onBeforeSendRequestNotInvoked() { GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(eq(null), eq(false)); - IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy()); + IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy(), globalPartitionEndpointManager); RxClientCollectionCache rxClientCollectionCache = Mockito.mock(RxClientCollectionCache.class); ISessionContainer sessionContainer = Mockito.mock(ISessionContainer.class); @@ -52,8 +53,9 @@ public void onBeforeSendRequestNotInvoked() { @Test(groups = "unit", timeOut = TIMEOUT) public void shouldRetryWithNotFoundStatusCode() { GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); 
Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(eq(null), eq(false)); - IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy()); + IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy(), globalPartitionEndpointManager); RxClientCollectionCache rxClientCollectionCache = Mockito.mock(RxClientCollectionCache.class); ISessionContainer sessionContainer = Mockito.mock(ISessionContainer.class); @@ -78,8 +80,10 @@ public void shouldRetryWithNotFoundStatusCode() { @Test(groups = "unit", timeOut = TIMEOUT) public void shouldRetryWithNotFoundStatusCodeAndReadSessionNotAvailableSubStatusCode() { GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(eq(null), eq(false)); - IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy()); + IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy(), globalPartitionEndpointManager); RxClientCollectionCache rxClientCollectionCache = Mockito.mock(RxClientCollectionCache.class); ISessionContainer sessionContainer = Mockito.mock(ISessionContainer.class); @@ -115,8 +119,10 @@ public void shouldRetryWithNotFoundStatusCodeAndReadSessionNotAvailableSubStatus @Test(groups = "unit", timeOut = TIMEOUT) public void shouldRetryWithGenericException() { GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = 
Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(eq(null), eq(false)); - IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy()); + IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy(), globalPartitionEndpointManager); RxClientCollectionCache rxClientCollectionCache = Mockito.mock(RxClientCollectionCache.class); ISessionContainer sessionContainer = Mockito.mock(ISessionContainer.class); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java index 27a7fd407092..0bafee942c5f 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java @@ -110,14 +110,20 @@ public void setUp() { this.defaultItemSerializer = Mockito.mock(CosmosItemSerializer.class); } - @Test(groups = {"unit"}) + // todo: fix and revert enabled = false when circuit breaker is enabled + @Test(groups = {"unit"}, enabled = true) public void readMany() { // setup static method mocks MockedStatic httpClientMock = Mockito.mockStatic(HttpClient.class); MockedStatic partitionKeyInternalHelperMock = Mockito.mockStatic(PartitionKeyInternalHelper.class); MockedStatic documentQueryExecutionFactoryMock = Mockito.mockStatic(DocumentQueryExecutionContextFactory.class); - MockedStatic observableHelperMock = Mockito.mockStatic(ObservableHelper.class); +// MockedStatic observableHelperMock = Mockito.mockStatic(ObservableHelper.class); + + // setup mocks + DocumentClientRetryPolicy documentClientRetryPolicyMock = 
Mockito.mock(DocumentClientRetryPolicy.class); + RxGatewayStoreModel gatewayStoreModelMock = Mockito.mock(RxGatewayStoreModel.class); + RxStoreModel serverStoreModelMock = Mockito.mock(RxStoreModel.class); // dummy values PartitionKeyRange dummyPartitionKeyRange1 = new PartitionKeyRange() @@ -196,19 +202,28 @@ public void readMany() { Mockito.any() )) .thenReturn(Flux.just(dummyExecutionContextForQuery(queryResults, headersForQueries, InternalObjectNode.class))); - observableHelperMock - .when(() -> ObservableHelper.inlineIfPossibleAsObs(Mockito.any(), Mockito.any())) - .thenReturn(Mono.just(dummyResourceResponse(pointReadResult, headersForPointReads))); Mockito .when(this.collectionCacheMock.resolveCollectionAsync(Mockito.isNull(), Mockito.any(RxDocumentServiceRequest.class))) .thenReturn(Mono.just(dummyCollectionObs())); + + Mockito + .when(this.collectionCacheMock.resolveByNameAsync(Mockito.any(), Mockito.anyString(), Mockito.isNull())) + .thenReturn(Mono.just(dummyCollectionObs().v)); + Mockito .when(this.partitionKeyRangeCacheMock.tryLookupAsync(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any())) .thenReturn(Mono.just(dummyCollectionRoutingMap(epksPartitionKeyRangeMap))); - Mockito.when(this.resetSessionTokenRetryPolicyMock.getRequestPolicy(null)).thenReturn(dummyDocumentClientRetryPolicy()); + RetryContext retryContext = new RetryContext(); + Mockito.when(this.resetSessionTokenRetryPolicyMock.getRequestPolicy(null)).thenReturn(dummyDocumentClientRetryPolicy()); + Mockito.when(this.cosmosAuthorizationTokenResolverMock.getAuthorizationToken(Mockito.anyString(), Mockito.anyString(), Mockito.anyString(), Mockito.any())).thenReturn("abcdefgh"); + Mockito.when(this.resetSessionTokenRetryPolicyMock.getRequestPolicy(Mockito.any())).thenReturn(documentClientRetryPolicyMock); + Mockito.when(documentClientRetryPolicyMock.getRetryContext()).thenReturn(retryContext); + Mockito + 
.when(serverStoreModelMock.processMessage(Mockito.any(RxDocumentServiceRequest.class))) + .thenReturn(Mono.just(mockRxDocumentServiceResponse(pointReadResult, headersForPointReads))); // initialize object to be tested RxDocumentClientImpl rxDocumentClient = new RxDocumentClientImpl( @@ -237,6 +252,8 @@ public void readMany() { ReflectionUtils.setCollectionCache(rxDocumentClient, this.collectionCacheMock); ReflectionUtils.setPartitionKeyRangeCache(rxDocumentClient, this.partitionKeyRangeCacheMock); ReflectionUtils.setResetSessionTokenRetryPolicy(rxDocumentClient, this.resetSessionTokenRetryPolicyMock); + ReflectionUtils.setGatewayProxy(rxDocumentClient, gatewayStoreModelMock); + ReflectionUtils.setServerStoreModel(rxDocumentClient, serverStoreModelMock); ArrayList cosmosItemIdentities = new ArrayList(); @@ -289,7 +306,6 @@ public void readMany() { // release static mocks httpClientMock.close(); partitionKeyInternalHelperMock.close(); - observableHelperMock.close(); documentQueryExecutionFactoryMock.close(); // de-register client @@ -413,8 +429,7 @@ public RetryContext getRetryContext() { }; } - private static ResourceResponse dummyResourceResponse(String content, Map headers) { - + private static RxDocumentServiceResponse mockRxDocumentServiceResponse(String content, Map headers) { byte[] blob = content.getBytes(StandardCharsets.UTF_8); StoreResponse storeResponse = new StoreResponse( HttpResponseStatus.OK.code(), @@ -452,7 +467,7 @@ public CosmosDiagnostics getMostRecentlyCreatedDiagnostics() { documentServiceResponse.setCosmosDiagnostics(dummyCosmosDiagnostics()); - return new ResourceResponse<>(documentServiceResponse, Document.class); + return documentServiceResponse; } private static CosmosDiagnostics dummyCosmosDiagnostics() { diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java index 
ce8606e9529d..77ac38573ca7 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java @@ -5,6 +5,7 @@ import com.azure.core.credential.AzureKeyCredential; import com.azure.cosmos.ClientUnderTestBuilder; import com.azure.cosmos.ConsistencyLevel; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.http.HttpClient; import com.azure.cosmos.implementation.http.HttpRequest; import com.azure.cosmos.implementation.http.HttpResponse; @@ -70,6 +71,7 @@ RxGatewayStoreModel createRxGatewayProxy( QueryCompatibilityMode queryCompatibilityMode, UserAgentContainer userAgentContainer, GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker, HttpClient rxOrigClient, ApiType apiType) { diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java index 4b99500c971a..0ea494c8dbc6 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java @@ -5,6 +5,7 @@ import com.azure.cosmos.ConsistencyLevel; import com.azure.cosmos.CosmosException; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.directconnectivity.GatewayServiceConfigurationReader; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.implementation.http.HttpClient; @@ -79,6 +80,8 @@ public void readTimeout() throws 
Exception { QueryCompatibilityMode queryCompatibilityMode = QueryCompatibilityMode.Default; UserAgentContainer userAgentContainer = new UserAgentContainer(); GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + Mockito.doReturn(new URI("https://localhost")) .when(globalEndpointManager).resolveServiceEndpoint(any()); HttpClient httpClient = Mockito.mock(HttpClient.class); @@ -121,6 +124,7 @@ public void serviceUnavailable() throws Exception { QueryCompatibilityMode queryCompatibilityMode = QueryCompatibilityMode.Default; UserAgentContainer userAgentContainer = new UserAgentContainer(); GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("https://localhost")) .when(globalEndpointManager).resolveServiceEndpoint(any()); HttpClient httpClient = Mockito.mock(HttpClient.class); @@ -173,6 +177,8 @@ public void applySessionToken( Mockito.doReturn(sdkGlobalSessionToken).when(sessionContainer).resolveGlobalSessionToken(any()); GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + Mockito.doReturn(new URI("https://localhost")) .when(globalEndpointManager).resolveServiceEndpoint(any()); @@ -242,6 +248,8 @@ public void validateApiType() throws Exception { Mockito.doReturn(sdkGlobalSessionToken).when(sessionContainer).resolveGlobalSessionToken(any()); GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker 
globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + Mockito.doReturn(new URI("https://localhost")) .when(globalEndpointManager).resolveServiceEndpoint(any()); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java index 939038579a1c..2798d707dc8c 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java @@ -5,6 +5,7 @@ import com.azure.core.credential.AzureKeyCredential; import com.azure.cosmos.ConnectionMode; import com.azure.cosmos.ConsistencyLevel; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.directconnectivity.Protocol; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.implementation.http.HttpClient; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/TestSuiteBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/TestSuiteBase.java index 21ca9330845e..6930aab6d800 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/TestSuiteBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/TestSuiteBase.java @@ -586,24 +586,11 @@ public static void deleteDocumentIfExists(AsyncDocumentClient client, String dat Document.class) .single().block().getResults(); if (!res.isEmpty()) { - deleteDocument(client, TestUtils.getDocumentNameLink(databaseId, collectionId, docId), pk); + deleteDocument(client, TestUtils.getDocumentNameLink(databaseId, collectionId, docId), pk, 
TestUtils.getCollectionNameLink(databaseId, collectionId)); } } - public static void safeDeleteDocument(AsyncDocumentClient client, String documentLink, RequestOptions options) { - if (client != null && documentLink != null) { - try { - client.deleteDocument(documentLink, options).block(); - } catch (Exception e) { - CosmosException dce = Utils.as(e, CosmosException.class); - if (dce == null || dce.getStatusCode() != 404) { - throw e; - } - } - } - } - - public static void deleteDocument(AsyncDocumentClient client, String documentLink, PartitionKey pk) { + public static void deleteDocument(AsyncDocumentClient client, String documentLink, PartitionKey pk, String collectionLink) { RequestOptions options = new RequestOptions(); options.setPartitionKey(pk); client.deleteDocument(documentLink, options).block(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java index 046c88a73a52..4cd7f3e153f5 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java @@ -9,7 +9,6 @@ import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ICollectionRoutingMapCache; import com.azure.cosmos.implementation.InvalidPartitionException; -import com.azure.cosmos.implementation.MetadataDiagnosticsContext; import com.azure.cosmos.implementation.NotFoundException; import com.azure.cosmos.implementation.OperationType; import com.azure.cosmos.implementation.PartitionKeyRange; @@ -26,7 +25,6 @@ import com.azure.cosmos.implementation.routing.InMemoryCollectionRoutingMap; import com.azure.cosmos.implementation.routing.PartitionKeyInternalHelper; import 
com.azure.cosmos.implementation.routing.PartitionKeyRangeIdentity; -import com.azure.cosmos.models.ModelBridgeInternal; import com.azure.cosmos.models.PartitionKey; import com.azure.cosmos.models.PartitionKeyDefinition; import org.apache.commons.lang3.NotImplementedException; @@ -65,7 +63,6 @@ public class AddressResolverTest { private RxCollectionCache collectionCache; private ICollectionRoutingMapCache collectionRoutingMapCache; private IAddressCache fabricAddressCache; - private int collectionCacheRefreshedCount; private Map routingMapRefreshCount; private Map addressesRefreshCount; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java index e890168543cc..73c4110cea1c 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java @@ -7,11 +7,11 @@ import com.azure.cosmos.DirectConnectionConfig; import com.azure.cosmos.CosmosContainerProactiveInitConfig; import com.azure.cosmos.CosmosContainerProactiveInitConfigBuilder; -import com.azure.cosmos.implementation.AsyncDocumentClient; import com.azure.cosmos.implementation.Configs; import com.azure.cosmos.implementation.ConnectionPolicy; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; import com.azure.cosmos.implementation.OpenConnectionResponse; import com.azure.cosmos.implementation.OperationType; @@ -28,7 +28,6 @@ import 
com.azure.cosmos.implementation.routing.PartitionKeyInternalHelper; import com.azure.cosmos.implementation.routing.PartitionKeyRangeIdentity; import com.azure.cosmos.models.CosmosContainerIdentity; -import com.azure.cosmos.models.ModelBridgeInternal; import org.mockito.ArgumentMatchers; import org.mockito.Mockito; import org.testng.annotations.BeforeClass; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java index 7edd63274342..5613b3866d95 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java @@ -11,6 +11,7 @@ import com.azure.cosmos.implementation.Document; import com.azure.cosmos.implementation.DocumentClientRetryPolicy; import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.IRetryPolicyFactory; import com.azure.cosmos.implementation.PartitionKeyRange; @@ -119,9 +120,11 @@ private IRetryPolicyFactory mockDocumentClientIRetryPolicyFactory() { } GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + Mockito.doReturn(url).when(globalEndpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); doReturn(false).when(globalEndpointManager).isClosed(); - return new RetryPolicy(mockDiagnosticsClientContext(), globalEndpointManager, ConnectionPolicy.getDefaultPolicy()); + return new 
RetryPolicy(mockDiagnosticsClientContext(), globalEndpointManager, ConnectionPolicy.getDefaultPolicy(), globalPartitionEndpointManager); } @Test(groups = {"unit"}, dataProvider = "splitParamProvider", timeOut = TIMEOUT) @@ -545,6 +548,8 @@ public void simple() { , responses)); IDocumentQueryClient queryClient = Mockito.mock(IDocumentQueryClient.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + doAnswer(invocation -> { Supplier retryPolicyFactory = invocation.getArgument(2); RxDocumentServiceRequest req = invocation.getArgument(3); @@ -552,7 +557,21 @@ public void simple() { invocation.getArgument(4); return feedOperation.apply(retryPolicyFactory, req); - }).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any()); + }).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any(), any()); + + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(queryClient).populateFeedRangeHeader(any()); + + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + + doReturn(globalPartitionEndpointManagerForCircuitBreaker).when(queryClient).getGlobalPartitionEndpointManagerForCircuitBreaker(); + doReturn(false).when(globalPartitionEndpointManagerForCircuitBreaker).isPartitionLevelCircuitBreakingApplicable(any()); + String initialContinuationToken = "initial-cp"; DocumentProducer documentProducer = new DocumentProducer<>( @@ -631,6 +650,8 @@ public void retries() { behaviourAfterException); IDocumentQueryClient queryClient = Mockito.mock(IDocumentQueryClient.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = 
Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + doAnswer(invocation -> { Supplier retryPolicyFactory = invocation.getArgument(2); RxDocumentServiceRequest req = invocation.getArgument(3); @@ -638,7 +659,20 @@ public void retries() { invocation.getArgument(4); return feedOperation.apply(retryPolicyFactory, req); - }).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any()); + }).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any(), any()); + + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(queryClient).populateFeedRangeHeader(any()); + + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + + doReturn(globalPartitionEndpointManagerForCircuitBreaker).when(queryClient).getGlobalPartitionEndpointManagerForCircuitBreaker(); + doReturn(false).when(globalPartitionEndpointManagerForCircuitBreaker).isPartitionLevelCircuitBreakingApplicable(any()); String initialContinuationToken = "initial-cp"; DocumentProducer documentProducer = @@ -722,6 +756,8 @@ public void retriesExhausted() { exceptionBehaviour); IDocumentQueryClient queryClient = Mockito.mock(IDocumentQueryClient.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + doAnswer(invocation -> { Supplier retryPolicyFactory = invocation.getArgument(2); RxDocumentServiceRequest req = invocation.getArgument(3); @@ -729,7 +765,21 @@ public void retriesExhausted() { invocation.getArgument(4); return feedOperation.apply(retryPolicyFactory, req); - }).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any()); + 
}).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any(), any()); + + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(queryClient).populateFeedRangeHeader(any()); + + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + + doReturn(globalPartitionEndpointManagerForCircuitBreaker).when(queryClient).getGlobalPartitionEndpointManagerForCircuitBreaker(); + doReturn(false).when(globalPartitionEndpointManagerForCircuitBreaker).isPartitionLevelCircuitBreakingApplicable(any()); + String initialContinuationToken = "initial-cp"; DocumentProducer documentProducer = new DocumentProducer( @@ -843,6 +893,7 @@ private int getLastValueInAsc(int initialValue, List> res private IDocumentQueryClient mockQueryClient(List replacementRanges) { IDocumentQueryClient client = Mockito.mock(IDocumentQueryClient.class); RxPartitionKeyRangeCache cache = Mockito.mock(RxPartitionKeyRangeCache.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); doAnswer(invocation -> { Supplier retryPolicyFactory = invocation.getArgument(2); @@ -851,10 +902,26 @@ private IDocumentQueryClient mockQueryClient(List replacement invocation.getArgument(4); return feedOperation.apply(retryPolicyFactory, req); - }).when(client).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any()); + }).when(client).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any(), any()); + doReturn(cache).when(client).getPartitionKeyRangeCache(); + doReturn(Mono.just(new Utils.ValueHolder<>(replacementRanges))) .when(cache).tryGetOverlappingRangesAsync(any(), any(), any(), anyBoolean(), ArgumentMatchers.any()); + + 
doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(client).populateFeedRangeHeader(any()); + + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(client).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + + doReturn(globalPartitionEndpointManagerForCircuitBreaker).when(client).getGlobalPartitionEndpointManagerForCircuitBreaker(); + doReturn(false).when(globalPartitionEndpointManagerForCircuitBreaker).isPartitionLevelCircuitBreakingApplicable(any()); + return client; } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java index 2548ca6e8db1..800e2203a159 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java @@ -3,6 +3,8 @@ package com.azure.cosmos.implementation.query; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; import com.azure.cosmos.models.CosmosChangeFeedRequestOptions; @@ -12,6 +14,7 @@ import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.models.ModelBridgeInternal; import io.reactivex.subscribers.TestSubscriber; +import org.mockito.Mockito; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import reactor.core.publisher.Mono; @@ -60,6 +63,9 @@ public static Object[][] queryParamProvider() { @Test(groups = { "unit" }, dataProvider = "queryParams") public void 
query(CosmosQueryRequestOptions options, int top) { + GlobalEndpointManager globalEndpointManagerMock = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreakerMock = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + FeedResponse fp1 = FeedResponseBuilder.queryFeedResponseBuilder(Document.class) .withContinuationToken("cp1") .withResults(new Document(), new Document(), new Document()) @@ -95,6 +101,8 @@ public void query(CosmosQueryRequestOptions options, int top) { return Mono.just(rsp); }; + Mockito.when(globalPartitionEndpointManagerForCircuitBreakerMock.isPartitionLevelCircuitBreakingApplicable(Mockito.any())).thenReturn(false); + ServerSideOnlyContinuationFetcherImpl fetcher = new ServerSideOnlyContinuationFetcherImpl<>(createRequestFunc, executeFunc, ModelBridgeInternal.getRequestContinuationFromQueryRequestOptions(options), false, top, ModelBridgeInternal.getMaxItemCountFromQueryRequestOptions(options), @@ -106,7 +114,9 @@ public void query(CosmosQueryRequestOptions options, int top) { ImplementationBridgeHelpers .CosmosQueryRequestOptionsHelper .getCosmosQueryRequestOptionsAccessor() - .getCancelledRequestDiagnosticsTracker(options)); + .getCancelledRequestDiagnosticsTracker(options), + globalEndpointManagerMock, + globalPartitionEndpointManagerForCircuitBreakerMock); validateFetcher(fetcher, options, top, feedResponseList); } @@ -134,6 +144,9 @@ private void validateFetcher(ServerSideOnlyContinuationFetcherImpl fet @Test(groups = { "unit" }) public void changeFeed() { + GlobalEndpointManager globalEndpointManagerMock = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreakerMock = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + CosmosChangeFeedRequestOptions options = CosmosChangeFeedRequestOptions.createForProcessingFromBeginning( 
FeedRangeEpkImpl.forFullRange()); @@ -164,6 +177,8 @@ public void changeFeed() { Function>> executeFunc = request -> Mono.just(feedResponseList.get(executeIndex.getAndIncrement())); + Mockito.when(globalPartitionEndpointManagerForCircuitBreakerMock.isPartitionLevelCircuitBreakingApplicable(Mockito.any())).thenReturn(false); + ServerSideOnlyContinuationFetcherImpl fetcher = new ServerSideOnlyContinuationFetcherImpl<>(createRequestFunc, executeFunc, null, isChangeFeed, top, options.getMaxItemCount(), @@ -171,7 +186,9 @@ public void changeFeed() { .CosmosChangeFeedRequestOptionsHelper .getCosmosChangeFeedRequestOptionsAccessor() .getOperationContext(options), - null); + null, + globalEndpointManagerMock, + globalPartitionEndpointManagerForCircuitBreakerMock); validateFetcher(fetcher, options, feedResponseList); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/ReadManySplitTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/ReadManySplitTest.java index 58cf734a970a..9bfbe3d77634 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/ReadManySplitTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/ReadManySplitTest.java @@ -5,6 +5,7 @@ import com.azure.cosmos.implementation.DiagnosticsClientContext; import com.azure.cosmos.implementation.Document; import com.azure.cosmos.implementation.DocumentClientRetryPolicy; +import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.Resource; import com.azure.cosmos.implementation.ResourceType; @@ -59,9 +60,13 @@ public void requestCreationOnSplitScenario() { PartitionKeyRange partitionKey = new PartitionKeyRange("0", "00", "FF"); Map rangeQueryMap = new HashMap<>(); rangeQueryMap.put(partitionKey, querySpec); + + DocumentCollection documentCollection = new 
DocumentCollection(); + documentCollection.setResourceId("testCollectionRid"); + parallelDocumentQueryExecutionContextBase.initializeReadMany( rangeQueryMap, - new CosmosQueryRequestOptions(), "testCollectionRid"); + new CosmosQueryRequestOptions(), documentCollection); //Parent document producer created DocumentProducer documentProducer = parallelDocumentQueryExecutionContextBase.documentProducers.get(0); @@ -116,7 +121,8 @@ protected DocumentProducer createDocumentProducer(String collectionRid, Function>> executeFunc, Supplier createRetryPolicyFunc, - FeedRangeEpkImpl feedRange) { + FeedRangeEpkImpl feedRange, + String collectionLink) { return new DocumentProducer( client, collectionRid, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ChangeFeedTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ChangeFeedTest.java index 0c5792402d8a..75d670da5c91 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ChangeFeedTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ChangeFeedTest.java @@ -221,7 +221,7 @@ private void changeFeed_withUpdatesAndDelete(boolean enableFullFidelityChangeFee .getContinuationToken(); Document docToBeDeleted = partitionKeyToDocuments.get(partitionKey).stream().findFirst().get(); - deleteDocument(client, docToBeDeleted.getSelfLink(), new PartitionKey(partitionKey)); + deleteDocument(client, docToBeDeleted.getSelfLink(), new PartitionKey(partitionKey), TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); CosmosChangeFeedRequestOptions changeFeedOptionForContinuationAfterDeletes = CosmosChangeFeedRequestOptions diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java index 4c4c8768ccd1..a94dfea5b85c 100644 --- 
a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java @@ -39,6 +39,7 @@ import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; +import javax.print.Doc; import java.util.ArrayList; import java.util.List; import java.util.UUID; @@ -172,30 +173,30 @@ public Object[][] collectionAndPermissionData() { public Object[][] documentAndPermissionData() { return new Object[][]{ //These tests will try to read document from its own getPermission and validate it, both with request Id and getName. - {createdDocument.getSelfLink(), createdDocPermission, createdDocument.getId(), null}, + {createdDocument.getSelfLink(), createdDocPermission, createdCollection, createdDocument.getId(), null}, // {TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollection.getId(), createdDocument.getId()), createdDocPermissionWithName, createdDocument.getId(), null}, //These tests will try to read document from its getPermission having partition getKey 1 and validate it, both with request Id and getName. - {createdDocumentWithPartitionKey.getSelfLink(), createdDocPermissionWithPartitionKey, createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, + {createdDocumentWithPartitionKey.getSelfLink(), createdDocPermissionWithPartitionKey, createdCollectionWithPartitionKey, createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, // TODO uncomment after https://github.com/Azure/azure-sdk-for-java/issues/26050 // {TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollectionWithPartitionKey.getId(), createdDocumentWithPartitionKey.getId()), createdDocPermissionWithPartitionKeyWithName // , createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, //These tests will try to read document from its getPermission having partition getKey 2 and validate it, both with request Id and getName. 
- {createdDocumentWithPartitionKey2.getSelfLink(), createdDocPermissionWithPartitionKey2, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2}, + {createdDocumentWithPartitionKey2.getSelfLink(), createdDocPermissionWithPartitionKey2, createdCollectionWithPartitionKey, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2}, // {TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollectionWithPartitionKey.getId(), createdDocumentWithPartitionKey2.getId()), // createdDocPermissionWithPartitionKey2WithName, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2}, // These tests will try to read document from its parent collection getPermission and validate it, both with request Id and getName. - {createdDocument.getSelfLink(), createdCollPermission, createdDocument.getId(), null}, + {createdDocument.getSelfLink(), createdCollPermission, createdCollection, createdDocument.getId(), null}, // {TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollection.getId(), createdDocument.getId()), createdCollPermissionWithName, createdDocument.getId(), null}, //This test will try to read document from collection getPermission having partition getKey 1 and validate it, both with request Id and getName. - {createdDocumentWithPartitionKey.getSelfLink(), createdColPermissionWithPartitionKey, createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, + {createdDocumentWithPartitionKey.getSelfLink(), createdColPermissionWithPartitionKey, createdCollectionWithPartitionKey, createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, //{TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollectionWithPartitionKey.getId(), createdDocumentWithPartitionKey.getId()), createdColPermissionWithPartitionKeyWithName, createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, //This test will try to read document from collection getPermission having partition getKey 2 and validate it, both with request Id and getName. 
- {createdDocumentWithPartitionKey2.getSelfLink(), createdColPermissionWithPartitionKey2, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2}, + {createdDocumentWithPartitionKey2.getSelfLink(), createdColPermissionWithPartitionKey2, createdCollectionWithPartitionKey, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2}, //{TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollectionWithPartitionKey.getId(), createdDocumentWithPartitionKey2.getId()), createdColPermissionWithPartitionKey2WithName, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2} }; @@ -205,7 +206,7 @@ public Object[][] documentAndPermissionData() { public Object[][] documentAndPermissionDataForResourceNotFound() { return new Object[][]{ //This test will try to read document from its resource token directly and validate it. - {createdDocumentWithPartitionKey2.getSelfLink(), createdColPermissionWithPartitionKey, PARTITION_KEY_VALUE}, + {createdDocumentWithPartitionKey2.getSelfLink(), createdColPermissionWithPartitionKey, createdCollectionWithPartitionKey, PARTITION_KEY_VALUE}, //This test will try to read document from its parent collection resource token directly and validate it. // {TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollectionWithPartitionKey.getId(), createdDocumentWithPartitionKey2.getId()), // createdColPermissionWithPartitionKeyWithName, PARTITION_KEY_VALUE} @@ -216,13 +217,13 @@ public Object[][] documentAndPermissionDataForResourceNotFound() { public Object[][] documentAndMultipleCollPermissionData() { return new Object[][]{ //These tests will try to read document from partition 1 with two collection getPermissions having different partition keys and validate it, both with request Id and getName. 
- {createdDocumentWithPartitionKey.getSelfLink(), createdColPermissionWithPartitionKey, createdColPermissionWithPartitionKey2, createdDocumentWithPartitionKey.getId(), + {createdDocumentWithPartitionKey.getSelfLink(), createdColPermissionWithPartitionKey, createdColPermissionWithPartitionKey2, createdCollectionWithPartitionKey, createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, // {TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollectionWithPartitionKey.getId(), createdDocumentWithPartitionKey.getId()), createdColPermissionWithPartitionKeyWithName // , createdColPermissionWithPartitionKey2WithName, createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, //These tests will try to read document from partition 1 with two collection getPermissions having different partition keys and validate it, both with request Id and getName. - {createdDocumentWithPartitionKey2.getSelfLink(), createdColPermissionWithPartitionKey, createdColPermissionWithPartitionKey2, createdDocumentWithPartitionKey2.getId(), + {createdDocumentWithPartitionKey2.getSelfLink(), createdColPermissionWithPartitionKey, createdColPermissionWithPartitionKey2, createdCollectionWithPartitionKey, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2}, // {TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollectionWithPartitionKey.getId(), createdDocumentWithPartitionKey2.getId()), createdColPermissionWithPartitionKeyWithName // , createdColPermissionWithPartitionKey2WithName, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2} @@ -286,11 +287,13 @@ public void readCollectionFromPermissionFeed(String collectionUrl, Permission pe * * @throws Exception */ - @Test(groups = { "fast" }, dataProvider = "documentAndPermissionData", timeOut = TIMEOUT) - public void readDocumentFromPermissionFeed(String documentUrl, Permission permission, String documentId, String partitionKey) throws Exception { + @Test(groups = { "fast" }, dataProvider = 
"documentAndPermissionData"/*, timeOut = TIMEOUT*/) + public void readDocumentFromPermissionFeed(String documentUrl, Permission permission, DocumentCollection documentCollection, String documentId, String partitionKey) throws Exception { AsyncDocumentClient asyncClientResourceToken = null; + try { List permissionFeed = new ArrayList<>(); + permissionFeed.add(permission); ConnectionPolicy defaultPolicy = ConnectionPolicy.getDefaultPolicy(); defaultPolicy.setConnectionMode(ConnectionMode.GATEWAY); @@ -310,6 +313,7 @@ public void readDocumentFromPermissionFeed(String documentUrl, Permission permis } else { options.setPartitionKey(PartitionKey.NONE); } + Mono> readObservable = asyncClientResourceToken .readDocument(documentUrl, options); ResourceResponseValidator validator = new ResourceResponseValidator.Builder() @@ -357,7 +361,7 @@ public void readDocumentFromResouceToken(String resourceToken) throws Exception * @throws Exception */ @Test(groups = {"fast"}, dataProvider = "documentAndMultipleCollPermissionData", timeOut = TIMEOUT) - public void readDocumentOfParKeyFromTwoCollPermissionWithDiffPartitionKeys(String documentUrl, Permission collPermission1, Permission collPermission2, String documentId, String partitionKey) throws Exception { + public void readDocumentOfParKeyFromTwoCollPermissionWithDiffPartitionKeys(String documentUrl, Permission collPermission1, Permission collPermission2, DocumentCollection documentCollection, String documentId, String partitionKey) throws Exception { AsyncDocumentClient asyncClientResourceToken = null; try { List permissionFeed = new ArrayList<>(); @@ -394,7 +398,7 @@ public void readDocumentOfParKeyFromTwoCollPermissionWithDiffPartitionKeys(Strin * @throws Exception */ @Test(groups = { "fast" },dataProvider = "documentAndPermissionDataForResourceNotFound", timeOut = TIMEOUT) - public void readDocumentFromCollPermissionWithDiffPartitionKey_ResourceNotFound(String documentUrl, Permission permission, String partitionKey) throws 
Exception { + public void readDocumentFromCollPermissionWithDiffPartitionKey_ResourceNotFound(String documentUrl, Permission permission, DocumentCollection documentCollection, String partitionKey) throws Exception { AsyncDocumentClient asyncClientResourceToken = null; try { List permissionFeed = new ArrayList<>(); @@ -413,6 +417,7 @@ public void readDocumentFromCollPermissionWithDiffPartitionKey_ResourceNotFound( .build(); RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(partitionKey)); + Mono> readObservable = asyncClientResourceToken .readDocument(documentUrl, options); FailureValidator validator = new FailureValidator.Builder().resourceNotFound().build(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java index b015f15893e1..ee7ad051eb78 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java @@ -204,7 +204,7 @@ public CosmosAsyncDatabase getDatabase(String id) { } } - @BeforeSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split"}, timeOut = SUITE_SETUP_TIMEOUT) + @BeforeSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many"}, timeOut = SUITE_SETUP_TIMEOUT) public void beforeSuite() { logger.info("beforeSuite Started"); @@ -226,7 +226,7 @@ public static void parallelizeUnitTests(ITestContext context) { context.getSuite().getXmlSuite().setThreadCount(Runtime.getRuntime().availableProcessors()); } - @AfterSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", 
"emulator", "split", "query", "cfp-split"}, timeOut = SUITE_SHUTDOWN_TIMEOUT) + @AfterSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many"}, timeOut = SUITE_SHUTDOWN_TIMEOUT) public void afterSuite() { logger.info("afterSuite Started"); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-misc-direct-testng.xml b/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-misc-direct-testng.xml new file mode 100644 index 000000000000..37adba46a374 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-misc-direct-testng.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-misc-gateway-testng.xml b/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-misc-gateway-testng.xml new file mode 100644 index 000000000000..b68cad70628e --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-misc-gateway-testng.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-read-all-read-many-testng.xml b/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-read-all-read-many-testng.xml new file mode 100644 index 000000000000..541cd8cbb867 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-read-all-read-many-testng.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index 30ea01807327..b3d475cdc9c5 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -5,8 +5,10 @@ #### Features Added * Added optional id validation to prevent documents with invalid char '/' in id property to be created. 
- See [PR 41108](https://github.com/Azure/azure-sdk-for-java/pull/41108) * Added support for specifying a set of custom diagnostic correlation ids in the request options. - See [PR 40835](https://github.com/Azure/azure-sdk-for-java/pull/40835) +* Added support for client-driven partition-level failover for multi-write CosmosDB accounts. - See [PR 39265](https://github.com/Azure/azure-sdk-for-java/pull/39265) #### Breaking Changes +* Fixed an issue where customer provided session token is not honored for the `readMany` operation. - See [PR 39265](https://github.com/Azure/azure-sdk-for-java/pull/39265) #### Bugs Fixed * Fixed an issue where `contactedRegions` shows the wrong region in a multi region account if no preferred regions are specified. - See [PR 41045](https://github.com/Azure/azure-sdk-for-java/pull/41045) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java index 0197f50ecd9e..48f631b4af3c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java @@ -731,7 +731,7 @@ private Mono createDatabaseInternal(Database database, C requestOptions); } - private ConsistencyLevel getEffectiveConsistencyLevel( + ConsistencyLevel getEffectiveConsistencyLevel( OperationType operationType, ConsistencyLevel desiredConsistencyLevelOfOperation) { @@ -860,7 +860,7 @@ public boolean isEndpointDiscoveryEnabled(CosmosAsyncClient client) { @Override public String getConnectionMode(CosmosAsyncClient client) { - return client.connectionPolicy.getConnectionMode().toString(); + return client.getConnectionPolicy().getConnectionMode().toString(); } @Override diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java index 
21fcc64bfa5a..e15a88e12618 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java @@ -1488,7 +1488,7 @@ public Mono> readMany( CosmosReadManyRequestOptions options = new CosmosReadManyRequestOptions(); - if (!StringUtils.isNotEmpty(sessionToken)) { + if (StringUtils.isNotEmpty(sessionToken)) { options = options.setSessionToken(sessionToken); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java index 9a74fd197593..7bf588e584e9 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java @@ -1183,6 +1183,7 @@ CosmosAsyncClient buildAsyncClient(boolean logStartupInfo) { validateConfig(); buildConnectionPolicy(); CosmosAsyncClient cosmosAsyncClient = new CosmosAsyncClient(this); + if (proactiveContainerInitConfig != null) { cosmosAsyncClient.recordOpenConnectionsAndInitCachesStarted(proactiveContainerInitConfig.getCosmosContainerIdentities()); @@ -1217,6 +1218,7 @@ public CosmosClient buildClient() { validateConfig(); buildConnectionPolicy(); CosmosClient cosmosClient = new CosmosClient(this); + if (proactiveContainerInitConfig != null) { cosmosClient.recordOpenConnectionsAndInitCachesStarted(proactiveContainerInitConfig.getCosmosContainerIdentities()); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java index 3f3bf6610553..0e964bd550cf 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java @@ -6,7 +6,9 @@ import com.azure.cosmos.implementation.DiagnosticsClientContext; 
import com.azure.cosmos.implementation.FeedResponseDiagnostics; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.implementation.MetadataDiagnosticsContext; import com.azure.cosmos.implementation.RxDocumentServiceRequest; +import com.azure.cosmos.implementation.SerializationDiagnosticsContext; import com.azure.cosmos.implementation.guava25.collect.ImmutableList; import com.azure.cosmos.util.Beta; import com.fasterxml.jackson.annotation.JsonIgnore; @@ -324,6 +326,14 @@ CosmosDiagnostics setSamplingRateSnapshot(double samplingRate) { return this; } + String getFirstContactedRegion() { + return this.clientSideRequestStatistics.getFirstContactedRegion(); + } + + URI getFirstContactedLocationEndpoint() { + return this.clientSideRequestStatistics.getFirstContactedLocationEndpoint(); + } + /////////////////////////////////////////////////////////////////////////////////////////// // the following helper/accessor only helps to access this class outside of this package.// /////////////////////////////////////////////////////////////////////////////////////////// @@ -466,6 +476,43 @@ public void setDiagnosticsContext(CosmosDiagnostics cosmosDiagnostics, CosmosDia cosmosDiagnostics.setDiagnosticsContext(ctx); } + + @Override + public URI getFirstContactedLocationEndpoint(CosmosDiagnostics cosmosDiagnostics) { + + if (cosmosDiagnostics == null) { + return null; + } + + return cosmosDiagnostics.getFirstContactedLocationEndpoint(); + } + + @Override + public void mergeMetadataDiagnosticContext(CosmosDiagnostics cosmosDiagnostics, MetadataDiagnosticsContext otherMetadataDiagnosticsContext) { + + if (cosmosDiagnostics == null) { + return; + } + + ClientSideRequestStatistics clientSideRequestStatistics = cosmosDiagnostics.clientSideRequestStatistics; + + if (clientSideRequestStatistics != null) { + clientSideRequestStatistics.mergeMetadataDiagnosticsContext(otherMetadataDiagnosticsContext); + } + } + + @Override + public void 
mergeSerializationDiagnosticContext(CosmosDiagnostics cosmosDiagnostics, SerializationDiagnosticsContext otherSerializationDiagnosticsContext) { + if (cosmosDiagnostics == null) { + return; + } + + ClientSideRequestStatistics clientSideRequestStatistics = cosmosDiagnostics.clientSideRequestStatistics; + + if (clientSideRequestStatistics != null) { + clientSideRequestStatistics.mergeSerializationDiagnosticsContext(otherSerializationDiagnosticsContext); + } + } }); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java index 9530963698fd..1ef7b14c9908 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java @@ -5,6 +5,7 @@ import com.azure.core.credential.AzureKeyCredential; import com.azure.core.credential.TokenCredential; import com.azure.cosmos.ConsistencyLevel; +import com.azure.cosmos.CosmosAsyncClient; import com.azure.cosmos.CosmosContainerProactiveInitConfig; import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; import com.azure.cosmos.CosmosItemSerializer; @@ -14,6 +15,7 @@ import com.azure.cosmos.implementation.batch.ServerBatchRequest; import com.azure.cosmos.implementation.caches.RxClientCollectionCache; import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; import com.azure.cosmos.implementation.directconnectivity.AddressSelector; import com.azure.cosmos.implementation.faultinjection.IFaultInjectorProvider; @@ -1590,6 +1592,8 @@ Flux> readAllDocuments( */ GlobalEndpointManager getGlobalEndpointManager(); + GlobalPartitionEndpointManagerForCircuitBreaker 
getGlobalPartitionEndpointManagerForCircuitBreaker(); + /*** * Get the address selector. * @return the address selector. diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java index d713e1938012..af5a391ba72d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java @@ -5,6 +5,7 @@ import com.azure.cosmos.CosmosItemSerializer; import com.azure.cosmos.implementation.changefeed.common.ChangeFeedState; import com.azure.cosmos.implementation.changefeed.common.ChangeFeedStateV1; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.feedranges.FeedRangeInternal; import com.azure.cosmos.implementation.query.Paginator; import com.azure.cosmos.implementation.spark.OperationContext; @@ -18,6 +19,7 @@ import java.util.HashMap; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; import java.util.function.Supplier; @@ -29,12 +31,16 @@ class ChangeFeedQueryImpl { ImplementationBridgeHelpers.FeedResponseHelper.FeedResponseAccessor feedResponseAccessor = ImplementationBridgeHelpers.FeedResponseHelper.getFeedResponseAccessor(); + private final static ImplementationBridgeHelpers.CosmosChangeFeedRequestOptionsHelper.CosmosChangeFeedRequestOptionsAccessor changeFeedRequestOptionsAccessor = + ImplementationBridgeHelpers.CosmosChangeFeedRequestOptionsHelper.getCosmosChangeFeedRequestOptionsAccessor(); + private static final int INITIAL_TOP_VALUE = -1; private final RxDocumentClientImpl client; private final DiagnosticsClientContext clientContext; private final Supplier createRequestFunc; private final String documentsLink; + private final String 
collectionLink; private final Function>> executeFunc; private final Class klass; private final CosmosChangeFeedRequestOptions options; @@ -72,6 +78,7 @@ public ChangeFeedQueryImpl( this.client = client; this.resourceType = resourceType; this.klass = klass; + this.collectionLink = collectionLink; this.documentsLink = Utils.joinPath(collectionLink, Paths.DOCUMENTS_PATH_SEGMENT); this.options = requestOptions; this.itemSerializer = client.getEffectiveItemSerializer(requestOptions.getCustomItemSerializer()); @@ -110,7 +117,7 @@ public Flux> executeAsync() { .CosmosChangeFeedRequestOptionsHelper .getCosmosChangeFeedRequestOptionsAccessor() .getOperationContext(this.options) - ); + ); } private RxDocumentServiceRequest createDocumentServiceRequest() { @@ -140,6 +147,8 @@ private RxDocumentServiceRequest createDocumentServiceRequest() { if (request.requestContext != null) { request.requestContext.setExcludeRegions(options.getExcludedRegions()); request.requestContext.setKeywordIdentifiers(options.getKeywordIdentifiers()); + request.requestContext.setFeedOperationContext( + new FeedOperationContextForCircuitBreaker(new ConcurrentHashMap<>(), false, collectionLink)); } return request; @@ -147,8 +156,9 @@ private RxDocumentServiceRequest createDocumentServiceRequest() { private Mono> executeRequestAsync(RxDocumentServiceRequest request) { if (this.operationContextAndListener == null) { - return client.readFeed(request) - .map(rsp -> feedResponseAccessor.createChangeFeedResponse(rsp, this.itemSerializer, klass)); + return handlePartitionLevelCircuitBreakingPrerequisites(request) + .flatMap(client::readFeed) + .map(rsp -> feedResponseAccessor.createChangeFeedResponse(rsp, this.itemSerializer, klass, rsp.getCosmosDiagnostics())); } else { final OperationListener listener = operationContextAndListener.getOperationListener(); final OperationContext operationContext = operationContextAndListener.getOperationContext(); @@ -157,31 +167,71 @@ private Mono> 
executeRequestAsync(RxDocumentServiceRequest reque .put(HttpConstants.HttpHeaders.CORRELATED_ACTIVITY_ID, operationContext.getCorrelationActivityId()); listener.requestListener(operationContext, request); - return client.readFeed(request) - .map(rsp -> { - listener.responseListener(operationContext, rsp); - - final FeedResponse feedResponse = feedResponseAccessor.createChangeFeedResponse( - rsp, this.itemSerializer, klass); - - Map rspHeaders = feedResponse.getResponseHeaders(); - String requestPkRangeId = null; - if (!rspHeaders.containsKey(HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID) && - (requestPkRangeId = request - .getHeaders() - .get(HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID)) != null) { - - rspHeaders.put( - HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID, - requestPkRangeId - ); - } - listener.feedResponseReceivedListener(operationContext, feedResponse); - - return feedResponse; - }) - .doOnError(ex -> listener.exceptionListener(operationContext, ex) - ); + return handlePartitionLevelCircuitBreakingPrerequisites(request) + .flatMap(client::readFeed) + .map(rsp -> { + listener.responseListener(operationContext, rsp); + + final FeedResponse feedResponse = feedResponseAccessor.createChangeFeedResponse( + rsp, this.itemSerializer, klass, rsp.getCosmosDiagnostics()); + + Map rspHeaders = feedResponse.getResponseHeaders(); + String requestPkRangeId = null; + if (!rspHeaders.containsKey(HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID) && + (requestPkRangeId = request + .getHeaders() + .get(HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID)) != null) { + + rspHeaders.put( + HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID, + requestPkRangeId + ); + } + listener.feedResponseReceivedListener(operationContext, feedResponse); + + return feedResponse; + }) + .doOnError(ex -> listener.exceptionListener(operationContext, ex)); + } + } + + private Mono handlePartitionLevelCircuitBreakingPrerequisites(RxDocumentServiceRequest request) { + + 
GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = client.getGlobalPartitionEndpointManagerForCircuitBreaker(); + + checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "Argument 'globalPartitionEndpointManagerForCircuitBreaker' must not be null!"); + + if (globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + return Mono.just(request) + .flatMap(req -> client.populateHeadersAsync(req, RequestVerb.GET)) + .flatMap(req -> client.getCollectionCache().resolveCollectionAsync(null, req) + .flatMap(documentCollectionValueHolder -> { + + checkNotNull(documentCollectionValueHolder, "Argument 'documentCollectionValueHolder' cannot be null!"); + checkNotNull(documentCollectionValueHolder.v, "Argument 'documentCollectionValueHolder.v' cannot be null!"); + + return client.getPartitionKeyRangeCache().tryLookupAsync(null, documentCollectionValueHolder.v.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { + + checkNotNull(collectionRoutingMapValueHolder, "Argument 'collectionRoutingMapValueHolder' cannot be null!"); + checkNotNull(collectionRoutingMapValueHolder.v, "Argument 'collectionRoutingMapValueHolder.v' cannot be null!"); + + changeFeedRequestOptionsAccessor.setPartitionKeyDefinition(options, documentCollectionValueHolder.v.getPartitionKey()); + changeFeedRequestOptionsAccessor.setCollectionRid(options, documentCollectionValueHolder.v.getResourceId()); + + client.addPartitionLevelUnavailableRegionsForChangeFeedRequest(req, options, collectionRoutingMapValueHolder.v); + + if (req.requestContext.getClientRetryPolicySupplier() != null) { + DocumentClientRetryPolicy documentClientRetryPolicy = req.requestContext.getClientRetryPolicySupplier().get(); + documentClientRetryPolicy.onBeforeSendRequest(req); + } + + return Mono.just(req); + }); + })); + } else { + return Mono.just(request); } } } diff --git 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index 6e339db58c88..329c9f1a8f05 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -9,6 +9,7 @@ import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.caches.RxCollectionCache; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.directconnectivity.WebExceptionUtility; import com.azure.cosmos.implementation.faultinjection.FaultInjectionRequestContext; import org.slf4j.Logger; @@ -52,12 +53,14 @@ public class ClientRetryPolicy extends DocumentClientRetryPolicy { private RxDocumentServiceRequest request; private RxCollectionCache rxCollectionCache; private final FaultInjectionRequestContext faultInjectionRequestContext; + private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker; public ClientRetryPolicy(DiagnosticsClientContext diagnosticsClientContext, GlobalEndpointManager globalEndpointManager, boolean enableEndpointDiscovery, ThrottlingRetryOptions throttlingRetryOptions, - RxCollectionCache rxCollectionCache) { + RxCollectionCache rxCollectionCache, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { this.globalEndpointManager = globalEndpointManager; this.failoverRetryCount = 0; @@ -73,6 +76,7 @@ public ClientRetryPolicy(DiagnosticsClientContext diagnosticsClientContext, false); this.rxCollectionCache = rxCollectionCache; this.faultInjectionRequestContext = new 
FaultInjectionRequestContext(); + this.globalPartitionEndpointManagerForCircuitBreaker = globalPartitionEndpointManagerForCircuitBreaker; } @Override @@ -161,6 +165,34 @@ public Mono shouldRetry(Exception e) { clientException); } + if (clientException != null + && Exceptions.isStatusCode(clientException, HttpConstants.StatusCodes.REQUEST_TIMEOUT) + && Exceptions.isSubStatusCode(clientException, HttpConstants.SubStatusCodes.TRANSIT_TIMEOUT)) { + + if (logger.isDebugEnabled()) { + logger.debug( + "Request timeout - IsReadRequest {}, IsWebExceptionRetriable {}, NonIdempotentWriteRetriesEnabled {}", + this.isReadRequest, + false, + this.request.getNonIdempotentWriteRetriesEnabled(), + e); + } + + return this.shouldRetryOnRequestTimeout( + this.isReadRequest, + this.request.getNonIdempotentWriteRetriesEnabled() + ); + } + + if (clientException != null && Exceptions.isStatusCode(clientException, HttpConstants.StatusCodes.INTERNAL_SERVER_ERROR)) { + + if (logger.isDebugEnabled()) { + logger.info("Internal server error - IsReadRequest {}", this.isReadRequest, e); + } + + return this.shouldRetryOnInternalServerError(); + } + return this.throttlingRetry.shouldRetry(e); } @@ -268,10 +300,15 @@ private Mono shouldRetryOnEndpointFailureAsync(boolean isRead } private Mono shouldRetryOnGatewayTimeout() { - boolean canFailoverOnTimeout = canGatewayRequestFailoverOnTimeout(request); + + boolean canFailoverOnTimeout = canGatewayRequestFailoverOnTimeout(this.request); + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(this.request)) { + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationExceptionForPartitionKeyRange(this.request, this.request.requestContext.locationEndpointToRoute); + } //if operation is data plane read, metadata read, or query plan it can be retried on a different endpoint. 
- if(canFailoverOnTimeout) { + if (canFailoverOnTimeout) { if (!this.enableEndpointDiscovery || this.failoverRetryCount > MaxRetryCount) { logger.warn("shouldRetryOnHttpTimeout() Not retrying. Retry count = {}", this.failoverRetryCount); return Mono.just(ShouldRetryResult.noRetry()); @@ -317,6 +354,11 @@ private Mono shouldRetryOnBackendServiceUnavailableAsync( boolean nonIdempotentWriteRetriesEnabled, CosmosException cosmosException) { + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(this.request)) { + this.globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(this.request, this.request.requestContext.locationEndpointToRoute); + } + // The request has failed with 503, SDK need to decide whether it is safe to retry for write operations // For server generated retries, it is safe to retry // For SDK generated 503, it will be more tricky as we have to decide the cause of it. For any causes that SDK not sure whether the request @@ -367,6 +409,32 @@ private Mono shouldRetryOnBackendServiceUnavailableAsync( return Mono.just(ShouldRetryResult.retryAfter(Duration.ZERO)); } + private Mono shouldRetryOnRequestTimeout( + boolean isReadRequest, + boolean nonIdempotentWriteRetriesEnabled) { + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(this.request)) { + if (!isReadRequest && !nonIdempotentWriteRetriesEnabled) { + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationExceptionForPartitionKeyRange( + request, + request.requestContext.locationEndpointToRoute); + } + } + + return Mono.just(ShouldRetryResult.NO_RETRY); + } + + private Mono shouldRetryOnInternalServerError() { + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(this.request)) { + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationExceptionForPartitionKeyRange( + request, + 
request.requestContext.locationEndpointToRoute); + } + + return Mono.just(ShouldRetryResult.NO_RETRY); + } + @Override public void onBeforeSendRequest(RxDocumentServiceRequest request) { this.request = request; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java index 250300127654..eb1eb492de22 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java @@ -3,6 +3,7 @@ package com.azure.cosmos.implementation; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.azure.cosmos.implementation.circuitBreaker.LocationSpecificHealthContext; import com.azure.cosmos.implementation.cpu.CpuMemoryMonitor; import com.azure.cosmos.implementation.directconnectivity.StoreResponseDiagnostics; import com.azure.cosmos.implementation.directconnectivity.StoreResultDiagnostics; @@ -26,8 +27,10 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.NavigableSet; import java.util.Objects; import java.util.Set; +import java.util.TreeSet; import java.util.UUID; import java.util.concurrent.ConcurrentLinkedDeque; import java.util.stream.Collectors; @@ -46,6 +49,7 @@ public class ClientSideRequestStatistics { private Instant requestStartTimeUTC; private Instant requestEndTimeUTC; private Set regionsContacted; + private NavigableSet regionsContactedWithContext; private Set locationEndpointsContacted; private RetryContext retryContext; private FaultInjectionRequestContext requestContext; @@ -70,6 +74,7 @@ public ClientSideRequestStatistics(DiagnosticsClientContext diagnosticsClientCon this.contactedReplicas = Collections.synchronizedList(new ArrayList<>()); this.failedReplicas = 
Collections.synchronizedSet(new HashSet<>()); this.regionsContacted = Collections.synchronizedSet(new HashSet<>()); + this.regionsContactedWithContext = Collections.synchronizedNavigableSet(new TreeSet<>()); this.locationEndpointsContacted = Collections.synchronizedSet(new HashSet<>()); this.metadataDiagnosticsContext = new MetadataDiagnosticsContext(); this.serializationDiagnosticsContext = new SerializationDiagnosticsContext(); @@ -92,6 +97,7 @@ public ClientSideRequestStatistics(ClientSideRequestStatistics toBeCloned) { this.contactedReplicas = Collections.synchronizedList(new ArrayList<>(toBeCloned.contactedReplicas)); this.failedReplicas = Collections.synchronizedSet(new HashSet<>(toBeCloned.failedReplicas)); this.regionsContacted = Collections.synchronizedSet(new HashSet<>(toBeCloned.regionsContacted)); + this.regionsContactedWithContext = Collections.synchronizedNavigableSet(new TreeSet<>(toBeCloned.regionsContactedWithContext)); this.locationEndpointsContacted = Collections.synchronizedSet( new HashSet<>(toBeCloned.locationEndpointsContacted)); this.metadataDiagnosticsContext = new MetadataDiagnosticsContext(toBeCloned.metadataDiagnosticsContext); @@ -162,6 +168,7 @@ public void recordResponse(RxDocumentServiceRequest request, StoreResultDiagnost this.approximateInsertionCountInBloomFilter = request.requestContext.getApproximateBloomFilterInsertionCount(); storeResponseStatistics.sessionTokenEvaluationResults = request.requestContext.getSessionTokenEvaluationResults(); + storeResponseStatistics.locationToLocationSpecificHealthContext = request.requestContext.getLocationToLocationSpecificHealthContext(); if (request.requestContext.getEndToEndOperationLatencyPolicyConfig() != null) { storeResponseStatistics.e2ePolicyCfg = @@ -187,6 +194,7 @@ public void recordResponse(RxDocumentServiceRequest request, StoreResultDiagnost globalEndpointManager.getRegionName(locationEndPoint, request.getOperationType()); 
this.regionsContacted.add(storeResponseStatistics.regionName); this.locationEndpointsContacted.add(locationEndPoint); + this.regionsContactedWithContext.add(new RegionWithContext(storeResponseStatistics.regionName, locationEndPoint)); } if (storeResponseStatistics.requestOperationType == OperationType.Head @@ -219,8 +227,13 @@ public void recordGatewayResponse( this.recordRetryContextEndTime(); if (locationEndPoint != null) { - this.regionsContacted.add(globalEndpointManager.getRegionName(locationEndPoint, rxDocumentServiceRequest.getOperationType())); + + String regionName = globalEndpointManager.getRegionName(locationEndPoint, rxDocumentServiceRequest.getOperationType()); + + this.regionsContacted.add(regionName); this.locationEndpointsContacted.add(locationEndPoint); + + this.regionsContactedWithContext.add(new RegionWithContext(regionName, locationEndPoint)); } GatewayStatistics gatewayStatistics = new GatewayStatistics(); @@ -231,6 +244,7 @@ public void recordGatewayResponse( if (rxDocumentServiceRequest.requestContext != null) { gatewayStatistics.sessionTokenEvaluationResults = rxDocumentServiceRequest.requestContext.getSessionTokenEvaluationResults(); + gatewayStatistics.locationToLocationSpecificHealthContext = rxDocumentServiceRequest.requestContext.getLocationToLocationSpecificHealthContext(); } } gatewayStatistics.statusCode = storeResponseDiagnostics.getStatusCode(); @@ -256,6 +270,26 @@ public int getRequestPayloadSizeInBytes() { return this.requestPayloadSizeInBytes; } + public void mergeMetadataDiagnosticsContext(MetadataDiagnosticsContext other) { + if (other == null || other.metadataDiagnosticList == null || other.metadataDiagnosticList.isEmpty()) { + return; + } + + for (MetadataDiagnosticsContext.MetadataDiagnostics metadataDiagnostics : other.metadataDiagnosticList) { + this.metadataDiagnosticsContext.addMetaDataDiagnostic(metadataDiagnostics); + } + } + + public void mergeSerializationDiagnosticsContext(SerializationDiagnosticsContext other) { 
+ if (other == null || other.serializationDiagnosticsList == null || other.serializationDiagnosticsList.isEmpty()) { + return; + } + + for (SerializationDiagnosticsContext.SerializationDiagnostics serializationDiagnostics : other.serializationDiagnosticsList) { + this.serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); + } + } + public String recordAddressResolutionStart( URI targetEndpoint, boolean forceRefresh, @@ -414,6 +448,21 @@ private void mergeLocationEndpointsContacted(Set other) { } } + private void mergeRegionWithContextSet(NavigableSet other) { + if (other == null) { + return; + } + + if (this.regionsContactedWithContext == null || this.regionsContactedWithContext.isEmpty()) { + this.regionsContactedWithContext = other; + return; + } + + for (RegionWithContext regionWithContext : other) { + this.regionsContactedWithContext.add(regionWithContext); + } + } + private void mergeRegionsContacted(Set other) { if (other == null) { return; @@ -475,6 +524,7 @@ public void mergeClientSideRequestStatistics(ClientSideRequestStatistics other) this.mergeFailedReplica(other.failedReplicas); this.mergeLocationEndpointsContacted(other.locationEndpointsContacted); this.mergeRegionsContacted(other.regionsContacted); + this.mergeRegionWithContextSet(other.regionsContactedWithContext); this.mergeStartTime(other.requestStartTimeUTC); this.mergeEndTime(other.requestEndTimeUTC); this.mergeSupplementalResponses(other.supplementalResponseStatisticsList); @@ -601,6 +651,22 @@ public ClientSideRequestStatistics setSamplingRateSnapshot(double samplingRateSn return this; } + public String getFirstContactedRegion() { + if (this.regionsContactedWithContext == null || this.regionsContactedWithContext.isEmpty()) { + return StringUtils.EMPTY; + } + + return this.regionsContactedWithContext.first().regionContacted; + } + + public URI getFirstContactedLocationEndpoint() { + if (this.regionsContactedWithContext == null || 
this.regionsContactedWithContext.isEmpty()) { + return null; + } + + return this.regionsContactedWithContext.first().locationEndpointsContacted; + } + public static class StoreResponseStatistics { @JsonSerialize(using = StoreResultDiagnostics.StoreResultDiagnosticsSerializer.class) private StoreResultDiagnostics storeResult; @@ -627,6 +693,9 @@ public static class StoreResponseStatistics { @JsonSerialize private Set sessionTokenEvaluationResults; + @JsonSerialize + private Utils.ValueHolder> locationToLocationSpecificHealthContext; + public String getExcludedRegions() { return this.excludedRegions; } @@ -663,6 +732,10 @@ public Set getSessionTokenEvaluationResults() { return sessionTokenEvaluationResults; } + public Utils.ValueHolder> getLocationToLocationSpecificHealthContext() { + return locationToLocationSpecificHealthContext; + } + @JsonIgnore public Duration getDuration() { if (requestStartTimeUTC == null || @@ -704,7 +777,7 @@ public void serialize( generator.writeObjectField("responseStatisticsList", statistics.responseStatisticsList); generator.writeObjectField("supplementalResponseStatisticsList", getCappedSupplementalResponseStatisticsList(statistics.supplementalResponseStatisticsList)); generator.writeObjectField("addressResolutionStatistics", statistics.addressResolutionStatistics); - generator.writeObjectField("regionsContacted", statistics.regionsContacted); + generator.writeObjectField("regionsContacted", statistics.getContactedRegionNames()); generator.writeObjectField("retryContext", statistics.retryContext); generator.writeObjectField("metadataDiagnosticsContext", statistics.getMetadataDiagnosticsContext()); generator.writeObjectField("serializationDiagnosticsContext", statistics.getSerializationDiagnosticsContext()); @@ -825,6 +898,7 @@ public static class GatewayStatistics { private String faultInjectionRuleId; private List faultInjectionEvaluationResults; private Set sessionTokenEvaluationResults; + private Utils.ValueHolder> 
locationToLocationSpecificHealthContext; public String getSessionToken() { return sessionToken; @@ -882,6 +956,10 @@ public Set getSessionTokenEvaluationResults() { return sessionTokenEvaluationResults; } + public Utils.ValueHolder> getLocationToLocationSpecificHealthContext() { + return locationToLocationSpecificHealthContext; + } + public static class GatewayStatisticsSerializer extends StdSerializer { private static final long serialVersionUID = 1L; @@ -915,6 +993,7 @@ public void serialize(GatewayStatistics gatewayStatistics, } this.writeNonEmptyStringSetField(jsonGenerator, "sessionTokenEvaluationResults", gatewayStatistics.getSessionTokenEvaluationResults()); + this.writeNonNullObjectField(jsonGenerator, "locationToLocationSpecificHealthContext", gatewayStatistics.getLocationToLocationSpecificHealthContext()); jsonGenerator.writeEndObject(); } @@ -941,6 +1020,14 @@ private void writeNonEmptyStringSetField(JsonGenerator jsonGenerator, String fie jsonGenerator.writePOJOField(fieldName, values); } + + private void writeNonNullObjectField(JsonGenerator jsonGenerator, String fieldName, Object object) throws IOException { + if (object == null) { + return; + } + + jsonGenerator.writePOJOField(fieldName, object); + } } } @@ -965,4 +1052,31 @@ public static CosmosDiagnosticsSystemUsageSnapshot fetchSystemInformation() { (maxMemory - (totalMemory - freeMemory)) + " KB", runtime.availableProcessors()); } + + static class RegionWithContext implements Comparable { + + private final String regionContacted; + private final URI locationEndpointsContacted; + private final long recordedTimestamp; + + RegionWithContext(String regionContacted, URI locationEndpointsContacted) { + this.regionContacted = regionContacted; + this.locationEndpointsContacted = locationEndpointsContacted; + this.recordedTimestamp = System.currentTimeMillis(); + } + + @Override + public int compareTo(RegionWithContext o) { + + if (o == null || this.recordedTimestamp > o.recordedTimestamp) { + return 1; + 
} + + if (this.recordedTimestamp == o.recordedTimestamp) { + return 0; + } + + return -1; + } + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CollectionRoutingMapNotFoundException.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CollectionRoutingMapNotFoundException.java new file mode 100644 index 000000000000..ad0a9c82fed1 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CollectionRoutingMapNotFoundException.java @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation; + +import com.azure.cosmos.CosmosException; +import com.azure.cosmos.implementation.directconnectivity.WFConstants; + +public class CollectionRoutingMapNotFoundException extends CosmosException { + private static final long serialVersionUID = 1L; + + /** + * Instantiates a new collection routing map not found exception (status NOTFOUND, sub-status INCORRECT_CONTAINER_RID_SUB_STATUS). + * + * @param msg the exception message + */ + public CollectionRoutingMapNotFoundException(String msg) { + super(HttpConstants.StatusCodes.NOTFOUND, msg); + setSubStatus(); + } + + private void setSubStatus() { + this.getResponseHeaders().put( + WFConstants.BackendHeaders.SUB_STATUS, + Integer.toString(HttpConstants.SubStatusCodes.INCORRECT_CONTAINER_RID_SUB_STATUS)); + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index b706c6781dde..bf35c53be1df 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -3,6 +3,7 @@ package com.azure.cosmos.implementation; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.azure.cosmos.implementation.circuitBreaker.PartitionLevelCircuitBreakerConfig; import 
com.azure.cosmos.implementation.directconnectivity.Protocol; import io.netty.handler.ssl.SslContext; import io.netty.handler.ssl.SslContextBuilder; @@ -216,6 +217,31 @@ public class Configs { public static final String METRICS_CONFIG = "COSMOS.METRICS_CONFIG"; public static final String DEFAULT_METRICS_CONFIG = CosmosMicrometerMetricsConfig.DEFAULT.toJson(); + // For partition-level circuit breaker, below config will set the tolerated consecutive exception counts + // for reads and writes for a given partition before being marked as Unavailable + private static final String DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG = PartitionLevelCircuitBreakerConfig.DEFAULT.toJson(); + private static final String PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG = "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"; + private static final String STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT = "COSMOS.STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT"; + private static final int DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT = 2; + private static final String STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = "COSMOS.STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS"; + private static final int DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = 1; + + // For partition-level circuit breaker, a background thread will run periodically every y seconds at a minimum + // in an attempt to recover Unavailable partitions + private static final String STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS = "COSMOS.STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS"; + private static final int DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS = 60; + + // For partition-level circuit breaker, a partition can be allowed to be Unavailable for minimum of x seconds + // as specified by the below setting after which a background thread will attempt to recover the partition + private static final String 
ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS = "COSMOS.ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS"; + private static final int DEFAULT_ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS = 30; + + // For partition-level circuit breaker, in order to recover a partition in a region, the SDK when configured + // in the direct connectivity mode, establishes connections to replicas to attempt to recover a region + // Below sets a time limit on how long these connection establishments be attempted for + private static final int DEFAULT_CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS = 10; + private static final String CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS = "COSMOS.CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS"; + public Configs() { this.sslContext = sslContextInit(); } @@ -627,4 +653,111 @@ public static CosmosMicrometerMetricsConfig getMetricsConfig() { return CosmosMicrometerMetricsConfig.fromJsonString(metricsConfig); } + + public static PartitionLevelCircuitBreakerConfig getPartitionLevelCircuitBreakerConfig() { + String partitionLevelCircuitBreakerConfigAsString = + System.getProperty( + PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG, + firstNonNull( + emptyToNull(System.getenv().get(PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG)), + DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG)); + + PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig + = PartitionLevelCircuitBreakerConfig.fromJsonString(partitionLevelCircuitBreakerConfigAsString); + + if (partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForReads() < 10) { + return PartitionLevelCircuitBreakerConfig.DEFAULT; + } + + if (partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForWrites() < 5) { + return PartitionLevelCircuitBreakerConfig.DEFAULT; + } + + return partitionLevelCircuitBreakerConfig; + } + + public static int getStaleCollectionCacheRefreshRetryCount() { + + String 
valueFromSystemProperty = System.getProperty(STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); + + if (StringUtils.isNotEmpty(valueFromSystemProperty)) { + return Math.max(Integer.parseInt(valueFromSystemProperty), DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); + } + + String valueFromEnvVariable = System.getenv(STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); + + if (StringUtils.isNotEmpty(valueFromEnvVariable)) { + return Math.max(Integer.parseInt(valueFromEnvVariable), DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); + } + + return DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT; + } + + public static int getStaleCollectionCacheRefreshRetryIntervalInSeconds() { + /* Lookup order: system property, then environment variable, then the default; an override is floored at this setting's own default, not the partition-unavailability refresh default. */ + String valueFromSystemProperty = System.getProperty(STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS); + + if (StringUtils.isNotEmpty(valueFromSystemProperty)) { + return Math.max(Integer.parseInt(valueFromSystemProperty), DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS); + } + + String valueFromEnvVariable = System.getenv(STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS); + + if (StringUtils.isNotEmpty(valueFromEnvVariable)) { + return Math.max(Integer.parseInt(valueFromEnvVariable), DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS); + } + + return DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS; + } + + public static int getStalePartitionUnavailabilityRefreshIntervalInSeconds() { + + String valueFromSystemProperty = System.getProperty(STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); + + if (StringUtils.isNotEmpty(valueFromSystemProperty)) { + return Math.max(Integer.parseInt(valueFromSystemProperty), DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); + } + + String valueFromEnvVariable = System.getenv(STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); + + if (StringUtils.isNotEmpty(valueFromEnvVariable)) { + return Math.max(Integer.parseInt(valueFromEnvVariable), 
DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); + } + + return DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS; + } + + public static int getAllowedPartitionUnavailabilityDurationInSeconds() { + + String valueFromSystemProperty = System.getProperty(ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS); + + if (StringUtils.isNotEmpty(valueFromSystemProperty)) { + return Math.max(Integer.parseInt(valueFromSystemProperty), DEFAULT_ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS); + } + + String valueFromEnvVariable = System.getenv(ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS); + + if (StringUtils.isNotEmpty(valueFromEnvVariable)) { + return Math.max(Integer.parseInt(valueFromEnvVariable), DEFAULT_ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS); + } + + return DEFAULT_ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS; + } + + public static int getConnectionEstablishmentTimeoutForPartitionRecoveryInSeconds() { + + String valueFromSystemProperty = System.getProperty(CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS); + + if (StringUtils.isNotEmpty(valueFromSystemProperty)) { + return Math.max(Integer.parseInt(valueFromSystemProperty), DEFAULT_CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS); + } + + String valueFromEnvVariable = System.getenv(CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS); + + if (StringUtils.isNotEmpty(valueFromEnvVariable)) { + return Math.max(Integer.parseInt(valueFromEnvVariable), DEFAULT_CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS); + } + + return DEFAULT_CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosChangeFeedRequestOptionsImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosChangeFeedRequestOptionsImpl.java index fec25746317e..34eccc9231c2 100644 --- 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosChangeFeedRequestOptionsImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosChangeFeedRequestOptionsImpl.java @@ -16,6 +16,7 @@ import com.azure.cosmos.models.CosmosRequestOptions; import com.azure.cosmos.models.DedicatedGatewayRequestOptions; import com.azure.cosmos.models.FeedRange; +import com.azure.cosmos.models.PartitionKeyDefinition; import com.azure.cosmos.util.Beta; import java.util.HashMap; @@ -43,6 +44,8 @@ public final class CosmosChangeFeedRequestOptionsImpl implements OverridableRequ private CosmosDiagnosticsThresholds thresholds; private List excludeRegions; private CosmosItemSerializer customSerializer; + private PartitionKeyDefinition partitionKeyDefinition; + private String collectionRid; private Set keywordIdentifiers; public CosmosChangeFeedRequestOptionsImpl(CosmosChangeFeedRequestOptionsImpl toBeCloned) { @@ -61,6 +64,8 @@ public CosmosChangeFeedRequestOptionsImpl(CosmosChangeFeedRequestOptionsImpl toB this.thresholds = toBeCloned.thresholds; this.excludeRegions = toBeCloned.excludeRegions; this.customSerializer = toBeCloned.customSerializer; + this.collectionRid = toBeCloned.collectionRid; + this.partitionKeyDefinition = toBeCloned.partitionKeyDefinition; this.keywordIdentifiers = toBeCloned.keywordIdentifiers; } @@ -329,6 +334,22 @@ private void addCustomOptionsForFullFidelityMode() { HttpConstants.ChangeFeedWireFormatVersions.SEPARATE_METADATA_WITH_CRTS); } + public PartitionKeyDefinition getPartitionKeyDefinition() { + return partitionKeyDefinition; + } + + public void setPartitionKeyDefinition(PartitionKeyDefinition partitionKeyDefinition) { + this.partitionKeyDefinition = partitionKeyDefinition; + } + + public String getCollectionRid() { + return collectionRid; + } + + public void setCollectionRid(String collectionRid) { + this.collectionRid = collectionRid; + } + public void setKeywordIdentifiers(Set keywordIdentifiers) { 
this.keywordIdentifiers = keywordIdentifiers; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java index b81e342d98b9..b7b7b3f50b0b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java @@ -30,6 +30,7 @@ public final class CosmosQueryRequestOptionsImpl extends CosmosQueryRequestOptio private String queryName; private Integer maxItemCountForVectorSearch; private List cancelledRequestDiagnosticsTracker = new ArrayList<>(); + private String collectionRid; /** * Instantiates a new query request options. @@ -68,6 +69,7 @@ public CosmosQueryRequestOptionsImpl(CosmosQueryRequestOptionsImpl options) { this.feedRange = options.feedRange; this.cancelledRequestDiagnosticsTracker = options.cancelledRequestDiagnosticsTracker; this.maxItemCountForVectorSearch = options.maxItemCountForVectorSearch; + this.collectionRid = options.collectionRid; } /** @@ -387,4 +389,12 @@ public void override(CosmosRequestOptions cosmosRequestOptions) { this.maxItemCount = overrideOption(cosmosRequestOptions.getMaxItemCount(), this.maxItemCount); this.queryName = overrideOption(cosmosRequestOptions.getQueryName(), this.queryName); } + + public String getCollectionRid() { + return collectionRid; + } + + public void setCollectionRid(String collectionRid) { + this.collectionRid = collectionRid; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java index 00b6202f608e..cce49c6d17e6 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java +++ 
b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java @@ -86,4 +86,9 @@ public class CosmosSchedulers { TTL_FOR_SCHEDULER_WORKER_IN_SECONDS, true ); + + public final static Scheduler PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE = Schedulers.newSingle( + "partition-availability-staleness-check", + true + ); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DiagnosticsClientContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DiagnosticsClientContext.java index 48eea593c53c..8f1ddfc122dd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DiagnosticsClientContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DiagnosticsClientContext.java @@ -10,6 +10,7 @@ import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; import com.azure.cosmos.SessionRetryOptions; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.azure.cosmos.implementation.circuitBreaker.PartitionLevelCircuitBreakerConfig; import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; import com.azure.cosmos.implementation.guava27.Strings; import com.fasterxml.jackson.core.JsonGenerator; @@ -89,6 +90,11 @@ public void serialize(DiagnosticsClientConfig clientConfig, JsonGenerator genera if (!StringUtils.isEmpty(clientConfig.regionScopedSessionContainerOptionsAsString)) { generator.writeStringField("regionScopedSessionCfg", clientConfig.regionScopedSessionContainerOptionsAsString); } + + if (!StringUtils.isEmpty(clientConfig.partitionLevelCircuitBreakerConfigAsString)) { + generator.writeStringField("partitionLevelCircuitBreakerCfg", clientConfig.partitionLevelCircuitBreakerConfigAsString); + } + } catch (Exception e) { logger.debug("unexpected failure", e); } @@ -121,6 +127,7 @@ class DiagnosticsClientConfig { private ConnectionPolicy connectionPolicy; private String 
sessionRetryOptionsAsString; private String regionScopedSessionContainerOptionsAsString; + private String partitionLevelCircuitBreakerConfigAsString; public DiagnosticsClientConfig withMachineId(String machineId) { this.machineId = machineId; @@ -228,6 +235,16 @@ public DiagnosticsClientConfig withSessionRetryOptions(SessionRetryOptions sessi return this; } + public DiagnosticsClientConfig withPartitionLevelCircuitBreakerConfig(PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig) { + if (partitionLevelCircuitBreakerConfig == null) { + this.partitionLevelCircuitBreakerConfigAsString = ""; + } else { + this.partitionLevelCircuitBreakerConfigAsString = partitionLevelCircuitBreakerConfig.getConfigAsString(); + } + + return this; + } + public DiagnosticsClientConfig withRegionScopedSessionContainerOptions(RegionScopedSessionContainer regionScopedSessionContainer) { if (regionScopedSessionContainer == null) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java index c813b06a6513..953d8e870413 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java @@ -7,6 +7,7 @@ import com.azure.cosmos.CosmosDiagnostics; import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; import com.azure.cosmos.CosmosException; +import com.azure.cosmos.implementation.circuitBreaker.LocationSpecificHealthContext; import com.azure.cosmos.implementation.directconnectivity.StoreResponse; import com.azure.cosmos.implementation.directconnectivity.StoreResult; import com.azure.cosmos.implementation.directconnectivity.TimeoutHelper; @@ -20,6 +21,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import 
java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Supplier; public class DocumentServiceRequestContext implements Cloneable { public volatile boolean forceAddressRefresh; @@ -53,10 +55,18 @@ public class DocumentServiceRequestContext implements Cloneable { private volatile Set keywordIdentifiers; private volatile long approximateBloomFilterInsertionCount; private final Set sessionTokenEvaluationResults = ConcurrentHashMap.newKeySet(); + private volatile List unavailableRegionsForPartition; // For cancelled rntbd requests, track the response as OperationCancelledException which later will be used to populate the cosmosDiagnostics public final Map rntbdCancelledRequestMap = new ConcurrentHashMap<>(); + private PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker; + + private FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker; + private volatile Supplier clientRetryPolicySupplier; + private volatile Utils.ValueHolder> regionToLocationSpecificHealthContext + = new Utils.ValueHolder<>(); + public DocumentServiceRequestContext() {} /** @@ -138,6 +148,9 @@ public DocumentServiceRequestContext clone() { context.throughputControlCycleId = this.throughputControlCycleId; context.replicaAddressValidationEnabled = this.replicaAddressValidationEnabled; context.endToEndOperationLatencyPolicyConfig = this.endToEndOperationLatencyPolicyConfig; + context.unavailableRegionsForPartition = this.unavailableRegionsForPartition; + context.feedOperationContextForCircuitBreaker = this.feedOperationContextForCircuitBreaker; + context.pointOperationContextForCircuitBreaker = this.pointOperationContextForCircuitBreaker; return context; } @@ -165,6 +178,30 @@ public void setExcludeRegions(List excludeRegions) { this.excludeRegions = excludeRegions; } + public List getUnavailableRegionsForPartition() { + return unavailableRegionsForPartition; + } + + public void setUnavailableRegionsForPartition(List 
unavailableRegionsForPartition) { + this.unavailableRegionsForPartition = unavailableRegionsForPartition; + } + + public PointOperationContextForCircuitBreaker getPointOperationContextForCircuitBreaker() { + return pointOperationContextForCircuitBreaker; + } + + public void setPointOperationContext(PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { + this.pointOperationContextForCircuitBreaker = pointOperationContextForCircuitBreaker; + } + + public FeedOperationContextForCircuitBreaker getFeedOperationContextForCircuitBreaker() { + return feedOperationContextForCircuitBreaker; + } + + public void setFeedOperationContext(FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker) { + this.feedOperationContextForCircuitBreaker = feedOperationContextForCircuitBreaker; + } + public void setKeywordIdentifiers(Set keywordIdentifiers) { this.keywordIdentifiers = keywordIdentifiers; } @@ -184,5 +221,21 @@ public void setApproximateBloomFilterInsertionCount(long approximateBloomFilterI public Set getSessionTokenEvaluationResults() { return sessionTokenEvaluationResults; } + + public Supplier getClientRetryPolicySupplier() { + return clientRetryPolicySupplier; + } + + public void setClientRetryPolicySupplier(Supplier clientRetryPolicySupplier) { + this.clientRetryPolicySupplier = clientRetryPolicySupplier; + } + + public Utils.ValueHolder> getLocationToLocationSpecificHealthContext() { + return regionToLocationSpecificHealthContext; + } + + public void setLocationToLocationSpecificHealthContext(Map regionToLocationSpecificHealthContext) { + this.regionToLocationSpecificHealthContext.v = regionToLocationSpecificHealthContext; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java new file mode 100644 index 000000000000..0858b766f182 --- 
/dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation; + +import com.azure.cosmos.implementation.circuitBreaker.PartitionKeyRangeWrapper; + +import java.util.Map; + +public class FeedOperationContextForCircuitBreaker { + + private final Map partitionKeyRangesWithSuccess; + private final boolean isThresholdBasedAvailabilityStrategyEnabled; + private final String collectionLink; + private boolean isRequestHedged; + + public FeedOperationContextForCircuitBreaker( + Map partitionKeyRangesWithSuccess, + boolean isThresholdBasedAvailabilityStrategyEnabled, + String collectionLink) { + + this.partitionKeyRangesWithSuccess = partitionKeyRangesWithSuccess; + this.isThresholdBasedAvailabilityStrategyEnabled = isThresholdBasedAvailabilityStrategyEnabled; + this.collectionLink = collectionLink; + } + + public void setIsRequestHedged(boolean isRequestHedged) { + this.isRequestHedged = isRequestHedged; + } + + public boolean getIsRequestHedged() { + return this.isRequestHedged; + } + + public void addPartitionKeyRangeWithSuccess(PartitionKeyRange partitionKeyRange, String resourceId) { + PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); + this.partitionKeyRangesWithSuccess.put(partitionKeyRangeWrapper, partitionKeyRangeWrapper); + } + + public boolean hasPartitionKeyRangeSeenSuccess(PartitionKeyRange partitionKeyRange, String resourceId) { + PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); + return this.partitionKeyRangesWithSuccess.containsKey(partitionKeyRangeWrapper); + } + + public boolean isThresholdBasedAvailabilityStrategyEnabled() { + return isThresholdBasedAvailabilityStrategyEnabled; + } + + public String 
getCollectionLink() { + return collectionLink; + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java index 40ea083e40c0..f36e5ae973ca 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java @@ -4,6 +4,7 @@ package com.azure.cosmos.implementation; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.routing.LocationCache; import com.azure.cosmos.implementation.routing.LocationHelper; import org.slf4j.Logger; @@ -20,7 +21,6 @@ import java.util.Collections; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; /** @@ -57,6 +57,7 @@ public Throwable getLatestDatabaseRefreshError() { public GlobalEndpointManager(DatabaseAccountManagerInternal owner, ConnectionPolicy connectionPolicy, Configs configs) { this.backgroundRefreshLocationTimeIntervalInMS = configs.getUnavailableLocationsExpirationTimeInSeconds() * 1000; this.maxInitializationTime = Duration.ofSeconds(configs.getGlobalEndpointManagerMaxInitializationTimeInSeconds()); + try { this.locationCache = new LocationCache( connectionPolicy, @@ -103,12 +104,12 @@ public UnmodifiableList getApplicableWriteEndpoints(RxDocumentServiceReques public UnmodifiableList getApplicableReadEndpoints(List excludedRegions) { // readonly - return this.locationCache.getApplicableReadEndpoints(excludedRegions); + return this.locationCache.getApplicableReadEndpoints(excludedRegions, Collections.emptyList()); } public UnmodifiableList 
getApplicableWriteEndpoints(List excludedRegions) { //readonly - return this.locationCache.getApplicableWriteEndpoints(excludedRegions); + return this.locationCache.getApplicableWriteEndpoints(excludedRegions, Collections.emptyList()); } public List getAvailableReadEndpoints() { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java index a9be4c70ea8b..795b32a57015 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java @@ -434,6 +434,8 @@ public static class SubStatusCodes { public static final int TIMEOUT_GENERATED_410 = 20002; // Client generated operation timeout exception public static final int CLIENT_OPERATION_TIMEOUT = 20008; + // Sub-status code paired with 408 status code + public static final int TRANSIT_TIMEOUT = 20911; // IMPORTANT - below sub status codes have no corresponding .Net // version, because they are only applicable in Java diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index 97973221ea1b..a8624059aa20 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -84,12 +84,14 @@ import reactor.core.publisher.Mono; import reactor.core.scheduler.Scheduler; +import java.net.URI; import java.time.Duration; import java.util.Collection; import java.util.EnumSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentMap; import 
java.util.concurrent.atomic.AtomicBoolean; @@ -298,6 +300,10 @@ void setCancelledRequestDiagnosticsTracker( void setPartitionKeyDefinition(CosmosQueryRequestOptions options, PartitionKeyDefinition partitionKeyDefinition); PartitionKeyDefinition getPartitionKeyDefinition(CosmosQueryRequestOptions options); + + void setCollectionRid(CosmosQueryRequestOptions options, String collectionRid); + + String getCollectionRid(CosmosQueryRequestOptions options); } } @@ -331,7 +337,7 @@ public static CosmosReadManyRequestOptionsAccessor getCosmosReadManyRequestOptio } public interface CosmosReadManyRequestOptionsAccessor { - public CosmosQueryRequestOptionsBase getImpl(CosmosReadManyRequestOptions options); + CosmosQueryRequestOptionsBase getImpl(CosmosReadManyRequestOptions options); } } @@ -374,6 +380,14 @@ public interface CosmosChangeFeedRequestOptionsAccessor { CosmosChangeFeedRequestOptions createForProcessingFromContinuation(String continuation, FeedRange targetRange, String continuationLsn); CosmosChangeFeedRequestOptions clone(CosmosChangeFeedRequestOptions toBeCloned); + + String getCollectionRid(CosmosChangeFeedRequestOptions changeFeedRequestOptions); + + void setCollectionRid(CosmosChangeFeedRequestOptions changeFeedRequestOptions, String collectionRid); + + PartitionKeyDefinition getPartitionKeyDefinition(CosmosChangeFeedRequestOptions changeFeedRequestOptions); + + void setPartitionKeyDefinition(CosmosChangeFeedRequestOptions changeFeedRequestOptions, PartitionKeyDefinition partitionKeyDefinition); } } @@ -869,6 +883,12 @@ void recordAddressResolutionEnd( boolean isNotEmpty(CosmosDiagnostics cosmosDiagnostics); void setDiagnosticsContext(CosmosDiagnostics cosmosDiagnostics, CosmosDiagnosticsContext ctx); + + URI getFirstContactedLocationEndpoint(CosmosDiagnostics cosmosDiagnostics); + + void mergeMetadataDiagnosticContext(CosmosDiagnostics cosmosDiagnostics, MetadataDiagnosticsContext otherMetadataDiagnosticsContext); + + void 
mergeSerializationDiagnosticContext(CosmosDiagnostics cosmosDiagnostics, SerializationDiagnosticsContext otherSerializationDiagnosticsContext); } } @@ -1104,6 +1124,12 @@ FeedResponse createFeedResponse(RxDocumentServiceResponse response, FeedResponse createChangeFeedResponse(RxDocumentServiceResponse response, CosmosItemSerializer itemSerializer, Class cls); + + FeedResponse createChangeFeedResponse(RxDocumentServiceResponse response, + CosmosItemSerializer itemSerializer, + Class cls, + CosmosDiagnostics diagnostics); + boolean getNoChanges(FeedResponse feedResponse); FeedResponse convertGenericType(FeedResponse feedResponse, Function conversion); FeedResponse createFeedResponse( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java new file mode 100644 index 000000000000..8f5bdc1ccd91 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java @@ -0,0 +1,62 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.cosmos.implementation; + +import java.util.concurrent.atomic.AtomicBoolean; +/** Holds the state kept for one point operation: a success flag, a request-hedged flag, the collection link, and the metadata and serialization diagnostics contexts. */ +public class PointOperationContextForCircuitBreaker { + + private final AtomicBoolean hasOperationSeenSuccess; + private final boolean isThresholdBasedAvailabilityStrategyEnabled; + private boolean isRequestHedged; + private final String collectionLink; + private final MetadataDiagnosticsContext metadataDiagnosticsContext; + private final SerializationDiagnosticsContext serializationDiagnosticsContext; + + public PointOperationContextForCircuitBreaker( + AtomicBoolean hasOperationSeenSuccess, + boolean isThresholdBasedAvailabilityStrategyEnabled, + String collectionLink, + MetadataDiagnosticsContext metadataDiagnosticsContext, + SerializationDiagnosticsContext serializationDiagnosticsContext) { + + this.hasOperationSeenSuccess = hasOperationSeenSuccess; + this.isThresholdBasedAvailabilityStrategyEnabled = isThresholdBasedAvailabilityStrategyEnabled; + this.collectionLink = collectionLink; + this.metadataDiagnosticsContext = metadataDiagnosticsContext; + this.serializationDiagnosticsContext = serializationDiagnosticsContext; + } + + public void setIsRequestHedged(boolean isRequestHedged) { + this.isRequestHedged = isRequestHedged; + } + + public boolean isRequestHedged() { + return this.isRequestHedged; + } + /** Sets the success flag to true; this class exposes no method that sets it back to false. */ + public void setHasOperationSeenSuccess() { + this.hasOperationSeenSuccess.set(true); + } + + public boolean getHasOperationSeenSuccess() { + return hasOperationSeenSuccess.get(); + } + + public boolean isThresholdBasedAvailabilityStrategyEnabled() { + return this.isThresholdBasedAvailabilityStrategyEnabled; + } + + public String getCollectionLink() { + return this.collectionLink; + } + + public MetadataDiagnosticsContext getMetadataDiagnosticsContext() { + return this.metadataDiagnosticsContext; + } + + public SerializationDiagnosticsContext getSerializationDiagnosticsContext() { + return serializationDiagnosticsContext; + } +} diff --git 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java index b70321c4e35a..de16569def51 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java @@ -13,6 +13,7 @@ import com.azure.cosmos.models.DedicatedGatewayRequestOptions; import com.azure.cosmos.models.IndexingDirective; import com.azure.cosmos.models.PartitionKey; +import com.azure.cosmos.models.PartitionKeyDefinition; import com.azure.cosmos.models.ThroughputProperties; import java.util.ArrayList; @@ -62,6 +63,8 @@ public class RequestOptions implements OverridableRequestOptions { private final AtomicReference markE2ETimeoutInRequestContextCallbackHook; private Set keywordIdentifiers; + private PartitionKeyDefinition partitionKeyDefinition; + public RequestOptions() { this.markE2ETimeoutInRequestContextCallbackHook = new AtomicReference<>(null); @@ -93,6 +96,7 @@ public RequestOptions(RequestOptions toBeCloned) { this.diagnosticsCtxSupplier = toBeCloned.diagnosticsCtxSupplier; this.markE2ETimeoutInRequestContextCallbackHook = new AtomicReference<>(null); this.effectiveItemSerializer= toBeCloned.effectiveItemSerializer; + this.partitionKeyDefinition = toBeCloned.partitionKeyDefinition; if (toBeCloned.customOptions != null) { this.customOptions = new HashMap<>(toBeCloned.customOptions); @@ -673,5 +677,11 @@ public WriteRetryPolicy calculateAndGetEffectiveNonIdempotentRetriesEnabled( return WriteRetryPolicy.DISABLED; } + public void setPartitionKeyDefinition(PartitionKeyDefinition partitionKeyDefinition) { + this.partitionKeyDefinition = partitionKeyDefinition; + } + public PartitionKeyDefinition getPartitionKeyDefinition() { + return this.partitionKeyDefinition; + } } diff --git 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestTimeoutException.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestTimeoutException.java index 510781c74b59..e89cb494ab84 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestTimeoutException.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestTimeoutException.java @@ -49,6 +49,10 @@ public RequestTimeoutException(String message, URI requestUri) { this(message, null, null, requestUri); } + public RequestTimeoutException(String message, URI requestUri, int subStatusCode) { + this(message, null, null, requestUri, subStatusCode); + } + /** * Instantiates a new Request timeout exception. * @@ -95,4 +99,14 @@ public RequestTimeoutException(String message, HttpHeaders headers, SocketAddres super(message, innerException, HttpUtils.asMap(headers), HttpConstants.StatusCodes.REQUEST_TIMEOUT, requestUrl != null ? requestUrl.toString() : null); } + + RequestTimeoutException(String message, + Exception innerException, + HttpHeaders headers, + URI requestUrl, + int subStatusCode) { + super(message, innerException, HttpUtils.asMap(headers), HttpConstants.StatusCodes.REQUEST_TIMEOUT, + requestUrl != null ? 
requestUrl.toString() : null); + BridgeInternal.setSubStatusCode(this, subStatusCode); + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java index 57ac7bfbbbdf..31d303910f11 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java @@ -5,6 +5,7 @@ import com.azure.cosmos.ThrottlingRetryOptions; import com.azure.cosmos.implementation.caches.RxCollectionCache; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; /** * While this class is public, but it is not part of our published public APIs. @@ -15,15 +16,22 @@ public class RetryPolicy implements IRetryPolicyFactory { private final DiagnosticsClientContext diagnosticsClientContext; private final GlobalEndpointManager globalEndpointManager; + private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager; private final boolean enableEndpointDiscovery; private final ThrottlingRetryOptions throttlingRetryOptions; private RxCollectionCache rxCollectionCache; - public RetryPolicy(DiagnosticsClientContext diagnosticsClientContext, GlobalEndpointManager globalEndpointManager, ConnectionPolicy connectionPolicy) { + public RetryPolicy( + DiagnosticsClientContext diagnosticsClientContext, + GlobalEndpointManager globalEndpointManager, + ConnectionPolicy connectionPolicy, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager) { + this.diagnosticsClientContext = diagnosticsClientContext; this.enableEndpointDiscovery = connectionPolicy.isEndpointDiscoveryEnabled(); this.globalEndpointManager = globalEndpointManager; this.throttlingRetryOptions = connectionPolicy.getThrottlingRetryOptions(); + this.globalPartitionEndpointManager = 
globalPartitionEndpointManager; } @Override @@ -32,8 +40,13 @@ public DocumentClientRetryPolicy getRequestPolicy(DiagnosticsClientContext clien if (clientContextOverride != null) { effectiveClientContext = clientContextOverride; } - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(effectiveClientContext, - this.globalEndpointManager, this.enableEndpointDiscovery, this.throttlingRetryOptions, this.rxCollectionCache); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy( + effectiveClientContext, + this.globalEndpointManager, + this.enableEndpointDiscovery, + this.throttlingRetryOptions, + this.rxCollectionCache, + this.globalPartitionEndpointManager); return clientRetryPolicy; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 9daa031af71b..2a8027d4f313 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -9,6 +9,7 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.ConnectionMode; import com.azure.cosmos.ConsistencyLevel; +import com.azure.cosmos.CosmosAsyncClient; import com.azure.cosmos.CosmosContainerProactiveInitConfig; import com.azure.cosmos.CosmosDiagnostics; import com.azure.cosmos.CosmosDiagnosticsContext; @@ -28,6 +29,8 @@ import com.azure.cosmos.implementation.caches.RxClientCollectionCache; import com.azure.cosmos.implementation.caches.RxCollectionCache; import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.PartitionKeyRangeWrapper; import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; import 
com.azure.cosmos.implementation.cpu.CpuMemoryListener; import com.azure.cosmos.implementation.cpu.CpuMemoryMonitor; @@ -37,6 +40,7 @@ import com.azure.cosmos.implementation.directconnectivity.ServerStoreModel; import com.azure.cosmos.implementation.directconnectivity.StoreClient; import com.azure.cosmos.implementation.directconnectivity.StoreClientFactory; +import com.azure.cosmos.implementation.directconnectivity.WFConstants; import com.azure.cosmos.implementation.faultinjection.IFaultInjectorProvider; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; import com.azure.cosmos.implementation.http.HttpClient; @@ -89,7 +93,9 @@ import reactor.core.Exceptions; import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; +import reactor.core.publisher.SignalType; import reactor.util.concurrent.Queues; +import reactor.util.retry.Retry; import java.io.IOException; import java.io.UnsupportedEncodingException; @@ -233,6 +239,7 @@ public class RxDocumentClientImpl implements AsyncDocumentClient, IAuthorization */ private final QueryCompatibilityMode queryCompatibilityMode = QueryCompatibilityMode.Default; private final GlobalEndpointManager globalEndpointManager; + private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker; private final RetryPolicy retryPolicy; private HttpClient reactorHttpClient; private Function httpClientInterceptor; @@ -252,6 +259,7 @@ public class RxDocumentClientImpl implements AsyncDocumentClient, IAuthorization private final boolean sessionCapturingDisabled; private final boolean isRegionScopedSessionCapturingEnabledOnClientOrSystemConfig; private List operationPolicies; + private AtomicReference cachedCosmosAsyncClientSnapshot; public RxDocumentClientImpl(URI serviceEndpoint, String masterKeyOrResourceToken, @@ -548,7 +556,19 @@ private RxDocumentClientImpl(URI serviceEndpoint, this.isRegionScopedSessionCapturingEnabledOnClientOrSystemConfig = 
isRegionScopedSessionCapturingEnabled; this.sessionContainer = new SessionContainer(this.serviceEndpoint.getHost(), disableSessionCapturing); - this.retryPolicy = new RetryPolicy(this, this.globalEndpointManager, this.connectionPolicy); + + this.globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManager); + + this.globalPartitionEndpointManagerForCircuitBreaker.init(); + this.cachedCosmosAsyncClientSnapshot = new AtomicReference<>(); + + this.diagnosticsClientConfig.withPartitionLevelCircuitBreakerConfig(this.globalPartitionEndpointManagerForCircuitBreaker.getCircuitBreakerConfig()); + + this.retryPolicy = new RetryPolicy( + this, + this.globalEndpointManager, + this.connectionPolicy, + this.globalPartitionEndpointManagerForCircuitBreaker); this.resetSessionTokenRetryPolicy = retryPolicy; CpuMemoryMonitor.register(this); this.queryPlanCache = new ConcurrentHashMap<>(); @@ -669,6 +689,7 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Func this.apiType); this.globalEndpointManager.init(); + DatabaseAccount databaseAccountSnapshot = this.initializeGatewayConfigurationReader(); this.resetSessionContainerIfNeeded(databaseAccountSnapshot); @@ -766,6 +787,7 @@ private void initializeDirectConnectivity() { this.clientTelemetry, this.globalEndpointManager); + this.globalPartitionEndpointManagerForCircuitBreaker.setGlobalAddressResolver(this.addressResolver); this.createStoreModel(true); } @@ -1725,7 +1747,7 @@ private Mono addPartitionKeyInformation(RxDocumentServ Mono> collectionObs = this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); return collectionObs .map(collectionValueHolder -> { - addPartitionKeyInformation(request, contentAsByteBuffer, document, options, collectionValueHolder.v); + addPartitionKeyInformation(request, contentAsByteBuffer, document, options, 
collectionValueHolder.v, null); return request; }); } @@ -1734,10 +1756,11 @@ private Mono addPartitionKeyInformation(RxDocumentServ ByteBuffer contentAsByteBuffer, Object document, RequestOptions options, - Mono> collectionObs) { + Mono> collectionObs, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { return collectionObs.map(collectionValueHolder -> { - addPartitionKeyInformation(request, contentAsByteBuffer, document, options, collectionValueHolder.v); + addPartitionKeyInformation(request, contentAsByteBuffer, document, options, collectionValueHolder.v, pointOperationContextForCircuitBreaker); return request; }); } @@ -1745,7 +1768,9 @@ private Mono addPartitionKeyInformation(RxDocumentServ private void addPartitionKeyInformation(RxDocumentServiceRequest request, ByteBuffer contentAsByteBuffer, Object objectDoc, RequestOptions options, - DocumentCollection collection) { + DocumentCollection collection, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { + PartitionKeyDefinition partitionKeyDefinition = collection.getPartitionKey(); PartitionKeyInternal partitionKeyInternal = null; @@ -1780,9 +1805,16 @@ private void addPartitionKeyInformation(RxDocumentServiceRequest request, serializationEndTime, SerializationDiagnosticsContext.SerializationType.PARTITION_KEY_FETCH_SERIALIZATION ); + SerializationDiagnosticsContext serializationDiagnosticsContext = BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); if (serializationDiagnosticsContext != null) { serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); + } else if (pointOperationContextForCircuitBreaker != null) { + serializationDiagnosticsContext = pointOperationContextForCircuitBreaker.getSerializationDiagnosticsContext(); + + if (serializationDiagnosticsContext != null) { + serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); + } } } else { @@ 
-1800,7 +1832,8 @@ private Mono getCreateDocumentRequest(DocumentClientRe RequestOptions options, boolean disableAutomaticIdGeneration, OperationType operationType, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { if (StringUtils.isEmpty(documentCollectionLink)) { throw new IllegalArgumentException("documentCollectionLink"); @@ -1840,17 +1873,13 @@ private Mono getCreateDocumentRequest(DocumentClientRe request.requestContext.setKeywordIdentifiers(options.getKeywordIdentifiers()); } - if (requestRetryPolicy != null) { - requestRetryPolicy.onBeforeSendRequest(request); - } - SerializationDiagnosticsContext serializationDiagnosticsContext = BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); if (serializationDiagnosticsContext != null) { serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); } Mono> collectionObs = this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); - return addPartitionKeyInformation(request, content, document, options, collectionObs); + return addPartitionKeyInformation(request, content, document, options, collectionObs, pointOperationContextForCircuitBreaker); } private Mono getBatchDocumentRequest(DocumentClientRetryPolicy requestRetryPolicy, @@ -1890,11 +1919,8 @@ private Mono getBatchDocumentRequest(DocumentClientRet request.requestContext.setKeywordIdentifiers(options.getKeywordIdentifiers()); } - if (requestRetryPolicy != null) { - requestRetryPolicy.onBeforeSendRequest(request); - } - SerializationDiagnosticsContext serializationDiagnosticsContext = BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); + if (serializationDiagnosticsContext != null) { 
serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); } @@ -1904,13 +1930,40 @@ private Mono getBatchDocumentRequest(DocumentClientRet request.requestContext.setKeywordIdentifiers(options.getKeywordIdentifiers()); } - Mono> collectionObs = - this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); + // note: calling onBeforeSendRequest is a cheap operation which injects a CosmosDiagnostics + // instance into 'request' amongst other things - this way metadataDiagnosticsContext is not + // null and can be used for metadata-related telemetry (partition key range, container and server address lookups) + if (requestRetryPolicy != null) { + requestRetryPolicy.onBeforeSendRequest(request); + } - return collectionObs.map((Utils.ValueHolder collectionValueHolder) -> { - addBatchHeaders(request, serverBatchRequest, collectionValueHolder.v); - return request; - }); + MetadataDiagnosticsContext metadataDiagnosticsContext = BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics); + + request.requestContext.setPointOperationContext( + new PointOperationContextForCircuitBreaker( + new AtomicBoolean(false), + false, + documentCollectionLink, + metadataDiagnosticsContext, + serializationDiagnosticsContext)); + + return this.collectionCache.resolveCollectionAsync(metadataDiagnosticsContext, request) + .flatMap(documentCollectionValueHolder -> this.partitionKeyRangeCache.tryLookupAsync(metadataDiagnosticsContext, documentCollectionValueHolder.v.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { + + addBatchHeaders(request, serverBatchRequest, documentCollectionValueHolder.v); + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request) && options != null) { + options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); + 
addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMapValueHolder.v); + } + + if (requestRetryPolicy != null) { + requestRetryPolicy.onBeforeSendRequest(request); + } + + return Mono.just(request); + })); } private RxDocumentServiceRequest addBatchHeaders(RxDocumentServiceRequest request, @@ -1954,7 +2007,7 @@ private RxDocumentServiceRequest addBatchHeaders(RxDocumentServiceRequest reques * @param httpMethod http method * @return Mono, which on subscription will populate the headers in the request passed in the argument. */ - private Mono populateHeadersAsync(RxDocumentServiceRequest request, RequestVerb httpMethod) { + public Mono populateHeadersAsync(RxDocumentServiceRequest request, RequestVerb httpMethod) { request.getHeaders().put(HttpConstants.HttpHeaders.X_DATE, Utils.nowAsRFC1123()); if (this.masterKeyOrResourceToken != null || this.resourceTokensMap != null || this.cosmosAuthorizationTokenResolver != null || this.credential != null) { @@ -2030,6 +2083,10 @@ private boolean requiresFeedRangeFiltering(RxDocumentServiceRequest request) { return false; } + if (request.hasFeedRangeFilteringBeenApplied()) { + return false; + } + switch (request.getOperationType()) { case ReadFeed: case Query: @@ -2189,15 +2246,18 @@ public Mono> createDocument( return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Create, - (opt, e2ecfg, clientCtxOverride) -> createDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> createDocumentCore( collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, - clientCtxOverride), + clientCtxOverride, + pointOperationContextForCircuitBreaker, + collectionRoutingMap), options, - options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled() + options != null && options.getNonIdempotentWriteRetriesEnabled() != null && 
options.getNonIdempotentWriteRetriesEnabled(), + collectionLink ); } @@ -2207,7 +2267,9 @@ private Mono> createDocumentCore( RequestOptions options, boolean disableAutomaticIdGeneration, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, + Utils.ValueHolder collectionRoutingMap) { ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); DocumentClientRetryPolicy requestRetryPolicy = @@ -2218,8 +2280,9 @@ private Mono> createDocumentCore( } DocumentClientRetryPolicy finalRetryPolicyInstance = requestRetryPolicy; + AtomicReference requestReference = new AtomicReference<>(); - return getPointOperationResponseMonoWithE2ETimeout( + return handleCircuitBreakingFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( nonNullRequestOptions, endToEndPolicyConfig, ObservableHelper.inlineIfPossibleAsObs(() -> @@ -2229,10 +2292,13 @@ private Mono> createDocumentCore( nonNullRequestOptions, disableAutomaticIdGeneration, finalRetryPolicyInstance, - scopedDiagnosticsFactory), + scopedDiagnosticsFactory, + requestReference, + collectionRoutingMap, + pointOperationContextForCircuitBreaker), requestRetryPolicy), scopedDiagnosticsFactory - ); + ), requestReference); } private Mono> createDocumentInternal( @@ -2241,15 +2307,43 @@ private Mono> createDocumentInternal( RequestOptions options, boolean disableAutomaticIdGeneration, DocumentClientRetryPolicy requestRetryPolicy, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference documentServiceRequestReference, + Utils.ValueHolder collectionRoutingMap, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { logger.debug("Creating a Document. 
collectionLink: [{}]", collectionLink); - Mono requestObs = getCreateDocumentRequest(requestRetryPolicy, collectionLink, document, - options, disableAutomaticIdGeneration, OperationType.Create, clientContextOverride); + Mono requestObs = getCreateDocumentRequest( + requestRetryPolicy, + collectionLink, + document, + options, + disableAutomaticIdGeneration, + OperationType.Create, + clientContextOverride, + pointOperationContextForCircuitBreaker); return requestObs - .flatMap(request -> create(request, requestRetryPolicy, getOperationContextAndListenerTuple(options))) + .flatMap(request -> { + + addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); + documentServiceRequestReference.set(request); + request.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request + if (requestRetryPolicy != null) { + requestRetryPolicy.onBeforeSendRequest(request); + } + + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + + return create(request, requestRetryPolicy, getOperationContextAndListenerTuple(options)); + }) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); } catch (Exception e) { @@ -2287,6 +2381,111 @@ private static Mono getPointOperationResponseMonoWithE2ETimeout( return rxDocumentServiceResponseMono; } + private Mono handleCircuitBreakingFeedbackForPointOperation( + Mono response, + AtomicReference requestReference) { + + return response + .doOnSuccess(ignore -> { + + if 
(this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(requestReference.get())) { + RxDocumentServiceRequest succeededRequest = requestReference.get(); + checkNotNull(succeededRequest.requestContext, "Argument 'succeededRequest.requestContext' must not be null!"); + + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = succeededRequest.requestContext.getPointOperationContextForCircuitBreaker(); + checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); + pointOperationContextForCircuitBreaker.setHasOperationSeenSuccess(); + + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(succeededRequest); + } + }) + .doOnError(throwable -> { + if (throwable instanceof OperationCancelledException) { + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(requestReference.get())) { + RxDocumentServiceRequest failedRequest = requestReference.get(); + checkNotNull(failedRequest.requestContext, "Argument 'failedRequest.requestContext' must not be null!"); + + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = failedRequest.requestContext.getPointOperationContextForCircuitBreaker(); + checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); + + if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { + + if (!pointOperationContextForCircuitBreaker.isRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { + this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); + } + } else { + this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); + } + } + } + }) + .doFinally(signalType -> { + if (signalType != SignalType.CANCEL) { + return; + } + + if 
(this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(requestReference.get())) { + RxDocumentServiceRequest failedRequest = requestReference.get(); + checkNotNull(failedRequest.requestContext, "Argument 'failedRequest.requestContext' must not be null!"); + + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = failedRequest.requestContext.getPointOperationContextForCircuitBreaker(); + checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); + + // scoping the handling of CANCEL signal handling for reasons outside of end-to-end operation timeout + // to purely operations which have end-to-end operation timeout enabled + if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { + + if (!pointOperationContextForCircuitBreaker.isRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { + this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); + } + } + } + }); + } + + private Mono> handleCircuitBreakingFeedbackForFeedOperationWithAvailabilityStrategy(Mono> response, RxDocumentServiceRequest request) { + + return response + .doOnSuccess(nonTransientFeedOperationResult -> { + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + if (!nonTransientFeedOperationResult.isError()) { + checkNotNull(request, "Argument 'request' cannot be null!"); + checkNotNull(request.requestContext, "Argument 'request.requestContext' cannot be null!"); + + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker + = request.requestContext.getFeedOperationContextForCircuitBreaker(); + + checkNotNull(feedOperationContextForCircuitBreaker, "Argument 'feedOperationContextForCircuitBreaker' cannot be null!"); + + 
feedOperationContextForCircuitBreaker.addPartitionKeyRangeWithSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId()); + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(request); + } + } + }) + .doFinally(signalType -> { + if (signalType != SignalType.CANCEL) { + return; + } + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + checkNotNull(request, "Argument 'request' cannot be null!"); + checkNotNull(request.requestContext, "Argument 'request.requestContext' cannot be null!"); + + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker + = request.requestContext.getFeedOperationContextForCircuitBreaker(); + + checkNotNull(feedOperationContextForCircuitBreaker, "Argument 'feedOperationContextForCircuitBreaker' cannot be null!"); + + if (!feedOperationContextForCircuitBreaker.getIsRequestHedged() + && feedOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled() + && feedOperationContextForCircuitBreaker.hasPartitionKeyRangeSeenSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId())) { + this.handleLocationCancellationExceptionForPartitionKeyRange(request); + } + } + }); + } + private static Throwable getCancellationExceptionForPointOperations( ScopedDiagnosticsFactory scopedDiagnosticsFactory, Throwable throwable, @@ -2342,10 +2541,11 @@ public Mono> upsertDocument(String collectionLink, Ob return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Upsert, - (opt, e2ecfg, clientCtxOverride) -> upsertDocumentCore( - collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride), + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> upsertDocumentCore( + collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, 
pointOperationContextForCircuitBreaker, collectionRoutingMap), options, - options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled() + options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink ); } @@ -2355,7 +2555,9 @@ private Mono> upsertDocumentCore( RequestOptions options, boolean disableAutomaticIdGeneration, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, + Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -2365,21 +2567,24 @@ private Mono> upsertDocumentCore( } DocumentClientRetryPolicy finalRetryPolicyInstance = requestRetryPolicy; + AtomicReference requestReference = new AtomicReference<>(); - return getPointOperationResponseMonoWithE2ETimeout( - nonNullRequestOptions, - endToEndPolicyConfig, - ObservableHelper.inlineIfPossibleAsObs( - () -> upsertDocumentInternal( - collectionLink, - document, - nonNullRequestOptions, - disableAutomaticIdGeneration, - finalRetryPolicyInstance, - scopedDiagnosticsFactory), - finalRetryPolicyInstance), - scopedDiagnosticsFactory - ); + return handleCircuitBreakingFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( + nonNullRequestOptions, + endToEndPolicyConfig, + ObservableHelper.inlineIfPossibleAsObs( + () -> upsertDocumentInternal( + collectionLink, + document, + nonNullRequestOptions, + disableAutomaticIdGeneration, + finalRetryPolicyInstance, + scopedDiagnosticsFactory, + requestReference, + collectionRoutingMap, + pointOperationContextForCircuitBreaker), + 
finalRetryPolicyInstance), + scopedDiagnosticsFactory), requestReference); } private Mono> upsertDocumentInternal( @@ -2388,7 +2593,10 @@ private Mono> upsertDocumentInternal( RequestOptions options, boolean disableAutomaticIdGeneration, DocumentClientRetryPolicy retryPolicyInstance, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference requestReference, + Utils.ValueHolder collectionRoutingMap, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { logger.debug("Upserting a Document. collectionLink: [{}]", collectionLink); @@ -2401,10 +2609,30 @@ private Mono> upsertDocumentInternal( options, disableAutomaticIdGeneration, OperationType.Upsert, - clientContextOverride); + clientContextOverride, + pointOperationContextForCircuitBreaker); return reqObs - .flatMap(request -> upsert(request, retryPolicyInstance, getOperationContextAndListenerTuple(options))) + .flatMap(request -> { + + addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); + + request.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + requestReference.set(request); + + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); + } + + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + + return upsert(request, retryPolicyInstance, getOperationContextAndListenerTuple(options)); + }) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); } catch 
(Exception e) { @@ -2417,17 +2645,22 @@ private Mono> upsertDocumentInternal( public Mono> replaceDocument(String documentLink, Object document, RequestOptions options) { + String collectionLink = Utils.getCollectionName(documentLink); + return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Replace, - (opt, e2ecfg, clientCtxOverride) -> replaceDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> replaceDocumentCore( documentLink, document, opt, e2ecfg, - clientCtxOverride), + clientCtxOverride, + pointOperationContextForCircuitBreaker, + collectionRoutingMap), options, - options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled() + options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink ); } @@ -2436,7 +2669,9 @@ private Mono> replaceDocumentCore( Object document, RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, + Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? 
options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -2448,21 +2683,24 @@ private Mono> replaceDocumentCore( collectionCache, requestRetryPolicy, collectionLink, nonNullRequestOptions); } DocumentClientRetryPolicy finalRequestRetryPolicy = requestRetryPolicy; + AtomicReference requestReference = new AtomicReference<>(); - return getPointOperationResponseMonoWithE2ETimeout( - nonNullRequestOptions, - endToEndPolicyConfig, - ObservableHelper.inlineIfPossibleAsObs( - () -> replaceDocumentInternal( - documentLink, - document, - nonNullRequestOptions, - finalRequestRetryPolicy, - endToEndPolicyConfig, - scopedDiagnosticsFactory), - requestRetryPolicy), - scopedDiagnosticsFactory - ); + return handleCircuitBreakingFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( + nonNullRequestOptions, + endToEndPolicyConfig, + ObservableHelper.inlineIfPossibleAsObs( + () -> replaceDocumentInternal( + documentLink, + document, + nonNullRequestOptions, + finalRequestRetryPolicy, + endToEndPolicyConfig, + scopedDiagnosticsFactory, + requestReference, + collectionRoutingMap, + pointOperationContextForCircuitBreaker), + requestRetryPolicy), + scopedDiagnosticsFactory), requestReference); } private Mono> replaceDocumentInternal( @@ -2471,7 +2709,10 @@ private Mono> replaceDocumentInternal( RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference requestReference, + Utils.ValueHolder collectionRoutingMap, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { if (StringUtils.isEmpty(documentLink)) { @@ -2489,7 +2730,10 @@ private Mono> replaceDocumentInternal( typedDocument, options, retryPolicyInstance, - clientContextOverride); + 
clientContextOverride, + requestReference, + collectionRoutingMap, + pointOperationContextForCircuitBreaker); } catch (Exception e) { logger.debug("Failure in replacing a document due to [{}]", e.getMessage()); @@ -2499,16 +2743,22 @@ private Mono> replaceDocumentInternal( @Override public Mono> replaceDocument(Document document, RequestOptions options) { + + String collectionLink = Utils.getCollectionName(document.getSelfLink()); + return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Replace, - (opt, e2ecfg, clientCtxOverride) -> replaceDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> replaceDocumentCore( document, opt, e2ecfg, - clientCtxOverride), + clientCtxOverride, + pointOperationContextForCircuitBreaker, + collectionRoutingMap), options, - options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled() + options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink ); } @@ -2516,7 +2766,9 @@ private Mono> replaceDocumentCore( Document document, RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, + Utils.ValueHolder collectionRoutingMap) { DocumentClientRetryPolicy requestRetryPolicy = this.resetSessionTokenRetryPolicy.getRequestPolicy(clientContextOverride); @@ -2526,14 +2778,19 @@ private Mono> replaceDocumentCore( collectionCache, requestRetryPolicy, collectionLink, options); } DocumentClientRetryPolicy finalRequestRetryPolicy = requestRetryPolicy; - return ObservableHelper.inlineIfPossibleAsObs( + AtomicReference requestReference = new AtomicReference<>(); + + return 
handleCircuitBreakingFeedbackForPointOperation(ObservableHelper.inlineIfPossibleAsObs( () -> replaceDocumentInternal( document, options, finalRequestRetryPolicy, endToEndPolicyConfig, - clientContextOverride), - requestRetryPolicy); + clientContextOverride, + requestReference, + collectionRoutingMap, + pointOperationContextForCircuitBreaker), + requestRetryPolicy), requestReference); } private Mono> replaceDocumentInternal( @@ -2541,7 +2798,10 @@ private Mono> replaceDocumentInternal( RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference requestReference, + Utils.ValueHolder collectionRoutingMap, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { if (document == null) { @@ -2553,7 +2813,10 @@ private Mono> replaceDocumentInternal( document, options, retryPolicyInstance, - clientContextOverride); + clientContextOverride, + requestReference, + collectionRoutingMap, + pointOperationContextForCircuitBreaker); } catch (Exception e) { logger.debug("Failure in replacing a database due to [{}]", e.getMessage()); @@ -2566,7 +2829,10 @@ private Mono> replaceDocumentInternal( Document document, RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference requestReference, + Utils.ValueHolder collectionRoutingMap, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { if (document == null) { throw new IllegalArgumentException("document"); @@ -2609,12 +2875,9 @@ private Mono> replaceDocumentInternal( request.requestContext.setKeywordIdentifiers(options.getKeywordIdentifiers()); } - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } - 
SerializationDiagnosticsContext serializationDiagnosticsContext = BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); + if (serializationDiagnosticsContext != null) { serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); } @@ -2624,11 +2887,30 @@ private Mono> replaceDocumentInternal( BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); Mono requestObs = - addPartitionKeyInformation(request, content, document, options, collectionObs); + addPartitionKeyInformation(request, content, document, options, collectionObs, pointOperationContextForCircuitBreaker); return requestObs - .flatMap(req -> replace(request, retryPolicyInstance) - .map(resp -> toResourceResponse(resp, Document.class))); + .flatMap(req -> { + + addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); + + req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + requestReference.set(req); + + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); + } + + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + + return replace(request, retryPolicyInstance); + }) + .map(resp -> toResourceResponse(resp, Document.class)); } private CosmosEndToEndOperationLatencyPolicyConfig getEndToEndOperationLatencyPolicyConfig( @@ -2664,17 +2946,23 @@ private CosmosEndToEndOperationLatencyPolicyConfig getEffectiveEndToEndOperation public Mono> patchDocument(String documentLink, 
CosmosPatchOperations cosmosPatchOperations, RequestOptions options) { + + String collectionLink = Utils.getCollectionName(documentLink); + return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Patch, - (opt, e2ecfg, clientCtxOverride) -> patchDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> patchDocumentCore( documentLink, cosmosPatchOperations, opt, e2ecfg, - clientCtxOverride), + clientCtxOverride, + pointOperationContextForCircuitBreaker, + collectionRoutingMap), options, - options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled() + options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink ); } @@ -2683,25 +2971,32 @@ private Mono> patchDocumentCore( CosmosPatchOperations cosmosPatchOperations, RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, + Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? 
options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); DocumentClientRetryPolicy documentClientRetryPolicy = this.resetSessionTokenRetryPolicy.getRequestPolicy(scopedDiagnosticsFactory); - return getPointOperationResponseMonoWithE2ETimeout( - nonNullRequestOptions, - endToEndPolicyConfig, - ObservableHelper.inlineIfPossibleAsObs( - () -> patchDocumentInternal( - documentLink, - cosmosPatchOperations, - nonNullRequestOptions, - documentClientRetryPolicy, - scopedDiagnosticsFactory), - documentClientRetryPolicy), - scopedDiagnosticsFactory - ); + AtomicReference requestReference = new AtomicReference<>(); + + return handleCircuitBreakingFeedbackForPointOperation( + getPointOperationResponseMonoWithE2ETimeout( + nonNullRequestOptions, + endToEndPolicyConfig, + ObservableHelper.inlineIfPossibleAsObs( + () -> patchDocumentInternal( + documentLink, + cosmosPatchOperations, + nonNullRequestOptions, + documentClientRetryPolicy, + scopedDiagnosticsFactory, + requestReference, + collectionRoutingMap, + pointOperationContextForCircuitBreaker), + documentClientRetryPolicy), + scopedDiagnosticsFactory), requestReference); } private Mono> patchDocumentInternal( @@ -2709,7 +3004,10 @@ private Mono> patchDocumentInternal( CosmosPatchOperations cosmosPatchOperations, RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference requestReference, + Utils.ValueHolder collectionRoutingMap, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { checkArgument(StringUtils.isNotEmpty(documentLink), "expected non empty documentLink"); checkNotNull(cosmosPatchOperations, "expected non null cosmosPatchOperations"); @@ -2757,6 +3055,7 @@ private Mono> patchDocumentInternal( SerializationDiagnosticsContext serializationDiagnosticsContext = 
BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); + if (serializationDiagnosticsContext != null) { serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); } @@ -2770,42 +3069,74 @@ private Mono> patchDocumentInternal( null, null, options, - collectionObs); + collectionObs, + pointOperationContextForCircuitBreaker); return requestObs - .flatMap(req -> patch(request, retryPolicyInstance)) + .flatMap(req -> { + + addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); + + req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + requestReference.set(req); + + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); + } + + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + + return patch(request, retryPolicyInstance); + }) .map(resp -> toResourceResponse(resp, Document.class)); } @Override public Mono> deleteDocument(String documentLink, RequestOptions options) { + + String collectionLink = Utils.getCollectionName(documentLink); + return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, - (opt, e2ecfg, clientCtxOverride) -> deleteDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> deleteDocumentCore( documentLink, null, opt, e2ecfg, - clientCtxOverride), + clientCtxOverride, + pointOperationContextForCircuitBreaker, + collectionRoutingMap), options, - options != null 
&& options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled() + options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink ); } @Override public Mono> deleteDocument(String documentLink, InternalObjectNode internalObjectNode, RequestOptions options) { + + String collectionLink = Utils.getCollectionName(documentLink); + return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, - (opt, e2ecfg, clientCtxOverride) -> deleteDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> deleteDocumentCore( documentLink, internalObjectNode, opt, e2ecfg, - clientCtxOverride), + clientCtxOverride, + pointOperationContextForCircuitBreaker, + collectionRoutingMap), options, - options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled() + options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink ); } @@ -2814,26 +3145,32 @@ private Mono> deleteDocumentCore( InternalObjectNode internalObjectNode, RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, + Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? 
options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); DocumentClientRetryPolicy requestRetryPolicy = this.resetSessionTokenRetryPolicy.getRequestPolicy(scopedDiagnosticsFactory); - return getPointOperationResponseMonoWithE2ETimeout( - nonNullRequestOptions, - endToEndPolicyConfig, - ObservableHelper.inlineIfPossibleAsObs( - () -> deleteDocumentInternal( - documentLink, - internalObjectNode, - nonNullRequestOptions, - requestRetryPolicy, - scopedDiagnosticsFactory), - requestRetryPolicy), - scopedDiagnosticsFactory - ); + AtomicReference requestReference = new AtomicReference<>(); + + return handleCircuitBreakingFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( + nonNullRequestOptions, + endToEndPolicyConfig, + ObservableHelper.inlineIfPossibleAsObs( + () -> deleteDocumentInternal( + documentLink, + internalObjectNode, + nonNullRequestOptions, + requestRetryPolicy, + scopedDiagnosticsFactory, + requestReference, + collectionRoutingMap, + pointOperationContextForCircuitBreaker), + requestRetryPolicy), + scopedDiagnosticsFactory), requestReference); } private Mono> deleteDocumentInternal( @@ -2841,7 +3178,10 @@ private Mono> deleteDocumentInternal( InternalObjectNode internalObjectNode, RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference requestReference, + Utils.ValueHolder collectionRoutingMap, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { if (StringUtils.isEmpty(documentLink)) { @@ -2866,20 +3206,34 @@ private Mono> deleteDocumentInternal( request.requestContext.setKeywordIdentifiers(options.getKeywordIdentifiers()); } - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } - Mono> collectionObs = collectionCache.resolveCollectionAsync( 
BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); Mono requestObs = addPartitionKeyInformation( - request, null, internalObjectNode, options, collectionObs); - + request, null, internalObjectNode, options, collectionObs, pointOperationContextForCircuitBreaker); return requestObs - .flatMap(req -> this.delete(req, retryPolicyInstance, getOperationContextAndListenerTuple(options))) + .flatMap(req -> { + + addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); + + req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + requestReference.set(req); + + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); + } + + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + + return this.delete(req, retryPolicyInstance, getOperationContextAndListenerTuple(options)); + }) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); } catch (Exception e) { @@ -2914,7 +3268,7 @@ private Mono> deleteAllDocumentsByPartitionKeyInterna Mono> collectionObs = collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); - Mono requestObs = addPartitionKeyInformation(request, null, null, options, collectionObs); + Mono requestObs = addPartitionKeyInformation(request, null, null, options, collectionObs, null); return requestObs.flatMap(req -> this .deleteAllItemsByPartitionKey(req, retryPolicyInstance, 
getOperationContextAndListenerTuple(options)) @@ -2935,13 +3289,16 @@ private Mono> readDocument( RequestOptions options, DiagnosticsClientContext innerDiagnosticsFactory) { + String collectionLink = Utils.getCollectionName(documentLink); + return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Read, - (opt, e2ecfg, clientCtxOverride) -> readDocumentCore(documentLink, opt, e2ecfg, clientCtxOverride), + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> readDocumentCore(documentLink, opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap), options, false, - innerDiagnosticsFactory + innerDiagnosticsFactory, + collectionLink ); } @@ -2949,13 +3306,17 @@ private Mono> readDocumentCore( String documentLink, RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, + Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? 
options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); DocumentClientRetryPolicy retryPolicyInstance = this.resetSessionTokenRetryPolicy.getRequestPolicy(scopedDiagnosticsFactory); - return getPointOperationResponseMonoWithE2ETimeout( + AtomicReference requestReference = new AtomicReference<>(); + + return handleCircuitBreakingFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( nonNullRequestOptions, endToEndPolicyConfig, ObservableHelper.inlineIfPossibleAsObs( @@ -2963,17 +3324,23 @@ private Mono> readDocumentCore( documentLink, nonNullRequestOptions, retryPolicyInstance, - scopedDiagnosticsFactory), + scopedDiagnosticsFactory, + requestReference, + collectionRoutingMap, + pointOperationContextForCircuitBreaker), retryPolicyInstance), scopedDiagnosticsFactory - ); + ), requestReference); } private Mono> readDocumentInternal( String documentLink, RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference requestReference, + Utils.ValueHolder collectionRoutingMap, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { if (StringUtils.isEmpty(documentLink)) { @@ -2992,17 +3359,31 @@ private Mono> readDocumentInternal( request.requestContext.setExcludeRegions(options.getExcludedRegions()); request.requestContext.setKeywordIdentifiers(options.getKeywordIdentifiers()); - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } - Mono> collectionObs = this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); - Mono requestObs = addPartitionKeyInformation(request, null, null, options, collectionObs); + Mono requestObs = addPartitionKeyInformation(request, null, null, options, 
collectionObs, pointOperationContextForCircuitBreaker); + + return requestObs.flatMap(req -> { + + addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); + + req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + requestReference.set(req); - return requestObs.flatMap(req -> - this.read(request, retryPolicyInstance) - .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class))); + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); + } + + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + + return this.read(req, retryPolicyInstance) + .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); + }); } catch (Exception e) { logger.debug("Failure in reading a document due to [{}]", e.getMessage()); @@ -3358,7 +3739,7 @@ private Flux> queryForReadMany( sqlQuery, rangeQueryMap, options, - collection.getResourceId(), + collection, parentResourceLink, activityId, klass, @@ -3560,15 +3941,16 @@ public Mono executeFeedOperationWithAvailabilityStrategy( OperationType operationType, Supplier retryPolicyFactory, RxDocumentServiceRequest req, - BiFunction, RxDocumentServiceRequest, Mono> feedOperation) { + BiFunction, RxDocumentServiceRequest, Mono> feedOperation, + String collectionLink) { return RxDocumentClientImpl.this.executeFeedOperationWithAvailabilityStrategy( resourceType, operationType, retryPolicyFactory, req, - feedOperation - ); + feedOperation, + collectionLink); } @Override @@ -3581,6 
+3963,53 @@ public Mono readFeedAsync(RxDocumentServiceRequest re // TODO Auto-generated method stub return null; } + + @Override + public Mono populateFeedRangeHeader(RxDocumentServiceRequest request) { + + if (RxDocumentClientImpl.this.requiresFeedRangeFiltering(request)) { + return request + .getFeedRange() + .populateFeedRangeFilteringHeaders(RxDocumentClientImpl.this.partitionKeyRangeCache, request, RxDocumentClientImpl.this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request)) + .flatMap(ignore -> Mono.just(request)); + } else { + return Mono.just(request); + } + } + + @Override + public Mono addPartitionLevelUnavailableRegionsOnRequest(RxDocumentServiceRequest request, CosmosQueryRequestOptions queryRequestOptions) { + + if (RxDocumentClientImpl.this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + + String collectionRid = RxDocumentClientImpl.qryOptAccessor.getCollectionRid(queryRequestOptions); + + checkNotNull(collectionRid, "Argument 'collectionRid' cannot be null!"); + + return RxDocumentClientImpl.this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), collectionRid, null, null) + .flatMap(collectionRoutingMapValueHolder -> { + + if (collectionRoutingMapValueHolder.v == null) { + return Mono.error(new CollectionRoutingMapNotFoundException("Argument 'collectionRoutingMapValueHolder.v' cannot be null!")); + } + + RxDocumentClientImpl.this.addPartitionLevelUnavailableRegionsForFeedRequest(request, queryRequestOptions, collectionRoutingMapValueHolder.v); + return Mono.just(request); + }); + } else { + return Mono.just(request); + } + } + + @Override + public GlobalEndpointManager getGlobalEndpointManager() { + return RxDocumentClientImpl.this.getGlobalEndpointManager(); + } + + @Override + public GlobalPartitionEndpointManagerForCircuitBreaker 
getGlobalPartitionEndpointManagerForCircuitBreaker() { + return RxDocumentClientImpl.this.globalPartitionEndpointManagerForCircuitBreaker; + } }; } @@ -4014,7 +4443,10 @@ public Mono executeBatchRequest(String collectionLink, RequestOptions options, boolean disableAutomaticIdGeneration) { DocumentClientRetryPolicy documentClientRetryPolicy = this.resetSessionTokenRetryPolicy.getRequestPolicy(null); - return ObservableHelper.inlineIfPossibleAsObs(() -> executeBatchRequestInternal(collectionLink, serverBatchRequest, options, documentClientRetryPolicy, disableAutomaticIdGeneration), documentClientRetryPolicy); + AtomicReference requestReference = new AtomicReference<>(); + return handleCircuitBreakingFeedbackForPointOperation(ObservableHelper + .inlineIfPossibleAsObs(() -> executeBatchRequestInternal( + collectionLink, serverBatchRequest, options, documentClientRetryPolicy, disableAutomaticIdGeneration, requestReference), documentClientRetryPolicy), requestReference); } private Mono executeStoredProcedureInternal(String storedProcedureLink, @@ -4059,14 +4491,19 @@ private Mono executeBatchRequestInternal(String collectionL ServerBatchRequest serverBatchRequest, RequestOptions options, DocumentClientRetryPolicy requestRetryPolicy, - boolean disableAutomaticIdGeneration) { + boolean disableAutomaticIdGeneration, + AtomicReference requestReference) { try { logger.debug("Executing a Batch request with number of operations {}", serverBatchRequest.getOperations().size()); Mono requestObs = getBatchDocumentRequest(requestRetryPolicy, collectionLink, serverBatchRequest, options, disableAutomaticIdGeneration); + Mono responseObservable = - requestObs.flatMap(request -> create(request, requestRetryPolicy, getOperationContextAndListenerTuple(options))); + requestObs.flatMap(request -> { + requestReference.set(request); + return create(request, requestRetryPolicy, getOperationContextAndListenerTuple(options)); + }); return responseObservable .map(serviceResponse -> 
BatchResponseParser.fromDocumentServiceResponse(serviceResponse, serverBatchRequest, true)); @@ -5089,7 +5526,9 @@ private Flux> nonDocumentReadFeedInternal( nonNullOptions, createRequestFunc, executeFunc, - maxPageSize); + maxPageSize, + this.globalEndpointManager, + this.globalPartitionEndpointManagerForCircuitBreaker); } @Override @@ -5133,6 +5572,10 @@ public void setSession(ISessionContainer sessionContainer) { this.sessionContainer = sessionContainer; } + public CosmosAsyncClient getCachedCosmosAsyncClientSnapshot() { + return cachedCosmosAsyncClientSnapshot.get(); + } + @Override public RxClientCollectionCache getCollectionCache() { return this.collectionCache; @@ -5148,6 +5591,11 @@ public GlobalEndpointManager getGlobalEndpointManager() { return this.globalEndpointManager; } + @Override + public GlobalPartitionEndpointManagerForCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker() { + return this.globalPartitionEndpointManagerForCircuitBreaker; + } + @Override public AddressSelector getAddressSelector() { return new AddressSelector(this.addressResolver, this.configs.getProtocol()); @@ -5457,12 +5905,124 @@ static UUID randomUuid(long msb, long lsb) { return new UUID(msb, lsb); } + public void addPartitionLevelUnavailableRegionsForRequest( + RxDocumentServiceRequest request, + RequestOptions options, + CollectionRoutingMap collectionRoutingMap) { + + checkNotNull(request, "Argument 'request' cannot be null!"); + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + + checkNotNull(options, "Argument 'options' cannot be null!"); + checkNotNull(options.getPartitionKeyDefinition(), "Argument 'partitionKeyDefinition' within options cannot be null!"); + checkNotNull(collectionRoutingMap, "Argument 'collectionRoutingMap' cannot be null!"); + + PartitionKeyDefinition partitionKeyDefinition = options.getPartitionKeyDefinition(); + PartitionKeyInternal partitionKeyInternal = 
request.getPartitionKeyInternal(); + + String effectivePartitionKeyString = PartitionKeyInternalHelper.getEffectivePartitionKeyString(partitionKeyInternal, partitionKeyDefinition); + PartitionKeyRange partitionKeyRange = collectionRoutingMap.getRangeByEffectivePartitionKey(effectivePartitionKeyString); + + checkNotNull(partitionKeyRange, "partitionKeyRange cannot be null!"); + checkNotNull(this.globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); + + List unavailableRegionsForPartition + = this.globalPartitionEndpointManagerForCircuitBreaker.getUnavailableRegionsForPartitionKeyRange( + request.getResourceId(), + partitionKeyRange, + request.getOperationType()); + + // cache the effective partition key if possible - can be a bottleneck, + // since it is also recomputed in AddressResolver + request.setEffectivePartitionKey(effectivePartitionKeyString); + request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + } + } + + public void mergeContextInformationIntoDiagnosticsForPointRequest( + RxDocumentServiceRequest request, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { + + if (pointOperationContextForCircuitBreaker != null) { + MetadataDiagnosticsContext metadataDiagnosticsContext + = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); + SerializationDiagnosticsContext serializationDiagnosticsContext + = pointOperationContextForCircuitBreaker.getSerializationDiagnosticsContext(); + + diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); + diagnosticsAccessor.mergeSerializationDiagnosticContext(request.requestContext.cosmosDiagnostics, serializationDiagnosticsContext); + } + } + + public void addPartitionLevelUnavailableRegionsForFeedRequest( + RxDocumentServiceRequest request, + CosmosQueryRequestOptions options, + CollectionRoutingMap 
collectionRoutingMap) { + + checkNotNull(collectionRoutingMap, "collectionRoutingMap cannot be null!"); + + PartitionKeyRange resolvedPartitionKeyRange = null; + + if (request.getPartitionKeyRangeIdentity() != null) { + resolvedPartitionKeyRange = collectionRoutingMap.getRangeByPartitionKeyRangeId(request.getPartitionKeyRangeIdentity().getPartitionKeyRangeId()); + } else if (request.getPartitionKeyInternal() != null) { + String effectivePartitionKeyString = PartitionKeyInternalHelper.getEffectivePartitionKeyString(request.getPartitionKeyInternal(), ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().getPartitionKeyDefinition(options)); + resolvedPartitionKeyRange = collectionRoutingMap.getRangeByEffectivePartitionKey(effectivePartitionKeyString); + } + + checkNotNull(resolvedPartitionKeyRange, "resolvedPartitionKeyRange cannot be null!"); + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); + + List unavailableRegionsForPartition + = this.globalPartitionEndpointManagerForCircuitBreaker.getUnavailableRegionsForPartitionKeyRange( + request.getResourceId(), + resolvedPartitionKeyRange, + request.getOperationType()); + + request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + } + } + + public void addPartitionLevelUnavailableRegionsForChangeFeedRequest( + RxDocumentServiceRequest request, + CosmosChangeFeedRequestOptions options, + CollectionRoutingMap collectionRoutingMap) { + checkNotNull(collectionRoutingMap, "collectionRoutingMap cannot be null!"); + + PartitionKeyRange resolvedPartitionKeyRange = null; + + if (request.getPartitionKeyRangeIdentity() != null) { + resolvedPartitionKeyRange = 
collectionRoutingMap.getRangeByPartitionKeyRangeId(request.getPartitionKeyRangeIdentity().getPartitionKeyRangeId()); + } else if (request.getPartitionKeyInternal() != null) { + String effectivePartitionKeyString = PartitionKeyInternalHelper.getEffectivePartitionKeyString(request.getPartitionKeyInternal(), ImplementationBridgeHelpers.CosmosChangeFeedRequestOptionsHelper.getCosmosChangeFeedRequestOptionsAccessor().getPartitionKeyDefinition(options)); + resolvedPartitionKeyRange = collectionRoutingMap.getRangeByEffectivePartitionKey(effectivePartitionKeyString); + } + + checkNotNull(resolvedPartitionKeyRange, "resolvedPartitionKeyRange cannot be null!"); + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); + + List unavailableRegionsForPartition + = this.globalPartitionEndpointManagerForCircuitBreaker.getUnavailableRegionsForPartitionKeyRange( + request.getResourceId(), + resolvedPartitionKeyRange, + request.getOperationType()); + + request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + } + } + private Mono> wrapPointOperationWithAvailabilityStrategy( ResourceType resourceType, OperationType operationType, DocumentPointOperation callback, RequestOptions initialRequestOptions, - boolean idempotentWriteRetriesEnabled) { + boolean idempotentWriteRetriesEnabled, + String collectionLink) { return wrapPointOperationWithAvailabilityStrategy( resourceType, @@ -5470,7 +6030,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat callback, initialRequestOptions, idempotentWriteRetriesEnabled, - this + this, + collectionLink ); } @@ -5480,163 +6041,215 @@ private Mono> wrapPointOperationWithAvailabilityStrat DocumentPointOperation callback, RequestOptions initialRequestOptions, boolean idempotentWriteRetriesEnabled, - DiagnosticsClientContext 
innerDiagnosticsFactory) { + DiagnosticsClientContext innerDiagnosticsFactory, + String collectionLink) { - checkNotNull(resourceType, "Argument 'resourceType' must not be null."); - checkNotNull(operationType, "Argument 'operationType' must not be null."); - checkNotNull(callback, "Argument 'callback' must not be null."); + final MetadataDiagnosticsContext metadataDiagnosticsContext = new MetadataDiagnosticsContext(); - final RequestOptions nonNullRequestOptions = - initialRequestOptions != null ? initialRequestOptions : new RequestOptions(); + return Mono.defer(() -> this.collectionCache.resolveByNameAsync(metadataDiagnosticsContext, collectionLink, null) + .flatMap(collection -> this.partitionKeyRangeCache.tryLookupAsync(metadataDiagnosticsContext, collection.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { - checkArgument( - resourceType == ResourceType.Document, - "This method can only be used for document point operations."); + checkNotNull(resourceType, "Argument 'resourceType' must not be null."); + checkNotNull(operationType, "Argument 'operationType' must not be null."); + checkNotNull(callback, "Argument 'callback' must not be null."); - CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig = - getEndToEndOperationLatencyPolicyConfig(nonNullRequestOptions, resourceType, operationType); + final RequestOptions nonNullRequestOptions = + initialRequestOptions != null ? 
initialRequestOptions : new RequestOptions(); - List orderedApplicableRegionsForSpeculation = getApplicableRegionsForSpeculation( - endToEndPolicyConfig, - resourceType, - operationType, - idempotentWriteRetriesEnabled, - nonNullRequestOptions); - - if (orderedApplicableRegionsForSpeculation.size() < 2) { - // There is at most one applicable region - no hedging possible - return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory); - } + checkArgument( + resourceType == ResourceType.Document, + "This method can only be used for document point operations."); - ThresholdBasedAvailabilityStrategy availabilityStrategy = - (ThresholdBasedAvailabilityStrategy)endToEndPolicyConfig.getAvailabilityStrategy(); - List> monoList = new ArrayList<>(); + CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig = + getEndToEndOperationLatencyPolicyConfig(nonNullRequestOptions, resourceType, operationType); - final ScopedDiagnosticsFactory diagnosticsFactory = new ScopedDiagnosticsFactory(innerDiagnosticsFactory, false); + if (collectionRoutingMapValueHolder.v == null) { + return Mono.error(new CollectionRoutingMapNotFoundException("Argument 'collectionRoutingMapValueHolder.v' cannot be null!")); + } - orderedApplicableRegionsForSpeculation - .forEach(region -> { - RequestOptions clonedOptions = new RequestOptions(nonNullRequestOptions); + nonNullRequestOptions.setPartitionKeyDefinition(collection.getPartitionKey()); - if (monoList.isEmpty()) { - // no special error handling for transient errors to suppress them here - // because any cross-regional retries are expected to be processed - // by the ClientRetryPolicy for the initial request - so, any outcome of the - // initial Mono should be treated as non-transient error - even when - // the error would otherwise be treated as transient - Mono initialMonoAcrossAllRegions = - callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory) - .map(NonTransientPointOperationResult::new) - 
.onErrorResume( - RxDocumentClientImpl::isCosmosException, - t -> Mono.just( - new NonTransientPointOperationResult( - Utils.as(Exceptions.unwrap(t), CosmosException.class)))); - - if (logger.isDebugEnabled()) { - monoList.add(initialMonoAcrossAllRegions.doOnSubscribe(c -> logger.debug( - "STARTING to process {} operation in region '{}'", - operationType, - region))); - } else { - monoList.add(initialMonoAcrossAllRegions); - } - } else { - clonedOptions.setExcludedRegions( - getEffectiveExcludedRegionsForHedging( - nonNullRequestOptions.getExcludedRegions(), - orderedApplicableRegionsForSpeculation, - region) - ); + List orderedApplicableRegionsForSpeculation = getApplicableRegionsForSpeculation( + endToEndPolicyConfig, + resourceType, + operationType, + idempotentWriteRetriesEnabled, + nonNullRequestOptions); - // Non-Transient errors are mapped to a value - this ensures the firstWithValue - // operator below will complete the composite Mono for both successful values - // and non-transient errors - Mono regionalCrossRegionRetryMono = - callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory) - .map(NonTransientPointOperationResult::new) - .onErrorResume( - RxDocumentClientImpl::isNonTransientCosmosException, - t -> Mono.just( - new NonTransientPointOperationResult( - Utils.as(Exceptions.unwrap(t), CosmosException.class)))); + AtomicBoolean isOperationSuccessful = new AtomicBoolean(false); - Duration delayForCrossRegionalRetry = (availabilityStrategy) - .getThreshold() - .plus((availabilityStrategy) - .getThresholdStep() - .multipliedBy(monoList.size() - 1)); + if (orderedApplicableRegionsForSpeculation.size() < 2) { + // There is at most one applicable region - no hedging possible + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForMainRequest = new PointOperationContextForCircuitBreaker( + isOperationSuccessful, + false, + collectionLink, + metadataDiagnosticsContext, + new SerializationDiagnosticsContext()); - if 
(logger.isDebugEnabled()) { - monoList.add( - regionalCrossRegionRetryMono - .doOnSubscribe(c -> logger.debug("STARTING to process {} operation in region '{}'", operationType, region)) - .delaySubscription(delayForCrossRegionalRetry)); - } else { - monoList.add( - regionalCrossRegionRetryMono - .delaySubscription(delayForCrossRegionalRetry)); + pointOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); + return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, pointOperationContextForCircuitBreakerForMainRequest, collectionRoutingMapValueHolder); } - } - }); - // NOTE - merging diagnosticsFactory cannot only happen in - // doFinally operator because the doFinally operator is a side effect method - - // meaning it executes concurrently with firing the onComplete/onError signal - // doFinally is also triggered by cancellation - // So, to make sure merging the Context happens synchronously in line we - // have to ensure merging is happening on error/completion - // and also in doOnCancel. 
- return Mono - .firstWithValue(monoList) - .flatMap(nonTransientResult -> { - diagnosticsFactory.merge(nonNullRequestOptions); - if (nonTransientResult.isError()) { - return Mono.error(nonTransientResult.exception); - } - - return Mono.just(nonTransientResult.response); - }) - .onErrorMap(throwable -> { - Throwable exception = Exceptions.unwrap(throwable); + ThresholdBasedAvailabilityStrategy availabilityStrategy = + (ThresholdBasedAvailabilityStrategy) endToEndPolicyConfig.getAvailabilityStrategy(); + List> monoList = new ArrayList<>(); + + final ScopedDiagnosticsFactory diagnosticsFactory = new ScopedDiagnosticsFactory(innerDiagnosticsFactory, false); + + orderedApplicableRegionsForSpeculation + .forEach(region -> { + RequestOptions clonedOptions = new RequestOptions(nonNullRequestOptions); + + if (monoList.isEmpty()) { + // no special error handling for transient errors to suppress them here + // because any cross-regional retries are expected to be processed + // by the ClientRetryPolicy for the initial request - so, any outcome of the + // initial Mono should be treated as non-transient error - even when + // the error would otherwise be treated as transient + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForMainRequest + = new PointOperationContextForCircuitBreaker( + isOperationSuccessful, + true, + collectionLink, + metadataDiagnosticsContext, + new SerializationDiagnosticsContext()); + + pointOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); + Mono initialMonoAcrossAllRegions = + callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForCircuitBreakerForMainRequest, collectionRoutingMapValueHolder) + .map(NonTransientPointOperationResult::new) + .onErrorResume( + RxDocumentClientImpl::isCosmosException, + t -> Mono.just( + new NonTransientPointOperationResult( + Utils.as(Exceptions.unwrap(t), CosmosException.class)))); + + if (logger.isDebugEnabled()) { + 
monoList.add(initialMonoAcrossAllRegions.doOnSubscribe(c -> logger.debug( + "STARTING to process {} operation in region '{}'", + operationType, + region))); + } else { + monoList.add(initialMonoAcrossAllRegions); + } + } else { + clonedOptions.setExcludedRegions( + getEffectiveExcludedRegionsForHedging( + nonNullRequestOptions.getExcludedRegions(), + orderedApplicableRegionsForSpeculation, + region) + ); + + // Non-Transient errors are mapped to a value - this ensures the firstWithValue + // operator below will complete the composite Mono for both successful values + // and non-transient errors + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForHedgedRequest + = new PointOperationContextForCircuitBreaker( + isOperationSuccessful, + true, + collectionLink, + metadataDiagnosticsContext, + new SerializationDiagnosticsContext()); + + pointOperationContextForCircuitBreakerForHedgedRequest.setIsRequestHedged(true); + Mono regionalCrossRegionRetryMono = + callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForCircuitBreakerForHedgedRequest, collectionRoutingMapValueHolder) + .map(NonTransientPointOperationResult::new) + .onErrorResume( + RxDocumentClientImpl::isNonTransientCosmosException, + t -> Mono.just( + new NonTransientPointOperationResult( + Utils.as(Exceptions.unwrap(t), CosmosException.class)))); + + Duration delayForCrossRegionalRetry = (availabilityStrategy) + .getThreshold() + .plus((availabilityStrategy) + .getThresholdStep() + .multipliedBy(monoList.size() - 1)); + + if (logger.isDebugEnabled()) { + monoList.add( + regionalCrossRegionRetryMono + .doOnSubscribe(c -> logger.debug("STARTING to process {} operation in region '{}'", operationType, region)) + .delaySubscription(delayForCrossRegionalRetry)); + } else { + monoList.add( + regionalCrossRegionRetryMono + .delaySubscription(delayForCrossRegionalRetry)); + } + } + }); - if (exception instanceof NoSuchElementException) { + // NOTE - 
merging diagnosticsFactory cannot only happen in + // doFinally operator because the doFinally operator is a side effect method - + // meaning it executes concurrently with firing the onComplete/onError signal + // doFinally is also triggered by cancellation + // So, to make sure merging the Context happens synchronously in line we + // have to ensure merging is happening on error/completion + // and also in doOnCancel. + return Mono + .firstWithValue(monoList) + .flatMap(nonTransientResult -> { + diagnosticsFactory.merge(nonNullRequestOptions); + if (nonTransientResult.isError()) { + return Mono.error(nonTransientResult.exception); + } - List innerThrowables = Exceptions - .unwrapMultiple(exception.getCause()); + return Mono.just(nonTransientResult.response); + }) + .onErrorMap(throwable -> { + Throwable exception = Exceptions.unwrap(throwable); + + if (exception instanceof NoSuchElementException) { + + List innerThrowables = Exceptions + .unwrapMultiple(exception.getCause()); + + int index = 0; + for (Throwable innerThrowable : innerThrowables) { + Throwable innerException = Exceptions.unwrap(innerThrowable); + + // collect latest CosmosException instance bubbling up for a region + if (innerException instanceof CosmosException) { + CosmosException cosmosException = Utils.as(innerException, CosmosException.class); + diagnosticsFactory.merge(nonNullRequestOptions); + return cosmosException; + } else if (innerException instanceof NoSuchElementException) { + logger.trace( + "Operation in {} completed with empty result because it was cancelled.", + orderedApplicableRegionsForSpeculation.get(index)); + } else if (logger.isWarnEnabled()) { + String message = "Unexpected Non-CosmosException when processing operation in '" + + orderedApplicableRegionsForSpeculation.get(index) + + "'."; + logger.warn( + message, + innerException + ); + } - int index = 0; - for (Throwable innerThrowable : innerThrowables) { - Throwable innerException = Exceptions.unwrap(innerThrowable); + 
index++; + } + } - // collect latest CosmosException instance bubbling up for a region - if (innerException instanceof CosmosException) { - CosmosException cosmosException = Utils.as(innerException, CosmosException.class); diagnosticsFactory.merge(nonNullRequestOptions); - return cosmosException; - } else if (innerException instanceof NoSuchElementException) { - logger.trace( - "Operation in {} completed with empty result because it was cancelled.", - orderedApplicableRegionsForSpeculation.get(index)); - } else if (logger.isWarnEnabled()) { - String message = "Unexpected Non-CosmosException when processing operation in '" - + orderedApplicableRegionsForSpeculation.get(index) - + "'."; - logger.warn( - message, - innerException - ); - } - index++; - } - } - - diagnosticsFactory.merge(nonNullRequestOptions); - - return exception; - }) - .doOnCancel(() -> diagnosticsFactory.merge(nonNullRequestOptions)); + return exception; + }) + .doOnCancel(() -> diagnosticsFactory.merge(nonNullRequestOptions)); + })) + ) + .retryWhen(Retry.fixedDelay(Configs.getStaleCollectionCacheRefreshRetryCount(), Duration.ofSeconds(Configs.getStaleCollectionCacheRefreshRetryIntervalInSeconds())) + .filter(throwable -> throwable instanceof CollectionRoutingMapNotFoundException) + .doBeforeRetry((retrySignal) -> this.collectionCache + .refresh( + null, + collectionLink, + null) + ) + ); } private static boolean isCosmosException(Throwable t) { @@ -5816,8 +6429,9 @@ private Mono executeFeedOperationWithAvailabilityStrategy( final OperationType operationType, final Supplier retryPolicyFactory, final RxDocumentServiceRequest req, - final BiFunction, RxDocumentServiceRequest, Mono> feedOperation - ) { + final BiFunction, RxDocumentServiceRequest, Mono> feedOperation, + final String collectionLink) { + checkNotNull(retryPolicyFactory, "Argument 'retryPolicyFactory' must not be null."); checkNotNull(req, "Argument 'req' must not be null."); assert(resourceType == ResourceType.Document); @@ -5832,14 
+6446,34 @@ private Mono executeFeedOperationWithAvailabilityStrategy( resourceType, operationType, false, - initialExcludedRegions - ); + initialExcludedRegions); + + Map partitionKeyRangesWithSuccess = new ConcurrentHashMap<>(); + if (orderedApplicableRegionsForSpeculation.size() < 2) { + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow + = new FeedOperationContextForCircuitBreaker( + partitionKeyRangesWithSuccess, + false, + collectionLink); + + feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow.setIsRequestHedged(false); + req.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow); + // There is at most one applicable region - no hedging possible return feedOperation.apply(retryPolicyFactory, req); } + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForParentRequestInAvailabilityStrategyFlow + = new FeedOperationContextForCircuitBreaker( + partitionKeyRangesWithSuccess, + true, + collectionLink); + + feedOperationContextForCircuitBreakerForParentRequestInAvailabilityStrategyFlow.setIsRequestHedged(false); + req.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForParentRequestInAvailabilityStrategyFlow); + ThresholdBasedAvailabilityStrategy availabilityStrategy = (ThresholdBasedAvailabilityStrategy)endToEndPolicyConfig.getAvailabilityStrategy(); List>> monoList = new ArrayList<>(); @@ -5854,14 +6488,23 @@ private Mono executeFeedOperationWithAvailabilityStrategy( // by the ClientRetryPolicy for the initial request - so, any outcome of the // initial Mono should be treated as non-transient error - even when // the error would otherwise be treated as transient + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForNonHedgedRequest + = new FeedOperationContextForCircuitBreaker( + partitionKeyRangesWithSuccess, + true, + 
collectionLink); + + feedOperationContextForCircuitBreakerForNonHedgedRequest.setIsRequestHedged(false); + clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForNonHedgedRequest); + Mono> initialMonoAcrossAllRegions = - feedOperation.apply(retryPolicyFactory, clonedRequest) - .map(NonTransientFeedOperationResult::new) - .onErrorResume( - RxDocumentClientImpl::isCosmosException, - t -> Mono.just( - new NonTransientFeedOperationResult<>( - Utils.as(Exceptions.unwrap(t), CosmosException.class)))); + handleCircuitBreakingFeedbackForFeedOperationWithAvailabilityStrategy(feedOperation.apply(retryPolicyFactory, clonedRequest) + .map(NonTransientFeedOperationResult::new) + .onErrorResume( + RxDocumentClientImpl::isCosmosException, + t -> Mono.just( + new NonTransientFeedOperationResult<>( + Utils.as(Exceptions.unwrap(t), CosmosException.class)))), clonedRequest); if (logger.isDebugEnabled()) { monoList.add(initialMonoAcrossAllRegions.doOnSubscribe(c -> logger.debug( @@ -5879,19 +6522,28 @@ private Mono executeFeedOperationWithAvailabilityStrategy( region) ); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForHedgedRequest + = new FeedOperationContextForCircuitBreaker( + partitionKeyRangesWithSuccess, + true, + collectionLink); + + feedOperationContextForCircuitBreakerForHedgedRequest.setIsRequestHedged(true); + clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForHedgedRequest); + clonedRequest.requestContext.setKeywordIdentifiers(req.requestContext.getKeywordIdentifiers()); // Non-Transient errors are mapped to a value - this ensures the firstWithValue // operator below will complete the composite Mono for both successful values // and non-transient errors Mono> regionalCrossRegionRetryMono = - feedOperation.apply(retryPolicyFactory, clonedRequest) - .map(NonTransientFeedOperationResult::new) - .onErrorResume( - RxDocumentClientImpl::isNonTransientCosmosException, - 
t -> Mono.just( - new NonTransientFeedOperationResult<>( - Utils.as(Exceptions.unwrap(t), CosmosException.class)))); + handleCircuitBreakingFeedbackForFeedOperationWithAvailabilityStrategy(feedOperation.apply(retryPolicyFactory, clonedRequest) + .map(NonTransientFeedOperationResult::new) + .onErrorResume( + RxDocumentClientImpl::isNonTransientCosmosException, + t -> Mono.just( + new NonTransientFeedOperationResult<>( + Utils.as(Exceptions.unwrap(t), CosmosException.class)))), clonedRequest); Duration delayForCrossRegionalRetry = (availabilityStrategy) .getThreshold() @@ -5965,9 +6617,25 @@ private Mono executeFeedOperationWithAvailabilityStrategy( }); } + private void handleLocationCancellationExceptionForPartitionKeyRange(RxDocumentServiceRequest failedRequest) { + + URI firstContactedLocationEndpoint = diagnosticsAccessor + .getFirstContactedLocationEndpoint(failedRequest.requestContext.cosmosDiagnostics); + + if (firstContactedLocationEndpoint != null) { + this.globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(failedRequest, firstContactedLocationEndpoint); + } + } + @FunctionalInterface private interface DocumentPointOperation { - Mono> apply(RequestOptions requestOptions, CosmosEndToEndOperationLatencyPolicyConfig endToEndOperationLatencyPolicyConfig, DiagnosticsClientContext clientContextOverride); + Mono> apply( + RequestOptions requestOptions, + CosmosEndToEndOperationLatencyPolicyConfig endToEndOperationLatencyPolicyConfig, + DiagnosticsClientContext clientContextOverride, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, + Utils.ValueHolder collectionRoutingMap); } private static class NonTransientPointOperationResult { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java index 5d39d58914ad..a0b7daf4b351 100644 --- 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java @@ -87,6 +87,8 @@ public class RxDocumentServiceRequest implements Cloneable { private volatile boolean nonIdempotentWriteRetriesEnabled = false; + private volatile boolean hasFeedRangeFilteringBeenApplied = false; + public boolean isReadOnlyRequest() { return this.operationType.isReadOnlyOperation(); } @@ -120,6 +122,14 @@ public boolean getNonIdempotentWriteRetriesEnabled() { return this.nonIdempotentWriteRetriesEnabled; } + public boolean hasFeedRangeFilteringBeenApplied() { + return this.hasFeedRangeFilteringBeenApplied; + } + + public void setHasFeedRangeFilteringBeenApplied(boolean hasFeedRangeFilteringBeenApplied) { + this.hasFeedRangeFilteringBeenApplied = hasFeedRangeFilteringBeenApplied; + } + public boolean isReadOnly() { return this.isReadOnlyRequest() || this.isReadOnlyScript(); } @@ -1091,6 +1101,8 @@ public RxDocumentServiceRequest clone() { rxDocumentServiceRequest.feedRange = this.feedRange; rxDocumentServiceRequest.effectiveRange = this.effectiveRange; rxDocumentServiceRequest.isFeed = this.isFeed; + rxDocumentServiceRequest.resourceId = this.resourceId; + rxDocumentServiceRequest.hasFeedRangeFilteringBeenApplied = this.hasFeedRangeFilteringBeenApplied; return rxDocumentServiceRequest; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index a31664d4ccde..c613887966d7 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -9,6 +9,7 @@ import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import 
com.azure.cosmos.implementation.caches.RxClientCollectionCache; import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.directconnectivity.GatewayServiceConfigurationReader; import com.azure.cosmos.implementation.directconnectivity.HttpUtils; import com.azure.cosmos.implementation.directconnectivity.RequestHelper; @@ -79,6 +80,7 @@ public RxGatewayStoreModel( GlobalEndpointManager globalEndpointManager, HttpClient httpClient, ApiType apiType) { + this.clientContext = clientContext; this.defaultHeaders = new HashMap<>(); this.defaultHeaders.put(HttpConstants.HttpHeaders.CACHE_CONTROL, @@ -403,6 +405,7 @@ private Mono toDocumentServiceResponse(Mono invokeAsyncInternal(RxDocumentServiceReq } private Mono invokeAsync(RxDocumentServiceRequest request) { + Callable> funcDelegate = () -> invokeAsyncInternal(request).single(); MetadataRequestRetryPolicy metadataRequestRetryPolicy = new MetadataRequestRetryPolicy(this.globalEndpointManager); @@ -735,7 +739,8 @@ private Mono applySessionToken(RxDocumentServiceRequest request) { SessionTokenHelper.setPartitionLocalSessionToken(request, sessionContainer); } } else if (partitionKeyInternal != null) { - String effectivePartitionKeyString = PartitionKeyInternalHelper + String effectivePartitionKeyString = StringUtils.isNotEmpty(request.getEffectivePartitionKey()) ? 
+ request.getEffectivePartitionKey() : PartitionKeyInternalHelper .getEffectivePartitionKeyString( partitionKeyInternal, collectionValueHolder.v.getPartitionKey()); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java index 0042bb612496..0537a225e248 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java @@ -9,6 +9,7 @@ import com.azure.cosmos.CosmosItemSerializer; import com.azure.cosmos.ThrottlingRetryOptions; import com.azure.cosmos.implementation.AsyncDocumentClient; +import com.azure.cosmos.implementation.CollectionRoutingMapNotFoundException; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ResourceThrottleRetryPolicy; @@ -110,7 +111,7 @@ static Mono resolvePartitionKeyRangeId( if (operation instanceof ItemBulkOperation) { final ItemBulkOperation itemBulkOperation = (ItemBulkOperation) operation; - final Mono pkRangeIdMono = Mono.defer(() -> + return Mono.defer(() -> BulkExecutorUtil.getCollectionInfoAsync(docClientWrapper, container, collectionBeforeRecreation.get()) .flatMap(collection -> { final PartitionKeyDefinition definition = collection.getPartitionKey(); @@ -152,8 +153,6 @@ static Mono resolvePartitionKeyRangeId( null) ) ); - - return pkRangeIdMono; } else { throw new UnsupportedOperationException("Unknown CosmosItemOperation."); } @@ -202,26 +201,4 @@ static boolean isWriteOperation(CosmosItemOperationType cosmosItemOperationType) cosmosItemOperationType == CosmosItemOperationType.DELETE || cosmosItemOperationType == CosmosItemOperationType.PATCH; } - - static class CollectionRoutingMapNotFoundException extends CosmosException { - - 
private static final long serialVersionUID = 1L; - - /** - * Instantiates a new Invalid partition exception. - * - * @param msg the msg - */ - public CollectionRoutingMapNotFoundException(String msg) { - super(HttpConstants.StatusCodes.NOTFOUND, msg); - setSubStatus(); - } - - private void setSubStatus() { - this.getResponseHeaders().put( - WFConstants.BackendHeaders.SUB_STATUS, - Integer.toString(HttpConstants.SubStatusCodes.INCORRECT_CONTAINER_RID_SUB_STATUS)); - } - } - } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java index dab4f2838194..9750771b6147 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java @@ -249,6 +249,7 @@ private Mono> getPartitionKeyRange(MetadataDiagnosticsCo ); //this request doesn't actually go to server request.requestContext.resolvedCollectionRid = collectionRid; + request.setResourceId(collectionRid); Mono collectionObs = collectionCache.resolveCollectionAsync(metaDataDiagnosticsContext, request) .map(collectionValueHolder -> collectionValueHolder.v); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java new file mode 100644 index 000000000000..e1b12d00a037 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -0,0 +1,241 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.cosmos.implementation.circuitBreaker; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ConsecutiveExceptionBasedCircuitBreaker { + + private static final Logger logger = LoggerFactory.getLogger(ConsecutiveExceptionBasedCircuitBreaker.class); + private final PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig; + + public ConsecutiveExceptionBasedCircuitBreaker(PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig) { + this.partitionLevelCircuitBreakerConfig = partitionLevelCircuitBreakerConfig; + } + + public LocationSpecificHealthContext handleException( + LocationSpecificHealthContext locationSpecificHealthContext, + PartitionKeyRangeWrapper partitionKeyRangeWrapper, + String regionWithException, + boolean isReadOnlyRequest) { + + int exceptionCountAfterHandling + = (isReadOnlyRequest) ? locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking(); + + LocationHealthStatus locationHealthStatus = locationSpecificHealthContext.getLocationHealthStatus(); + + switch (locationHealthStatus) { + case Healthy: + return locationSpecificHealthContext; + case HealthyWithFailures: + case HealthyTentative: + + exceptionCountAfterHandling++; + int successCountAfterHandling = 0; + + LocationSpecificHealthContext.Builder builder = new LocationSpecificHealthContext.Builder() + .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) + .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) + .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()); + + if (isReadOnlyRequest) { + + return builder + .withSuccessCountForWriteForRecovery(locationSpecificHealthContext.getSuccessCountForWriteForRecovery()) + .withExceptionCountForWriteForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking()) + 
.withSuccessCountForReadForRecovery(successCountAfterHandling)
+                        .withExceptionCountForReadForCircuitBreaking(exceptionCountAfterHandling)
+                        .build();
+
+                } else {
+
+                    return builder
+                        .withSuccessCountForWriteForRecovery(successCountAfterHandling)
+                        .withExceptionCountForWriteForCircuitBreaking(exceptionCountAfterHandling)
+                        .withSuccessCountForReadForRecovery(locationSpecificHealthContext.getSuccessCountForReadForRecovery())
+                        .withExceptionCountForReadForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking())
+                        .build();
+                }
+            case Unavailable:
+                // the tests done so far view this as an unreachable piece of code - but not failing the operation
+                // with IllegalStateException and simply logging that a presumed unreachable code path seems to make sense for now
+                // the range is logged as <minInclusive>-<maxExclusive>
+                logger.warn("Region {} should not be handling failures in {} health status for partition key range : {} and collection RID : {}",
+                    regionWithException,
+                    locationHealthStatus.getStringifiedLocationHealthStatus(),
+                    partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive() + "-" + partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(),
+                    partitionKeyRangeWrapper.getCollectionResourceId());
+                return locationSpecificHealthContext;
+            default:
+                throw new IllegalArgumentException("Unsupported health status : " + locationHealthStatus);
+        }
+    }
+
+    /**
+     * Produces the health context that results from a successful request against a region.
+     *
+     * <ul>
+     *   <li>{@code Healthy}: the incoming context is returned unchanged.</li>
+     *   <li>{@code HealthyWithFailures}: the exception count of the request's kind (read or write) is reset to 0.</li>
+     *   <li>{@code HealthyTentative}: the success count of the request's kind is incremented by one.</li>
+     *   <li>{@code Unavailable}: a warning is logged and the incoming context is returned unchanged.</li>
+     * </ul>
+     *
+     * The health status itself is never changed here; only counters are updated.
+     *
+     * @param locationSpecificHealthContext the current health context of the region.
+     * @param partitionKeyRangeWrapper the partition key range (and collection RID) the request targeted; used for logging.
+     * @param regionWithSuccess the name of the region which served the request; used for logging.
+     * @param isReadOnlyRequest {@code true} to update the read counters, {@code false} to update the write counters.
+     * @return the (possibly new) health context after the success was accounted for.
+     * @throws IllegalArgumentException if the health status is not one of the handled values.
+     */
+    public LocationSpecificHealthContext handleSuccess(
+        LocationSpecificHealthContext locationSpecificHealthContext,
+        PartitionKeyRangeWrapper partitionKeyRangeWrapper,
+        String regionWithSuccess,
+        boolean isReadOnlyRequest) {
+
+        int exceptionCountAfterHandling
+            = (isReadOnlyRequest) ? locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking();
+
+        int successCountAfterHandling
+            = (isReadOnlyRequest) ? locationSpecificHealthContext.getSuccessCountForReadForRecovery() : locationSpecificHealthContext.getSuccessCountForWriteForRecovery();
+
+        LocationHealthStatus locationHealthStatus = locationSpecificHealthContext.getLocationHealthStatus();
+
+        switch (locationHealthStatus) {
+            case Healthy:
+                return locationSpecificHealthContext;
+            case HealthyWithFailures:
+
+                // a success breaks the streak of consecutive exceptions for this kind of request
+                exceptionCountAfterHandling = 0;
+
+                LocationSpecificHealthContext.Builder builder = new LocationSpecificHealthContext.Builder()
+                    .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince())
+                    .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus())
+                    .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached());
+
+                if (isReadOnlyRequest) {
+
+                    return builder
+                        .withSuccessCountForWriteForRecovery(locationSpecificHealthContext.getSuccessCountForWriteForRecovery())
+                        .withExceptionCountForWriteForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking())
+                        .withSuccessCountForReadForRecovery(locationSpecificHealthContext.getSuccessCountForReadForRecovery())
+                        .withExceptionCountForReadForCircuitBreaking(exceptionCountAfterHandling)
+                        .build();
+
+                } else {
+
+                    return builder
+                        .withSuccessCountForWriteForRecovery(locationSpecificHealthContext.getSuccessCountForWriteForRecovery())
+                        .withExceptionCountForWriteForCircuitBreaking(exceptionCountAfterHandling)
+                        .withSuccessCountForReadForRecovery(locationSpecificHealthContext.getSuccessCountForReadForRecovery())
+                        .withExceptionCountForReadForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking())
+                        .build();
+                }
+            case HealthyTentative:
+
+                // successes accumulate towards the count checked by canHealthStatusBeUpgraded
+                successCountAfterHandling++;
+
+                builder = new LocationSpecificHealthContext.Builder()
+                    .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince())
+                    .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus())
+                    .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached());
+
+                if (isReadOnlyRequest) {
+
+                    return builder
+                        .withSuccessCountForWriteForRecovery(locationSpecificHealthContext.getSuccessCountForWriteForRecovery())
+                        .withExceptionCountForWriteForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking())
+                        .withSuccessCountForReadForRecovery(successCountAfterHandling)
+                        .withExceptionCountForReadForCircuitBreaking(exceptionCountAfterHandling)
+                        .build();
+
+                } else {
+
+                    return builder
+                        .withSuccessCountForWriteForRecovery(successCountAfterHandling)
+                        .withExceptionCountForWriteForCircuitBreaking(exceptionCountAfterHandling)
+                        .withSuccessCountForReadForRecovery(locationSpecificHealthContext.getSuccessCountForReadForRecovery())
+                        .withExceptionCountForReadForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking())
+                        .build();
+
+                }
+            case Unavailable:
+                // the tests done so far view this as an unreachable piece of code - but not failing the operation
+                // and simply logging that a presumed unreachable code path seems to make sense for now
+                // the range is logged as <minInclusive>-<maxExclusive>
+                logger.warn("Region {} should not be handling successes in {} health status for partition key range : {} and collection RID : {}",
+                    regionWithSuccess,
+                    locationHealthStatus.getStringifiedLocationHealthStatus(),
+                    partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive() + "-" + partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(),
+                    partitionKeyRangeWrapper.getCollectionResourceId());
+                return locationSpecificHealthContext;
+            default:
+                throw new IllegalArgumentException("Unsupported health status : " + locationHealthStatus);
+        }
+    }
+
+    /**
+     * Checks whether the exception count of the request's kind has reached the count allowed for the
+     * context's current health status (see {@link #getAllowedExceptionCountToMaintainStatus}).
+     *
+     * @param locationSpecificHealthContext the health context to inspect.
+     * @param isReadOnlyRequest {@code true} to inspect the read counter, {@code false} for the write counter.
+     * @return {@code true} when the exception count is greater than or equal to the allowed count.
+     */
+    public boolean shouldHealthStatusBeDowngraded(LocationSpecificHealthContext locationSpecificHealthContext, boolean isReadOnlyRequest) {
+
+        int exceptionCountActual
+            = isReadOnlyRequest ? locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking();
+
+        return exceptionCountActual >= getAllowedExceptionCountToMaintainStatus(locationSpecificHealthContext.getLocationHealthStatus(), isReadOnlyRequest);
+    }
+
+    /**
+     * Checks whether the success count of the request's kind has reached the minimum required for the
+     * context's current health status (see {@link #getMinimumSuccessCountForStatusUpgrade}).
+     *
+     * @param locationSpecificHealthContext the health context to inspect.
+     * @param isReadOnlyRequest {@code true} to inspect the read counter, {@code false} for the write counter.
+     * @return {@code true} when the success count is greater than or equal to the required minimum.
+     */
+    public boolean canHealthStatusBeUpgraded(LocationSpecificHealthContext locationSpecificHealthContext, boolean isReadOnlyRequest) {
+
+        int successCountActual
+            = isReadOnlyRequest ? locationSpecificHealthContext.getSuccessCountForReadForRecovery() : locationSpecificHealthContext.getSuccessCountForWriteForRecovery();
+
+        LocationHealthStatus locationHealthStatus = locationSpecificHealthContext.getLocationHealthStatus();
+
+        return successCountActual >= getMinimumSuccessCountForStatusUpgrade(locationHealthStatus, isReadOnlyRequest);
+    }
+
+    /**
+     * Returns the exception count compared against by {@link #shouldHealthStatusBeDowngraded}.
+     *
+     * {@code HealthyWithFailures} uses the configured consecutive-exception tolerance (reads or writes),
+     * {@code HealthyTentative} uses half of it (integer division), {@code Healthy} and {@code Unavailable} use 0.
+     *
+     * @param status the health status to look up.
+     * @param isReadOnlyRequest {@code true} for the read tolerance, {@code false} for the write tolerance.
+     * @return the exception count for the given status.
+     * @throws IllegalArgumentException if the status is not one of the handled values.
+     */
+    public int getAllowedExceptionCountToMaintainStatus(LocationHealthStatus status, boolean isReadOnlyRequest) {
+
+        if (isReadOnlyRequest) {
+            switch (status) {
+                case HealthyWithFailures:
+                    return this.partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForReads();
+                case HealthyTentative:
+                    return this.partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForReads() / 2;
+                case Healthy:
+                case Unavailable:
+                    return 0;
+                default:
+                    throw new IllegalArgumentException("Unsupported health status: " + status);
+            }
+        } else {
+            switch (status) {
+                case HealthyWithFailures:
+                    return this.partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForWrites();
+                case HealthyTentative:
+                    return this.partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForWrites() / 2;
+                case Healthy:
+                case Unavailable:
+                    return 0;
+                default:
+                    throw new IllegalArgumentException("Unsupported health status: " + status);
+            }
+        }
+    }
+
+    /**
+     * Returns the success count compared against by {@link #canHealthStatusBeUpgraded}.
+     *
+     * {@code HealthyTentative} uses the configured consecutive-exception tolerance (reads or writes);
+     * every other handled status uses 0.
+     *
+     * @param status the health status to look up.
+     * @param isReadOnlyRequest {@code true} for the read tolerance, {@code false} for the write tolerance.
+     * @return the success count for the given status.
+     * @throws IllegalArgumentException if the status is not one of the handled values.
+     */
+    public int getMinimumSuccessCountForStatusUpgrade(LocationHealthStatus status, boolean isReadOnlyRequest) {
+        if (isReadOnlyRequest) {
+            switch (status) {
+                case HealthyTentative:
+                    return this.partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForReads();
+                case Unavailable:
+                case HealthyWithFailures:
+                case Healthy:
+                    return 0;
+                default:
+                    throw new IllegalArgumentException("Unsupported health status: " + status);
+            }
+        } else {
+            switch (status) {
+                case HealthyTentative:
+                    return this.partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForWrites();
+                case Unavailable:
+                case HealthyWithFailures:
+                case Healthy:
+                    return 0;
+                default:
+                    throw new IllegalArgumentException("Unsupported health status: " + status);
+            }
+        }
+    }
+
+    /**
+     * @return whether the underlying configuration has the partition-level circuit breaker enabled.
+     */
+    public boolean isPartitionLevelCircuitBreakerEnabled() {
+        return this.partitionLevelCircuitBreakerConfig.isPartitionLevelCircuitBreakerEnabled();
+    }
+
+    /**
+     * @return the circuit breaker configuration this instance reads its thresholds from.
+     */
+    public PartitionLevelCircuitBreakerConfig getPartitionLevelCircuitBreakerConfig() {
+        return partitionLevelCircuitBreakerConfig;
+    }
+}
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java
new file mode 100644
index 000000000000..3b4ca3a429b6
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java
@@ -0,0 +1,511 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.cosmos.implementation.circuitBreaker;
+
+import com.azure.cosmos.implementation.Configs;
+import com.azure.cosmos.implementation.CosmosSchedulers;
+import com.azure.cosmos.implementation.FeedOperationContextForCircuitBreaker;
+import com.azure.cosmos.implementation.GlobalEndpointManager;
+import com.azure.cosmos.implementation.ImplementationBridgeHelpers;
+import com.azure.cosmos.implementation.OperationType;
+import com.azure.cosmos.implementation.PartitionKeyRange;
+import com.azure.cosmos.implementation.PointOperationContextForCircuitBreaker;
+import com.azure.cosmos.implementation.ResourceType;
+import com.azure.cosmos.implementation.RxDocumentServiceRequest;
+import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList;
+import com.azure.cosmos.implementation.apachecommons.lang.StringUtils;
+import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair;
+import com.azure.cosmos.implementation.directconnectivity.GatewayAddressCache;
+import com.azure.cosmos.implementation.directconnectivity.GlobalAddressResolver;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import reactor.core.publisher.Flux;
+import reactor.core.publisher.Mono;
+
+import java.net.URI;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull;
+
+/**
+ * Tracks, per partition key range and per location endpoint, the health context used by the
+ * partition-level circuit breaker, and runs a background loop which probes locations that are
+ * currently not available to process requests.
+ */
+public class GlobalPartitionEndpointManagerForCircuitBreaker {
+
+    private static final Logger logger = LoggerFactory.getLogger(GlobalPartitionEndpointManagerForCircuitBreaker.class);
+
+    private static final ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.CosmosQueryRequestOptionsAccessor queryRequestOptionsAccessor
+        = ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor();
+    private final GlobalEndpointManager globalEndpointManager;
+    // health bookkeeping per (partition key range, collection RID)
+    private final ConcurrentHashMap<PartitionKeyRangeWrapper, PartitionLevelLocationUnavailabilityInfo> partitionKeyRangeToLocationSpecificUnavailabilityInfo;
+    // partition key ranges the background loop iterates over; populated by the transition handler
+    private final ConcurrentHashMap<PartitionKeyRangeWrapper, PartitionKeyRangeWrapper> partitionKeyRangesWithPossibleUnavailableRegions;
+    private final LocationSpecificHealthContextTransitionHandler locationSpecificHealthContextTransitionHandler;
+    private final ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker;
+    private final AtomicReference<GlobalAddressResolver> globalAddressResolverSnapshot;
+    // used only for building diagnostics / log messages - lookup from location endpoint to region name
+    private final ConcurrentHashMap<URI, String> locationToRegion;
+
+    /**
+     * Creates the manager; the circuit breaker thresholds are read from
+     * {@link Configs#getPartitionLevelCircuitBreakerConfig()}.
+     *
+     * @param globalEndpointManager used to resolve region names and applicable read / write endpoints.
+     */
+    public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) {
+        this.partitionKeyRangeToLocationSpecificUnavailabilityInfo = new ConcurrentHashMap<>();
+        this.partitionKeyRangesWithPossibleUnavailableRegions = new ConcurrentHashMap<>();
+        this.globalEndpointManager = globalEndpointManager;
+
+        PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig = Configs.getPartitionLevelCircuitBreakerConfig();
+        this.consecutiveExceptionBasedCircuitBreaker = new ConsecutiveExceptionBasedCircuitBreaker(partitionLevelCircuitBreakerConfig);
+        this.locationSpecificHealthContextTransitionHandler
+            = new LocationSpecificHealthContextTransitionHandler(this.globalEndpointManager, this.consecutiveExceptionBasedCircuitBreaker);
+        this.globalAddressResolverSnapshot = new AtomicReference<>();
+        this.locationToRegion = new ConcurrentHashMap<>();
+    }
+
+    /**
+     * Starts the background recovery loop when the partition-level circuit breaker is enabled.
+     */
+    public void init() {
+        if (this.consecutiveExceptionBasedCircuitBreaker.isPartitionLevelCircuitBreakerEnabled()) {
+            this.updateStaleLocationInfo().subscribeOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE).subscribe();
+        }
+    }
+
+    /**
+     * Records a failure of {@code request} against {@code failedLocation} for the request's resolved
+     * partition key range. When the failure breached the exception threshold and no applicable
+     * location is left for the partition key range, health tracking for the range is removed.
+     *
+     * @param request the failed request; its request context and resolved partition key range must not be null.
+     * @param failedLocation the location endpoint the request failed against.
+     * @throws NullPointerException if a required argument is null.
+     * @throws IllegalStateException if the request carries neither a point nor a feed operation context.
+     */
+    public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest request, URI failedLocation) {
+
+        checkNotNull(request, "Argument 'request' cannot be null!");
+        checkNotNull(request.requestContext, "Argument 'request.requestContext' cannot be null!");
+
+        PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange;
+
+        checkNotNull(request.requestContext.resolvedPartitionKeyRange, "Argument 'request.requestContext.resolvedPartitionKeyRange' cannot be null!");
+
+        String collectionResourceId = request.getResourceId();
+        checkNotNull(collectionResourceId, "Argument 'collectionResourceId' cannot be null!");
+
+        PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, collectionResourceId);
+
+        AtomicBoolean isFailoverPossible = new AtomicBoolean(true);
+        AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false);
+
+        // the value is not used below, but the call validates that the request carries
+        // a point or feed operation context (it throws otherwise) - so it is kept
+        String collectionLink = getCollectionLink(request);
+
+        this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.compute(partitionKeyRangeWrapper, (partitionKeyRangeWrapperAsKey, partitionLevelLocationUnavailabilityInfoAsVal) -> {
+
+            if (partitionLevelLocationUnavailabilityInfoAsVal == null) {
+                partitionLevelLocationUnavailabilityInfoAsVal = new PartitionLevelLocationUnavailabilityInfo();
+            }
+
+            isFailureThresholdBreached.set(partitionLevelLocationUnavailabilityInfoAsVal.handleException(
+                partitionKeyRangeWrapperAsKey,
+                failedLocation,
+                request.isReadOnlyRequest()));
+
+            if (isFailureThresholdBreached.get()) {
+
+                UnmodifiableList<URI> applicableEndpoints = request.isReadOnlyRequest() ?
+                    this.globalEndpointManager.getApplicableReadEndpoints(request.requestContext.getExcludeRegions()) :
+                    this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions());
+
+                isFailoverPossible.set(
+                    partitionLevelLocationUnavailabilityInfoAsVal.areLocationsAvailableForPartitionKeyRange(applicableEndpoints));
+            }
+
+            request.requestContext.setLocationToLocationSpecificHealthContext(partitionLevelLocationUnavailabilityInfoAsVal.regionToLocationSpecificHealthContext);
+            return partitionLevelLocationUnavailabilityInfoAsVal;
+        });
+
+        // isFailoverPossible starts as true and is only re-evaluated when the failure threshold was breached
+        // it turns false if and only if the threshold was breached and there is no available region left to fail over to
+        if (isFailoverPossible.get()) {
+            return;
+        }
+
+        if (logger.isWarnEnabled()) {
+            logger.warn("It is not possible to mark region {} as Unavailable for partition key range {}-{} and collection rid {} " +
+                    "as all regions will be Unavailable in that case, will remove health status tracking for this partition!",
+                this.globalEndpointManager.getRegionName(
+                    failedLocation, request.isReadOnlyRequest() ? OperationType.Read : OperationType.Create),
+                partitionKeyRange.getMinInclusive(),
+                partitionKeyRange.getMaxExclusive(),
+                collectionResourceId);
+        }
+
+        // no regions to fail over to
+        this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.remove(partitionKeyRangeWrapper);
+    }
+
+    /**
+     * Records a success of {@code request} against the location it was routed to
+     * ({@code request.requestContext.locationEndpointToRoute}) for the request's resolved partition key range.
+     *
+     * @param request the succeeded request; its request context and resolved partition key range must not be null.
+     * @throws NullPointerException if a required argument is null.
+     * @throws IllegalStateException if the request carries neither a point nor a feed operation context.
+     */
+    public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest request) {
+
+        checkNotNull(request, "Argument 'request' cannot be null!");
+        checkNotNull(request.requestContext, "Argument 'request.requestContext' cannot be null!");
+
+        PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange;
+
+        checkNotNull(request.requestContext.resolvedPartitionKeyRange, "Argument 'request.requestContext.resolvedPartitionKeyRange' cannot be null!");
+
+        String resourceId = request.getResourceId();
+
+        PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId);
+        URI succeededLocation = request.requestContext.locationEndpointToRoute;
+
+        String collectionLink = getCollectionLink(request);
+
+        this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.compute(partitionKeyRangeWrapper, (partitionKeyRangeWrapperAsKey, partitionKeyRangeToFailoverInfoAsVal) -> {
+
+            if (partitionKeyRangeToFailoverInfoAsVal == null) {
+                partitionKeyRangeToFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo();
+            }
+
+            partitionKeyRangeToFailoverInfoAsVal.handleSuccess(
+                partitionKeyRangeWrapper,
+                collectionLink,
+                succeededLocation,
+                request.isReadOnlyRequest());
+
+            request.requestContext.setLocationToLocationSpecificHealthContext(partitionKeyRangeToFailoverInfoAsVal.regionToLocationSpecificHealthContext);
+            return partitionKeyRangeToFailoverInfoAsVal;
+        });
+    }
+
+    /**
+     * Returns the names of the regions whose health status is {@code Unavailable} for the given partition key range.
+     *
+     * @param collectionResourceId the collection RID the partition key range belongs to.
+     * @param partitionKeyRange the partition key range to look up.
+     * @param operationType the operation type used to resolve a location endpoint to a region name.
+     * @return an unmodifiable list of region names; empty when nothing is tracked for the range.
+     * @throws NullPointerException if {@code partitionKeyRange} or {@code collectionResourceId} is null.
+     */
+    public List<String> getUnavailableRegionsForPartitionKeyRange(String collectionResourceId, PartitionKeyRange partitionKeyRange, OperationType operationType) {
+
+        checkNotNull(partitionKeyRange, "Argument 'partitionKeyRange' cannot be null!");
+        checkNotNull(collectionResourceId, "Argument 'collectionResourceId' cannot be null!");
+
+        PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, collectionResourceId);
+
+        PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot =
+            this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper);
+
+        List<String> unavailableRegions = new ArrayList<>();
+
+        if (partitionLevelLocationUnavailabilityInfoSnapshot != null) {
+            Map<URI, LocationSpecificHealthContext> locationEndpointToFailureMetricsForPartition =
+                partitionLevelLocationUnavailabilityInfoSnapshot.locationEndpointToLocationSpecificContextForPartition;
+
+            for (Map.Entry<URI, LocationSpecificHealthContext> pair : locationEndpointToFailureMetricsForPartition.entrySet()) {
+                URI location = pair.getKey();
+                LocationSpecificHealthContext locationSpecificHealthContext = pair.getValue();
+
+                if (locationSpecificHealthContext.getLocationHealthStatus() == LocationHealthStatus.Unavailable) {
+                    unavailableRegions.add(this.globalEndpointManager.getRegionName(location, operationType));
+                }
+            }
+        }
+
+        return UnmodifiableList.unmodifiableList(unavailableRegions);
+    }
+
+    /**
+     * Builds the background recovery loop. On every tick (interval from
+     * {@link Configs#getStalePartitionUnavailabilityRefreshIntervalInSeconds()}) it walks the partition key
+     * ranges with possibly unavailable regions and, for every location of such a range which is not available
+     * to process requests, submits open-connection tasks and applies a success transition once they complete.
+     *
+     * A failure (including a timeout) of a single probe is handled per probe so that it does not
+     * terminate the repeating loop.
+     */
+    private Flux<?> updateStaleLocationInfo() {
+        return Mono.just(1)
+            .delayElement(Duration.ofSeconds(Configs.getStalePartitionUnavailabilityRefreshIntervalInSeconds()))
+            .repeat()
+            .flatMap(ignore -> Flux.fromIterable(this.partitionKeyRangesWithPossibleUnavailableRegions.entrySet()))
+            .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE)
+            .flatMap(partitionKeyRangeWrapperToPartitionKeyRangeWrapperPair -> {
+
+                logger.debug("Background updateStaleLocationInfo kicking in...");
+                PartitionKeyRangeWrapper partitionKeyRangeWrapper = partitionKeyRangeWrapperToPartitionKeyRangeWrapperPair.getKey();
+
+                PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper);
+
+                if (partitionLevelLocationUnavailabilityInfo != null) {
+
+                    List<Pair<PartitionKeyRangeWrapper, Pair<URI, LocationSpecificHealthContext>>> locationToLocationSpecificHealthContextList = new ArrayList<>();
+
+                    for (Map.Entry<URI, LocationSpecificHealthContext> locationToLocationLevelMetrics : partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.entrySet()) {
+
+                        URI locationWithStaleUnavailabilityInfo = locationToLocationLevelMetrics.getKey();
+                        LocationSpecificHealthContext locationSpecificHealthContext = locationToLocationLevelMetrics.getValue();
+
+                        if (!locationSpecificHealthContext.isRegionAvailableToProcessRequests()) {
+                            locationToLocationSpecificHealthContextList.add(
+                                Pair.of(
+                                    partitionKeyRangeWrapper,
+                                    Pair.of(
+                                        locationWithStaleUnavailabilityInfo,
+                                        locationSpecificHealthContext)));
+                        }
+                    }
+
+                    if (locationToLocationSpecificHealthContextList.isEmpty()) {
+                        // every location of this range can process requests again - stop probing the range
+                        this.partitionKeyRangesWithPossibleUnavailableRegions.remove(partitionKeyRangeWrapper);
+                        return Flux.empty();
+                    } else {
+                        return Flux.fromIterable(locationToLocationSpecificHealthContextList);
+                    }
+                } else {
+                    // health tracking for the range was removed in the meantime - nothing left to probe
+                    this.partitionKeyRangesWithPossibleUnavailableRegions.remove(partitionKeyRangeWrapper);
+                    return Mono.empty();
+                }
+            })
+            .flatMap(locationToLocationSpecificHealthContextPair -> {
+
+                PartitionKeyRangeWrapper partitionKeyRangeWrapper = locationToLocationSpecificHealthContextPair.getLeft();
+                URI locationWithStaleUnavailabilityInfo = locationToLocationSpecificHealthContextPair.getRight().getLeft();
+
+                PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper);
+
+                if (partitionLevelLocationUnavailabilityInfo != null) {
+
+                    GlobalAddressResolver globalAddressResolver = this.globalAddressResolverSnapshot.get();
+
+                    if (globalAddressResolver != null) {
+
+                        GatewayAddressCache gatewayAddressCache = globalAddressResolver.getGatewayAddressCache(locationWithStaleUnavailabilityInfo);
+
+                        if (gatewayAddressCache != null) {
+
+                            return gatewayAddressCache
+                                .submitOpenConnectionTasks(partitionKeyRangeWrapper.getPartitionKeyRange(), partitionKeyRangeWrapper.getCollectionResourceId())
+                                .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE)
+                                .timeout(Duration.ofSeconds(Configs.getConnectionEstablishmentTimeoutForPartitionRecoveryInSeconds()))
+                                .doOnComplete(() -> {
+
+                                    if (logger.isDebugEnabled()) {
+                                        logger.debug("Partition health recovery query for partition key range : {}-{} and " +
+                                                "collection rid : {} has succeeded...",
+                                            partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(),
+                                            partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(),
+                                            partitionKeyRangeWrapper.getCollectionResourceId());
+                                    }
+
+                                    partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.compute(locationWithStaleUnavailabilityInfo, (locationWithStaleUnavailabilityInfoAsKey, locationSpecificContextAsVal) -> {
+
+                                        if (locationSpecificContextAsVal != null) {
+                                            locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker
+                                                .this.locationSpecificHealthContextTransitionHandler.handleSuccess(
+                                                locationSpecificContextAsVal,
+                                                partitionKeyRangeWrapper,
+                                                this.locationToRegion.getOrDefault(locationWithStaleUnavailabilityInfoAsKey, StringUtils.EMPTY),
+                                                false,
+                                                true);
+                                        }
+                                        return locationSpecificContextAsVal;
+                                    });
+                                })
+                                // a failed / timed out probe must not propagate to the repeating outer Flux - an error
+                                // reaching the outer onErrorResume completes the loop and it never runs again
+                                .onErrorResume(throwable -> {
+                                    if (logger.isDebugEnabled()) {
+                                        logger.debug("Partition health recovery for partition key range : {}-{} and " +
+                                                "collection rid : {} has failed, will be attempted again on the next refresh.",
+                                            partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(),
+                                            partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(),
+                                            partitionKeyRangeWrapper.getCollectionResourceId(),
+                                            throwable);
+                                    }
+                                    return Flux.empty();
+                                });
+                        }
+                    } else {
+                        // no address resolver to probe with - apply the success transition directly
+                        partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.compute(locationWithStaleUnavailabilityInfo, (locationWithStaleUnavailabilityInfoAsKey, locationSpecificContextAsVal) -> {
+
+                            if (locationSpecificContextAsVal != null) {
+                                locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker
+                                    .this.locationSpecificHealthContextTransitionHandler.handleSuccess(
+                                    locationSpecificContextAsVal,
+                                    partitionKeyRangeWrapper,
+                                    GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.getOrDefault(locationWithStaleUnavailabilityInfoAsKey, StringUtils.EMPTY),
+                                    false,
+                                    true);
+                            }
+                            return locationSpecificContextAsVal;
+                        });
+                    }
+                }
+
+                return Flux.empty();
+            })
+            // last resort for errors thrown outside of a probe - note that reaching this point ends the loop
+            .onErrorResume(throwable -> {
+                if (logger.isDebugEnabled()) {
+                    logger.debug("An exception was thrown trying to recover an Unavailable partition key range!", throwable);
+                }
+                return Flux.empty();
+            });
+    }
+
+    /**
+     * Decides whether partition-level circuit breaking applies to {@code request}: the feature must be
+     * enabled, the request must target a document and must not be a query plan request, multiple write
+     * locations must be usable for it, and more than one write endpoint must be applicable.
+     *
+     * @param request the request to inspect; {@code null} yields {@code false}.
+     * @return {@code true} when circuit breaking should be applied to the request.
+     */
+    public boolean isPartitionLevelCircuitBreakingApplicable(RxDocumentServiceRequest request) {
+
+        if (!this.consecutiveExceptionBasedCircuitBreaker.isPartitionLevelCircuitBreakerEnabled()) {
+            return false;
+        }
+
+        // could be a possible scenario when end-to-end timeout set on the operation is negative
+        // failing the operation with a NullPointerException would suppress the real issue in this case
+        // so when request is null - circuit breaking is effectively disabled
+        if (request == null) {
+            return false;
+        }
+
+        if (request.getResourceType() != ResourceType.Document) {
+            return false;
+        }
+
+        if (request.getOperationType() == OperationType.QueryPlan) {
+            return false;
+        }
+
+        GlobalEndpointManager globalEndpointManager = this.globalEndpointManager;
+
+        if (!globalEndpointManager.canUseMultipleWriteLocations(request)) {
+            return false;
+        }
+
+        UnmodifiableList<URI> applicableWriteEndpoints = globalEndpointManager.getApplicableWriteEndpoints(Collections.emptyList());
+
+        return applicableWriteEndpoints != null && applicableWriteEndpoints.size() > 1;
+    }
+
+    /**
+     * Sets the address resolver the background recovery loop uses to open connections.
+     *
+     * @param globalAddressResolver the resolver to use from now on.
+     */
+    public void setGlobalAddressResolver(GlobalAddressResolver globalAddressResolver) {
+        this.globalAddressResolverSnapshot.set(globalAddressResolver);
+    }
+
+    /**
+     * Health bookkeeping of a single partition key range: one health context per location endpoint,
+     * mirrored per region name for diagnostics.
+     */
+    private class PartitionLevelLocationUnavailabilityInfo {
+
+        private final ConcurrentHashMap<URI, LocationSpecificHealthContext> locationEndpointToLocationSpecificContextForPartition;
+        private final ConcurrentHashMap<String, LocationSpecificHealthContext> regionToLocationSpecificHealthContext;
+        private final LocationSpecificHealthContextTransitionHandler locationSpecificHealthContextTransitionHandler;
+
+        private PartitionLevelLocationUnavailabilityInfo() {
+            this.locationEndpointToLocationSpecificContextForPartition = new ConcurrentHashMap<>();
+            this.regionToLocationSpecificHealthContext = new ConcurrentHashMap<>();
+            this.locationSpecificHealthContextTransitionHandler = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationSpecificHealthContextTransitionHandler;
+        }
+
+        /**
+         * Applies an exception transition to the health context of {@code locationWithException}
+         * (starting from a fresh {@code HealthyWithFailures} context when none is tracked yet).
+         *
+         * @return whether the resulting context reports the exception threshold as breached.
+         */
+        private boolean handleException(
+            PartitionKeyRangeWrapper partitionKeyRangeWrapper,
+            URI locationWithException,
+            boolean isReadOnlyRequest) {
+
+            AtomicBoolean isExceptionThresholdBreached = new AtomicBoolean(false);
+
+            this.locationEndpointToLocationSpecificContextForPartition.compute(locationWithException, (locationAsKey, locationSpecificContextAsVal) -> {
+
+                if (locationSpecificContextAsVal == null) {
+
+                    locationSpecificContextAsVal = new LocationSpecificHealthContext.Builder()
+                        .withSuccessCountForWriteForRecovery(0)
+                        .withExceptionCountForWriteForCircuitBreaking(0)
+                        .withSuccessCountForReadForRecovery(0)
+                        .withExceptionCountForReadForCircuitBreaking(0)
+                        .withUnavailableSince(Instant.MAX)
+                        .withLocationHealthStatus(LocationHealthStatus.HealthyWithFailures)
+                        .withExceptionThresholdBreached(false)
+                        .build();
+                }
+
+                // NOTE(review): the region lookup happens before the region is registered below, so the
+                // first transition for a location is handed an empty region name - confirm this is intended
+                LocationSpecificHealthContext locationSpecificHealthContextAfterTransition = this.locationSpecificHealthContextTransitionHandler.handleException(
+                    locationSpecificContextAsVal,
+                    partitionKeyRangeWrapper,
+                    GlobalPartitionEndpointManagerForCircuitBreaker.this.partitionKeyRangesWithPossibleUnavailableRegions,
+                    GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.getOrDefault(locationWithException, StringUtils.EMPTY),
+                    isReadOnlyRequest);
+
+
+                if (GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.get(locationAsKey) == null) {
+
+                    GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.put(
+                        locationAsKey,
+                        GlobalPartitionEndpointManagerForCircuitBreaker
+                            .this.globalEndpointManager
+                            .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create));
+                }
+
+                String region = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.get(locationAsKey);
+                this.regionToLocationSpecificHealthContext.put(region, locationSpecificHealthContextAfterTransition);
+
+                isExceptionThresholdBreached.set(locationSpecificHealthContextAfterTransition.isExceptionThresholdBreached());
+                return locationSpecificHealthContextAfterTransition;
+            });
+
+            return isExceptionThresholdBreached.get();
+        }
+
+        /**
+         * Applies a success transition to the health context of {@code succeededLocation}
+         * (starting from a fresh {@code Healthy} context when none is tracked yet).
+         */
+        private void handleSuccess(
+            PartitionKeyRangeWrapper partitionKeyRangeWrapper,
+            String collectionLink,
+            URI succeededLocation,
+            boolean isReadOnlyRequest) {
+
+            this.locationEndpointToLocationSpecificContextForPartition.compute(succeededLocation, (locationAsKey, locationSpecificContextAsVal) -> {
+
+                LocationSpecificHealthContext locationSpecificHealthContextAfterTransition;
+
+                if (locationSpecificContextAsVal == null) {
+
+                    locationSpecificContextAsVal = new LocationSpecificHealthContext.Builder()
+                        .withSuccessCountForWriteForRecovery(0)
+                        .withExceptionCountForWriteForCircuitBreaking(0)
+                        .withSuccessCountForReadForRecovery(0)
+                        .withExceptionCountForReadForCircuitBreaking(0)
+                        .withUnavailableSince(Instant.MAX)
+                        .withLocationHealthStatus(LocationHealthStatus.Healthy)
+                        .withExceptionThresholdBreached(false)
+                        .build();
+                }
+
+                locationSpecificHealthContextAfterTransition = this.locationSpecificHealthContextTransitionHandler.handleSuccess(
+                    locationSpecificContextAsVal,
+                    partitionKeyRangeWrapper,
+                    GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.getOrDefault(succeededLocation, StringUtils.EMPTY),
+                    false,
+                    isReadOnlyRequest);
+
+                // used only for building diagnostics - so creating a lookup for URI and region name
+
+                if (GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.get(locationAsKey) == null) {
+                    GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.put(
+                        locationAsKey,
+                        GlobalPartitionEndpointManagerForCircuitBreaker
+                            .this.globalEndpointManager
+                            .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create));
+                }
+
+                String region = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.get(locationAsKey);
+                this.regionToLocationSpecificHealthContext.put(region, locationSpecificHealthContextAfterTransition);
+
+                return locationSpecificHealthContextAfterTransition;
+            });
+        }
+
+        /**
+         * Checks whether at least one of the given account-level locations can still serve this partition
+         * key range: a location qualifies when nothing is tracked for it yet, or when its tracked health
+         * context is available to process requests.
+         *
+         * @param availableLocationsAtAccountLevel the location endpoints applicable at account level.
+         * @return {@code true} when at least one location qualifies.
+         */
+        public boolean areLocationsAvailableForPartitionKeyRange(List<URI> availableLocationsAtAccountLevel) {
+
+            for (URI availableLocation : availableLocationsAtAccountLevel) {
+                // single lookup - a separate containsKey + get pair could observe two different states of the map
+                LocationSpecificHealthContext locationSpecificHealthContextSnapshot
+                    = this.locationEndpointToLocationSpecificContextForPartition.get(availableLocation);
+
+                if (locationSpecificHealthContextSnapshot == null
+                    || locationSpecificHealthContextSnapshot.isRegionAvailableToProcessRequests()) {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+    }
+
+    /**
+     * @return the circuit breaker holding the consecutive-exception thresholds.
+     */
+    public ConsecutiveExceptionBasedCircuitBreaker getConsecutiveExceptionBasedCircuitBreaker() {
+        return this.consecutiveExceptionBasedCircuitBreaker;
+    }
+
+    /**
+     * @return the configuration of the underlying circuit breaker.
+     */
+    public PartitionLevelCircuitBreakerConfig getCircuitBreakerConfig() {
+        return this.consecutiveExceptionBasedCircuitBreaker.getPartitionLevelCircuitBreakerConfig();
+    }
+
+    /**
+     * Reads the collection link from the request's point operation context or, when that is absent,
+     * from its feed operation context.
+     *
+     * @throws NullPointerException if the request, its request context or the collection link is null.
+     * @throws IllegalStateException if the request carries neither context.
+     */
+    private static String getCollectionLink(RxDocumentServiceRequest request) {
+
+        checkNotNull(request, "Argument 'request' cannot be null!");
+        checkNotNull(request.requestContext, "Argument 'request.requestContext' cannot be null!");
+
+        PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker
+            = request.requestContext.getPointOperationContextForCircuitBreaker();
+        FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker
+            = request.requestContext.getFeedOperationContextForCircuitBreaker();
+
+        if (pointOperationContextForCircuitBreaker != null) {
+            checkNotNull(
+                pointOperationContextForCircuitBreaker.getCollectionLink(),
+                "Argument 'pointOperationContextForCircuitBreaker.getCollectionLink()' cannot be null!");
+            return pointOperationContextForCircuitBreaker.getCollectionLink();
+        }
+
+        if (feedOperationContextForCircuitBreaker != null) {
+            checkNotNull(
+                feedOperationContextForCircuitBreaker.getCollectionLink(),
+                "Argument 'feedOperationContextForCircuitBreaker.getCollectionLink()' cannot be null!");
+            return feedOperationContextForCircuitBreaker.getCollectionLink();
+        }
+
+        throw new IllegalStateException("Both pointOperationContextForCircuitBreaker [or] feedOperationContextForCircuitBreaker cannot be null!");
+    }
+}
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationHealthStatus.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationHealthStatus.java
new file mode 100644
index 000000000000..c0034708dd5a
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationHealthStatus.java
@@ -0,0 +1,21 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.cosmos.implementation.circuitBreaker;
+
+/**
+ * Health states a location can be in for a partition key range. Each constant carries
+ * the text it is rendered as, which equals the constant's name.
+ */
+public enum LocationHealthStatus {
+    Healthy("Healthy"),
+    HealthyWithFailures("HealthyWithFailures"),
+    Unavailable("Unavailable"),
+    HealthyTentative("HealthyTentative");
+
+    // text returned by getStringifiedLocationHealthStatus()
+    private final String label;
+
+    LocationHealthStatus(String label) {
+        this.label = label;
+    }
+
+    /**
+     * @return the textual form of this health status.
+     */
+    public String getStringifiedLocationHealthStatus() {
+        return this.label;
+    }
+}
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java
new file mode 100644
index 000000000000..472778e50fcb
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java
@@ -0,0 +1,160 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.cosmos.implementation.circuitBreaker;
+
+import com.azure.cosmos.implementation.DiagnosticsInstantSerializer;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.SerializerProvider;
+import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.time.Instant;
+
+/**
+ * Immutable snapshot of the health of one location: separate exception / success counters for
+ * reads and writes, the instant recorded as "unavailable since", the health status and whether
+ * the exception threshold was breached. Instances are created through {@link Builder}.
+ */
+@JsonSerialize(using = LocationSpecificHealthContext.LocationSpecificHealthContextSerializer.class)
+public class LocationSpecificHealthContext implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    private final int exceptionCountForWriteForCircuitBreaking;
+    private final int successCountForWriteForRecovery;
+    private final int exceptionCountForReadForCircuitBreaking;
+    private final int successCountForReadForRecovery;
+    private final Instant unavailableSince;
+    private final LocationHealthStatus locationHealthStatus;
+    private final boolean isExceptionThresholdBreached;
+
+    LocationSpecificHealthContext(
+        int successCountForWriteForRecovery,
+        int exceptionCountForWriteForCircuitBreaking,
+        int successCountForReadForRecovery,
+        int exceptionCountForReadForCircuitBreaking,
+        Instant unavailableSince,
+        LocationHealthStatus locationHealthStatus,
+        boolean isExceptionThresholdBreached) {
+
+        // write counters
+        this.exceptionCountForWriteForCircuitBreaking = exceptionCountForWriteForCircuitBreaking;
+        this.successCountForWriteForRecovery = successCountForWriteForRecovery;
+        // read counters
+        this.exceptionCountForReadForCircuitBreaking = exceptionCountForReadForCircuitBreaking;
+        this.successCountForReadForRecovery = successCountForReadForRecovery;
+        // status
+        this.locationHealthStatus = locationHealthStatus;
+        this.unavailableSince = unavailableSince;
+        this.isExceptionThresholdBreached = isExceptionThresholdBreached;
+    }
+
+    /**
+     * @return whether the exception threshold was breached when this snapshot was built.
+     */
+    public boolean isExceptionThresholdBreached() {
+        return isExceptionThresholdBreached;
+    }
+
+    /**
+     * @return {@code true} when the status is {@code Healthy}, {@code HealthyWithFailures} or
+     * {@code HealthyTentative}; {@code false} for any other value.
+     */
+    public boolean isRegionAvailableToProcessRequests() {
+        final LocationHealthStatus status = locationHealthStatus;
+
+        if (status == LocationHealthStatus.Healthy) {
+            return true;
+        }
+        if (status == LocationHealthStatus.HealthyWithFailures) {
+            return true;
+        }
+        return status == LocationHealthStatus.HealthyTentative;
+    }
+
+    public int getExceptionCountForWriteForCircuitBreaking() {
+        return exceptionCountForWriteForCircuitBreaking;
+    }
+
+    public int getSuccessCountForWriteForRecovery() {
+        return successCountForWriteForRecovery;
+    }
+
+    public int getExceptionCountForReadForCircuitBreaking() {
+        return exceptionCountForReadForCircuitBreaking;
+    }
+
+    public int getSuccessCountForReadForRecovery() {
+        return successCountForReadForRecovery;
+    }
+
+    public Instant getUnavailableSince() {
+        return unavailableSince;
+    }
+
+    public LocationHealthStatus getLocationHealthStatus() {
+        return locationHealthStatus;
+    }
+
+    /**
+     * Fluent builder; values which are never set keep their Java defaults
+     * (0, {@code false}, {@code null}).
+     */
+    static class Builder {
+
+        private int exceptionCountForWriteForCircuitBreaking;
+        private int successCountForWriteForRecovery;
+        private int exceptionCountForReadForCircuitBreaking;
+        private int successCountForReadForRecovery;
+        private Instant unavailableSince;
+        private LocationHealthStatus locationHealthStatus;
+        private boolean isExceptionThresholdBreached;
+
+        public Builder() {}
+
+        public Builder withExceptionCountForWriteForCircuitBreaking(int exceptionCountForWriteForCircuitBreaking) {
+            this.exceptionCountForWriteForCircuitBreaking = exceptionCountForWriteForCircuitBreaking;
+            return this;
+        }
+
+        public Builder withSuccessCountForWriteForRecovery(int successCountForWriteForRecovery) {
+            this.successCountForWriteForRecovery = successCountForWriteForRecovery;
+            return this;
+        }
+
+        public Builder withExceptionCountForReadForCircuitBreaking(int exceptionCountForReadForCircuitBreaking) {
+            this.exceptionCountForReadForCircuitBreaking = exceptionCountForReadForCircuitBreaking;
+            return this;
+        }
+
+        public Builder withSuccessCountForReadForRecovery(int successCountForReadForRecovery) {
+            this.successCountForReadForRecovery = successCountForReadForRecovery;
+            return this;
+        }
+
+        public Builder withUnavailableSince(Instant unavailableSince) {
+            this.unavailableSince = unavailableSince;
+            return this;
+        }
+
+        public Builder withLocationHealthStatus(LocationHealthStatus locationHealthStatus) {
+            this.locationHealthStatus = locationHealthStatus;
+            return this;
+        }
+
+        public Builder withExceptionThresholdBreached(boolean exceptionThresholdBreached) {
+            this.isExceptionThresholdBreached = exceptionThresholdBreached;
+            return this;
+        }
+
+        /**
+         * @return a new context holding the values collected so far.
+         */
+        public LocationSpecificHealthContext build() {
+            return new LocationSpecificHealthContext(
+                successCountForWriteForRecovery,
+                exceptionCountForWriteForCircuitBreaking,
+                successCountForReadForRecovery,
+                exceptionCountForReadForCircuitBreaking,
+                unavailableSince,
+                locationHealthStatus,
+                isExceptionThresholdBreached);
+        }
+    }
+
+    /**
+     * Writes the four counters, the health status and the "unavailable since" instant as one JSON object.
+     */
+    static class LocationSpecificHealthContextSerializer extends com.fasterxml.jackson.databind.JsonSerializer<LocationSpecificHealthContext> {
+
+        @Override
+        public void serialize(LocationSpecificHealthContext value, JsonGenerator gen, SerializerProvider provider) throws IOException {
+            // the instant is rendered through DiagnosticsInstantSerializer
+            final String unavailableSinceAsString = toInstantString(value.unavailableSince);
+
+            gen.writeStartObject();
+            gen.writeNumberField("exceptionCountForWriteForCircuitBreaking", value.exceptionCountForWriteForCircuitBreaking);
+            gen.writeNumberField("exceptionCountForReadForCircuitBreaking", value.exceptionCountForReadForCircuitBreaking);
+            gen.writeNumberField("successCountForWriteForRecovery", value.successCountForWriteForRecovery);
+            gen.writeNumberField("successCountForReadForRecovery", value.successCountForReadForRecovery);
+            gen.writePOJOField("locationHealthStatus", value.locationHealthStatus);
+            gen.writeStringField("unavailableSince", unavailableSinceAsString);
+            gen.writeEndObject();
+        }
+
+        private String toInstantString(Instant instant) {
+            return DiagnosticsInstantSerializer.fromInstant(instant);
+        }
+    }
+}
diff --git
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java
new file mode 100644
index 000000000000..a3a34539df7f
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java
@@ -0,0 +1,289 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.cosmos.implementation.circuitBreaker;
+
+import com.azure.cosmos.implementation.Configs;
+import com.azure.cosmos.implementation.GlobalEndpointManager;
+import com.azure.cosmos.implementation.OperationType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.net.URI;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * Decides how a {@link LocationSpecificHealthContext} evolves when a request against a region succeeds or fails.
+ * Counter updates within a status are delegated to {@link ConsecutiveExceptionBasedCircuitBreaker}; a change of
+ * status always goes through {@code transitionHealthStatus}, which starts from zeroed counters.
+ */
+public class LocationSpecificHealthContextTransitionHandler {
+
+    private static final Logger logger = LoggerFactory.getLogger(LocationSpecificHealthContextTransitionHandler.class);
+
+    private final GlobalEndpointManager globalEndpointManager;
+    private final ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker;
+
+    public LocationSpecificHealthContextTransitionHandler(
+        GlobalEndpointManager globalEndpointManager,
+        ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker) {
+
+        this.globalEndpointManager = globalEndpointManager;
+        this.consecutiveExceptionBasedCircuitBreaker = consecutiveExceptionBasedCircuitBreaker;
+    }
+
+    /**
+     * Returns the context that follows a successful request in {@code regionWithSuccess}.
+     * <p>
+     * {@code Healthy} is returned as-is. With {@code forceStatusChange == false}: {@code HealthyWithFailures}
+     * hands the success to the circuit breaker when the exception count for the request kind is positive;
+     * {@code HealthyTentative} hands it to the circuit breaker and becomes {@code Healthy} once the breaker
+     * reports that the status can be upgraded; {@code Unavailable} becomes {@code HealthyTentative} once more
+     * than the allowed unavailability duration from {@link Configs} has elapsed since {@code unavailableSince}.
+     * With {@code forceStatusChange == true} only {@code Unavailable} changes (to {@code HealthyTentative}).
+     *
+     * @param isReadOnlyRequest {@code true} to use the read counters, {@code false} for the write counters
+     * @return the next context, or the given instance when nothing changes
+     * @throws IllegalStateException if the current status is not a known {@link LocationHealthStatus}
+     */
+    public LocationSpecificHealthContext handleSuccess(
+        LocationSpecificHealthContext locationSpecificHealthContext,
+        PartitionKeyRangeWrapper partitionKeyRangeWrapper,
+        String regionWithSuccess,
+        boolean forceStatusChange,
+        boolean isReadOnlyRequest) {
+
+        LocationHealthStatus currentLocationHealthStatusSnapshot = locationSpecificHealthContext.getLocationHealthStatus();
+
+        int exceptionCountActual
+            = isReadOnlyRequest ? locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking();
+
+        switch (currentLocationHealthStatusSnapshot) {
+            case Healthy:
+                break;
+            case HealthyWithFailures:
+                if (!forceStatusChange) {
+                    if (exceptionCountActual > 0) {
+                        return this.consecutiveExceptionBasedCircuitBreaker
+                            .handleSuccess(
+                                locationSpecificHealthContext,
+                                partitionKeyRangeWrapper,
+                                regionWithSuccess,
+                                isReadOnlyRequest);
+                    }
+                }
+                break;
+
+            case HealthyTentative:
+                if (!forceStatusChange) {
+
+                    LocationSpecificHealthContext locationSpecificHealthContextInner
+                        = this.consecutiveExceptionBasedCircuitBreaker
+                        .handleSuccess(
+                            locationSpecificHealthContext,
+                            partitionKeyRangeWrapper,
+                            regionWithSuccess,
+                            isReadOnlyRequest);
+
+                    if (this.consecutiveExceptionBasedCircuitBreaker.canHealthStatusBeUpgraded(locationSpecificHealthContextInner, isReadOnlyRequest)) {
+
+                        if (logger.isDebugEnabled()) {
+                            logger.debug("Partition {}-{} of collection : {} marked as Healthy from HealthyTentative for region : {}",
+                                partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(),
+                                partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(),
+                                partitionKeyRangeWrapper.getCollectionResourceId(),
+                                regionWithSuccess);
+                        }
+
+                        return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.Healthy);
+                    } else {
+                        return locationSpecificHealthContextInner;
+                    }
+                }
+                break;
+            case Unavailable:
+                Instant unavailableSinceActual = locationSpecificHealthContext.getUnavailableSince();
+                if (!forceStatusChange) {
+                    if (Duration.between(unavailableSinceActual, Instant.now()).compareTo(Duration.ofSeconds(Configs.getAllowedPartitionUnavailabilityDurationInSeconds())) > 0) {
+
+                        if (logger.isDebugEnabled()) {
+                            logger.debug("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}",
+                                partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(),
+                                partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(),
+                                partitionKeyRangeWrapper.getCollectionResourceId(),
+                                regionWithSuccess);
+                        }
+
+                        return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.HealthyTentative);
+                    }
+                } else {
+
+                    if (logger.isDebugEnabled()) {
+                        logger.debug("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}",
+                            partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(),
+                            partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(),
+                            partitionKeyRangeWrapper.getCollectionResourceId(),
+                            regionWithSuccess);
+                    }
+
+                    return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.HealthyTentative);
+                }
+                break;
+            default:
+                throw new IllegalStateException("Unsupported health status: " + currentLocationHealthStatusSnapshot);
+        }
+
+        return locationSpecificHealthContext;
+    }
+
+    /**
+     * Returns the context that follows a failed request in {@code regionWithException}.
+     * <p>
+     * {@code Healthy} becomes {@code HealthyWithFailures}. {@code HealthyWithFailures} and
+     * {@code HealthyTentative} hand the exception to the circuit breaker until it reports that the status
+     * should be downgraded; the context then becomes {@code Unavailable}, and on the
+     * {@code HealthyWithFailures} path the partition key range is also put into
+     * {@code partitionKeyRangesWithPossibleUnavailableRegions}.
+     *
+     * @param isReadOnlyRequest {@code true} to use the read counters, {@code false} for the write counters
+     * @return the next context
+     * @throws IllegalStateException if the current status is {@code Unavailable} or otherwise unhandled
+     */
+    public LocationSpecificHealthContext handleException(
+        LocationSpecificHealthContext locationSpecificHealthContext,
+        PartitionKeyRangeWrapper partitionKeyRangeWrapper,
+        ConcurrentHashMap<PartitionKeyRangeWrapper, PartitionKeyRangeWrapper> partitionKeyRangesWithPossibleUnavailableRegions,
+        String regionWithException,
+        boolean isReadOnlyRequest) {
+
+        LocationHealthStatus currentLocationHealthStatusSnapshot = locationSpecificHealthContext.getLocationHealthStatus();
+
+        switch (currentLocationHealthStatusSnapshot) {
+            case Healthy:
+
+                if (logger.isDebugEnabled()) {
+                    logger.debug("Partition {}-{} of collection : {} marked as HealthyWithFailures from Healthy for region : {}",
+                        partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(),
+                        partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(),
+                        partitionKeyRangeWrapper.getCollectionResourceId(),
+                        regionWithException);
+                }
+
+                return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.HealthyWithFailures);
+            case HealthyWithFailures:
+                if (!this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificHealthContext, isReadOnlyRequest)) {
+
+                    LocationSpecificHealthContext locationSpecificHealthContextInner
+                        = this.consecutiveExceptionBasedCircuitBreaker
+                        .handleException(
+                            locationSpecificHealthContext,
+                            partitionKeyRangeWrapper,
+                            regionWithException,
+                            isReadOnlyRequest);
+
+                    if (logger.isDebugEnabled()) {
+                        logger.debug("Partition {}-{} of collection : {} has exception count of {} for region : {}",
+                            partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(),
+                            partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(),
+                            partitionKeyRangeWrapper.getCollectionResourceId(),
+                            isReadOnlyRequest ? locationSpecificHealthContextInner.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContextInner.getExceptionCountForWriteForCircuitBreaking(),
+                            regionWithException);
+                    }
+
+                    return locationSpecificHealthContextInner;
+                } else {
+                    partitionKeyRangesWithPossibleUnavailableRegions.put(partitionKeyRangeWrapper, partitionKeyRangeWrapper);
+
+                    if (logger.isDebugEnabled()) {
+                        logger.debug("Partition {}-{} of collection : {} marked as Unavailable from HealthyWithFailures for region : {}",
+                            partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(),
+                            partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(),
+                            partitionKeyRangeWrapper.getCollectionResourceId(),
+                            regionWithException);
+                    }
+
+                    return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.Unavailable);
+                }
+            case HealthyTentative:
+                if (!this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificHealthContext, isReadOnlyRequest)) {
+                    return this.consecutiveExceptionBasedCircuitBreaker
+                        .handleException(
+                            locationSpecificHealthContext,
+                            partitionKeyRangeWrapper,
+                            regionWithException,
+                            isReadOnlyRequest);
+                } else {
+
+                    if (logger.isDebugEnabled()) {
+                        logger.debug("Partition {}-{} of collection : {} marked as Unavailable from HealthyTentative for region : {}",
+                            partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(),
+                            partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(),
+                            partitionKeyRangeWrapper.getCollectionResourceId(),
+                            regionWithException);
+                    }
+
+                    return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.Unavailable);
+                }
+            // NOTE(review): Unavailable has no case here and ends up in default (throws) - confirm callers
+            // never report an exception for a location that is already Unavailable.
+            default:
+                throw new IllegalStateException("Unsupported health status: " + currentLocationHealthStatusSnapshot);
+        }
+    }
+
+    /**
+     * Builds a new context in {@code newStatus} with all four counters reset to 0. {@code unavailableSince} is
+     * {@link Instant#MAX} for {@code Healthy} and {@code HealthyWithFailures} and the current instant for
+     * {@code Unavailable} and {@code HealthyTentative}; the threshold-breached flag is set only for
+     * {@code Unavailable}. The passed-in context is not read.
+     *
+     * @throws IllegalStateException if {@code newStatus} is not a known {@link LocationHealthStatus}
+     */
+    public LocationSpecificHealthContext transitionHealthStatus(LocationSpecificHealthContext locationSpecificHealthContext, LocationHealthStatus newStatus) {
+
+        LocationSpecificHealthContext.Builder builder = new LocationSpecificHealthContext.Builder()
+            .withSuccessCountForWriteForRecovery(0)
+            .withExceptionCountForWriteForCircuitBreaking(0)
+            .withSuccessCountForReadForRecovery(0)
+            .withExceptionCountForReadForCircuitBreaking(0);
+
+        switch (newStatus) {
+            case Healthy:
+
+                return builder
+                    .withUnavailableSince(Instant.MAX)
+                    .withLocationHealthStatus(LocationHealthStatus.Healthy)
+                    .withExceptionThresholdBreached(false)
+                    .build();
+
+            case HealthyWithFailures:
+
+                return builder
+                    .withUnavailableSince(Instant.MAX)
+                    .withLocationHealthStatus(LocationHealthStatus.HealthyWithFailures)
+                    .withExceptionThresholdBreached(false)
+                    .build();
+
+            case Unavailable:
+
+                return builder
+                    .withUnavailableSince(Instant.now())
+                    .withLocationHealthStatus(LocationHealthStatus.Unavailable)
+                    .withExceptionThresholdBreached(true)
+                    .build();
+
+            case HealthyTentative:
+
+                return builder
+                    .withUnavailableSince(Instant.now())
+                    .withLocationHealthStatus(LocationHealthStatus.HealthyTentative)
+                    .withExceptionThresholdBreached(false)
+                    .build();
+
+            default:
+                throw new IllegalStateException("Unsupported health status: " + newStatus);
+        }
+    }
+}
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionKeyRangeWrapper.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionKeyRangeWrapper.java
new file mode 100644
index 000000000000..92218a2e2736
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionKeyRangeWrapper.java
@@ -0,0 +1,42 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.cosmos.implementation.circuitBreaker;
+
+import com.azure.cosmos.implementation.PartitionKeyRange;
+
+import java.util.Objects;
+
+/**
+ * Pairs a {@link PartitionKeyRange} with a collection resource id; equality and hash code use both values.
+ */
+public class PartitionKeyRangeWrapper {
+    private final PartitionKeyRange partitionKeyRange;
+    private final String resourceId;
+
+    public PartitionKeyRangeWrapper(PartitionKeyRange partitionKeyRange, String resourceId) {
+        this.partitionKeyRange = partitionKeyRange;
+        this.resourceId = resourceId;
+    }
+
+    public PartitionKeyRange getPartitionKeyRange() {
+        return partitionKeyRange;
+    }
+
+    public String getCollectionResourceId() {
+        return resourceId;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        PartitionKeyRangeWrapper that = (PartitionKeyRangeWrapper) o;
+        return Objects.equals(partitionKeyRange, that.partitionKeyRange) && Objects.equals(resourceId, that.resourceId);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(partitionKeyRange, resourceId);
+    }
+}
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java
new file mode 100644
index 000000000000..a3c70f2a1b13
--- /dev/null
+++ 
b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java
@@ -0,0 +1,99 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.cosmos.implementation.circuitBreaker;
+
+import com.azure.cosmos.implementation.Utils;
+import com.azure.cosmos.implementation.apachecommons.lang.StringUtils;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonSetter;
+import com.fasterxml.jackson.annotation.Nulls;
+import com.fasterxml.jackson.core.JsonProcessingException;
+
+/**
+ * Settings of the partition-level circuit breaker, bound from JSON via Jackson. A JSON {@code null} for a
+ * property is skipped, so the default declared on the field stays in place.
+ */
+public class PartitionLevelCircuitBreakerConfig {
+
+    public static final PartitionLevelCircuitBreakerConfig DEFAULT = new PartitionLevelCircuitBreakerConfig();
+
+    @JsonSetter(nulls = Nulls.SKIP)
+    @JsonProperty
+    private Boolean isPartitionLevelCircuitBreakerEnabled = false;
+
+    @JsonSetter(nulls = Nulls.SKIP)
+    @JsonProperty
+    private String circuitBreakerType = "CONSECUTIVE_EXCEPTION_COUNT_BASED";
+
+    @JsonSetter(nulls = Nulls.SKIP)
+    @JsonProperty
+    private int consecutiveExceptionCountToleratedForReads = 10;
+
+    @JsonSetter(nulls = Nulls.SKIP)
+    @JsonProperty
+    private int consecutiveExceptionCountToleratedForWrites = 5;
+
+    private String cachedConfigAsString = "";
+
+    public Boolean isPartitionLevelCircuitBreakerEnabled() {
+        return isPartitionLevelCircuitBreakerEnabled;
+    }
+
+    // todo (abhmohanty): keep this method around for future-proofing (adding more circuit breaker types)
+    public String getCircuitBreakerType() {
+        return circuitBreakerType;
+    }
+
+    public int getConsecutiveExceptionCountToleratedForReads() {
+        return consecutiveExceptionCountToleratedForReads;
+    }
+
+    public int getConsecutiveExceptionCountToleratedForWrites() {
+        return consecutiveExceptionCountToleratedForWrites;
+    }
+
+    /**
+     * @return this config serialized through {@code Utils.getSimpleObjectMapper()}
+     * @throws RuntimeException wrapping the {@link JsonProcessingException} when serialization fails
+     */
+    public String toJson() {
+        try {
+            return Utils.getSimpleObjectMapper().writeValueAsString(this);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException("Unable to convert to Json String", e);
+        }
+    }
+
+    /**
+     * @return compact one-line rendering of the four settings; built on first use and cached afterwards
+     */
+    public String getConfigAsString() {
+
+        if (!StringUtils.isEmpty(this.cachedConfigAsString)) {
+            return this.cachedConfigAsString;
+        }
+
+        this.cachedConfigAsString = new StringBuilder("(")
+            .append("cb: ").append(this.isPartitionLevelCircuitBreakerEnabled).append(", ")
+            .append("type: ").append(this.circuitBreakerType).append(", ")
+            .append("rexcntt: ").append(this.consecutiveExceptionCountToleratedForReads).append(", ")
+            .append("wexcntt: ").append(this.consecutiveExceptionCountToleratedForWrites).append(")")
+            .toString();
+
+        return this.cachedConfigAsString;
+    }
+
+    /**
+     * @param jsonString JSON document describing the config
+     * @return the parsed config
+     * @throws RuntimeException wrapping the {@link JsonProcessingException} when parsing fails
+     */
+    public static PartitionLevelCircuitBreakerConfig fromJsonString(String jsonString) {
+        try {
+            return Utils.getSimpleObjectMapper().readValue(jsonString, PartitionLevelCircuitBreakerConfig.class);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException("Unable to convert from Json String", e);
+        }
+    }
+}
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java
index 076f07f793da..28d83c7018b8 100644
--- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java
@@ -82,7 +82,6 @@ public Mono resolveAsync(
                 }
 
                 request.requestContext.resolvedPartitionKeyRange = result.TargetPartitionKeyRange;
-
                 return Mono.just(result.Addresses);
             });
     }
@@ -681,7 +680,8 @@ private PartitionKeyRange tryResolveServerPartitionByPartitionKey(
         // partition getKey definition cached - like if collection with same getName but with RANGE partitioning is created.
         // In this case server will not pass x-ms-documentdb-collection-rid check and will return back InvalidPartitionException.
         // GATEWAY will refresh its cache and retry.
-        String effectivePartitionKey = PartitionKeyInternalHelper.getEffectivePartitionKeyString(partitionKey, collection.getPartitionKey());
+        String effectivePartitionKey = StringUtils.isNotEmpty(request.getEffectivePartitionKey())
+            ? request.getEffectivePartitionKey() : PartitionKeyInternalHelper.getEffectivePartitionKeyString(partitionKey, collection.getPartitionKey());
 
         request.setEffectivePartitionKey(effectivePartitionKey);
 
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java
index 47889bea95a8..c6b84d130efa 100644
--- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java
@@ -1141,6 +1141,29 @@ public Mono submitOpenConnectionTask(
         return Mono.fromFuture(openConnectionTask);
     }
 
+    public Flux submitOpenConnectionTasks(
+        PartitionKeyRange partitionKeyRange,
+        String collectionRid) {
+        // Returns an empty Flux when no proactive open-connections processor is configured.
+        if (this.proactiveOpenConnectionsProcessor == null) {
+            return Flux.empty();
+        }
+
+        checkNotNull(partitionKeyRange, "Argument 'partitionKeyRange' cannot be null!");
+        checkNotNull(collectionRid, "Argument 'collectionRid' cannot be null!");
+
+        PartitionKeyRangeIdentity partitionKeyRangeIdentity = new PartitionKeyRangeIdentity(collectionRid, partitionKeyRange.getId());
+        // Reads the addresses for the range from serverPartitionAddressCache and submits one task per address.
+        return this.serverPartitionAddressCache.getAsync(partitionKeyRangeIdentity, cachedAddresses -> Mono.just(cachedAddresses), cachedAddresses -> true)
+            .flatMapMany(cachedAddresses -> Flux.fromArray(cachedAddresses))
+            .flatMap(addressInformation -> Mono.fromFuture(
+                this.proactiveOpenConnectionsProcessor.submitOpenConnectionTaskOutsideLoop(
+                    collectionRid,
+                    this.addressEndpoint,
+                    addressInformation.getPhysicalUri(),
+                    1))); // NOTE(review): 1 is presumably the connection count per address - confirm
+    }
+
     private Mono> 
getServerAddressesViaGatewayWithRetry(
         RxDocumentServiceRequest request,
         String collectionRid,
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java
index fed1cbde61a8..ecd0fa583d35 100644
--- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java
@@ -317,6 +317,16 @@ private EndpointCache getOrAddEndpoint(URI endpoint) {
         return endpointCache;
     }
 
+    public GatewayAddressCache getGatewayAddressCache(URI endpoint) {
+        EndpointCache endpointCache = this.addressCacheByEndpoint.get(endpoint);
+        // Returns null when addressCacheByEndpoint has no entry for the endpoint; nothing is created here.
+        if (endpointCache != null) {
+            return endpointCache.addressCache;
+        }
+
+        return null;
+    }
+
     static class EndpointCache {
         GatewayAddressCache addressCache;
         AddressResolver addressResolver;
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java
index b6ba56956913..8248ef1664c5 100644
--- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java
@@ -26,7 +26,6 @@ import java.time.Instant;
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.concurrent.atomic.AtomicInteger;
-import java.util.function.IntBinaryOperator;
 
 import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull;
 
diff --git 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java index c72cbb2fd5e7..1813a2f51a52 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java @@ -51,7 +51,6 @@ public class StoreClient implements IStoreClient { private final DiagnosticsClientContext diagnosticsClientContext; private final Logger logger = LoggerFactory.getLogger(StoreClient.class); private final GatewayServiceConfigurationReader serviceConfigurationReader; - private final ISessionContainer sessionContainer; private final ReplicatedResourceClient replicatedResourceClient; private final TransportClient transportClient; @@ -189,6 +188,7 @@ private RxDocumentServiceResponse completeResponse( RxDocumentServiceResponse rxDocumentServiceResponse = new RxDocumentServiceResponse(this.diagnosticsClientContext, storeResponse); rxDocumentServiceResponse.setCosmosDiagnostics(request.requestContext.cosmosDiagnostics); + return rxDocumentServiceResponse; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java index e6c6c17e28b7..f3113c5e2d6c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java @@ -264,6 +264,7 @@ public long getRequestId() { public boolean expire() { final CosmosException error; + if ((this.args.serviceRequest().isReadOnly() || !this.hasSendingRequestStarted()) || 
this.args.serviceRequest().getNonIdempotentWriteRetriesEnabled()){ // Convert from requestTimeoutException to GoneException for the following two scenarios so they can be safely retried: @@ -272,7 +273,7 @@ public boolean expire() { error = new GoneException(this.toString(), null, this.args.physicalAddressUri().getURI(), HttpConstants.SubStatusCodes.TRANSPORT_GENERATED_410); } else { // For sent write request, converting to requestTimeout, will not be retried. - error = new RequestTimeoutException(this.toString(), this.args.physicalAddressUri().getURI()); + error = new RequestTimeoutException(this.toString(), this.args.physicalAddressUri().getURI(), HttpConstants.SubStatusCodes.TRANSIT_TIMEOUT); } BridgeInternal.setRequestHeaders(error, this.args.serviceRequest().getHeaders()); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangeEpkImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangeEpkImpl.java index 05d0611e1ac6..57e62d3b9abd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangeEpkImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangeEpkImpl.java @@ -219,6 +219,7 @@ public Mono populateFeedRangeFilteringHeaders( // 2) The EpkRange spans exactly one physical partition // In this case we can route to the physical pkrange id request.routeTo(new PartitionKeyRangeIdentity(pkRanges.get(0).getId())); + request.setHasFeedRangeFilteringBeenApplied(true); } else { // 3) The EpkRange spans less than single physical partition // In this case we route to the physical partition and @@ -236,6 +237,7 @@ public Mono populateFeedRangeFilteringHeaders( HttpConstants.HttpHeaders.END_EPK, this.range.getMax()); + request.setHasFeedRangeFilteringBeenApplied(true); } return Mono.just(request); diff --git 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyImpl.java index e2eba8fd77fa..38453bdca944 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyImpl.java @@ -167,7 +167,7 @@ public Mono populateFeedRangeFilteringHeaders( .getNormalizedEffectiveRange(routingMapProvider, metadataDiagnosticsCtx, collectionResolutionMono) .map(effectiveRange -> { request.setEffectiveRange(effectiveRange); - + request.setHasFeedRangeFilteringBeenApplied(true); return request; }); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyRangeImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyRangeImpl.java index 281adbb9e6d4..df92f69d23bb 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyRangeImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyRangeImpl.java @@ -149,7 +149,7 @@ public Mono populateFeedRangeFilteringHeaders( .getNormalizedEffectiveRange(routingMapProvider, metadataDiagnosticsCtx, collectionResolutionMono) .map(effectiveRange -> { request.setEffectiveRange(effectiveRange); - + request.setHasFeedRangeFilteringBeenApplied(true); return request; }); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java index 63ca4bb8666a..6ca5868f772e 100644 --- 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java @@ -5,6 +5,8 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.implementation.DocumentClientRetryPolicy; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.GoneException; import com.azure.cosmos.implementation.InvalidPartitionExceptionRetryPolicy; import com.azure.cosmos.implementation.MetadataDiagnosticsContext; @@ -47,8 +49,10 @@ public ChangeFeedFetcher( int top, int maxItemCount, boolean isSplitHandlingDisabled, - OperationContextAndListenerTuple operationContext) { - super(executeFunc, true, top, maxItemCount, operationContext, null); + OperationContextAndListenerTuple operationContext, + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { + super(executeFunc, true, top, maxItemCount, operationContext, null, globalEndpointManager, globalPartitionEndpointManagerForCircuitBreaker); checkNotNull(client, "Argument 'client' must not be null."); checkNotNull(createRequestFunc, "Argument 'createRequestFunc' must not be null."); @@ -99,6 +103,7 @@ public ChangeFeedFetcher( this.createRequestFunc = () -> { RxDocumentServiceRequest request = createRequestFunc.get(); + request.requestContext.setClientRetryPolicySupplier(() -> this.feedRangeContinuationRetryPolicy); this.feedRangeContinuationRetryPolicy.onBeforeSendRequest(request); return request; }; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java index 
8db74b48bf89..865203676512 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java @@ -132,7 +132,7 @@ public Flux> executeAsync() { return Paginator .getPaginatedQueryResultAsObservable( - newCosmosQueryRequestOptions, createRequestFunc, executeFunc, maxPageSize); + newCosmosQueryRequestOptions, createRequestFunc, executeFunc, maxPageSize, this.client.getGlobalEndpointManager(), this.client.getGlobalPartitionEndpointManagerForCircuitBreaker()); } public Mono> getTargetPartitionKeyRanges(String resourceId, List> queryRanges) { @@ -187,7 +187,8 @@ protected Function>> executeInter OperationType.Query, this::createClientRetryPolicyInstance, req, - this::executeInternalFuncCore); + this::executeInternalFuncCore, + PathsHelper.getCollectionPath(super.resourceLink)); } private Mono> executeInternalFuncCore( @@ -202,33 +203,40 @@ private Mono> executeInternalFuncCore( return BackoffRetryUtility.executeRetry(() -> { this.retries.incrementAndGet(); - return executeRequestAsync( - this.itemSerializer, - req); - }, finalRetryPolicyInstance) - .map(tFeedResponse -> { - this.fetchSchedulingMetrics.stop(); - this.fetchExecutionRangeAccumulator.endFetchRange(tFeedResponse.getActivityId(), - tFeedResponse.getResults().size(), - this.retries.get()); - ImmutablePair schedulingTimeSpanMap = - new ImmutablePair<>(DEFAULT_PARTITION_RANGE, this.fetchSchedulingMetrics.getElapsedTime()); - if (!StringUtils.isEmpty(tFeedResponse.getResponseHeaders().get(HttpConstants.HttpHeaders.QUERY_METRICS))) { - QueryMetrics qm = - BridgeInternal.createQueryMetricsFromDelimitedStringAndClientSideMetrics(tFeedResponse.getResponseHeaders() - .get(HttpConstants.HttpHeaders.QUERY_METRICS), - new ClientSideMetrics(this.retries.get(), - tFeedResponse.getRequestCharge(), - 
this.fetchExecutionRangeAccumulator.getExecutionRanges(), - Collections.singletonList(schedulingTimeSpanMap)), - tFeedResponse.getActivityId(), - tFeedResponse.getResponseHeaders().getOrDefault(HttpConstants.HttpHeaders.INDEX_UTILIZATION, null)); - String pkrId = tFeedResponse.getResponseHeaders().get(HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID); - String queryMetricKey = DEFAULT_PARTITION_RANGE + ",pkrId:" + pkrId; - BridgeInternal.putQueryMetricsIntoMap(tFeedResponse, queryMetricKey, qm); - } - return tFeedResponse; - }); + + return Mono.just(req) + .flatMap(request -> client.populateFeedRangeHeader(request)) + .flatMap(request -> client.addPartitionLevelUnavailableRegionsOnRequest(request, cosmosQueryRequestOptions)) + .flatMap(request -> { + finalRetryPolicyInstance.onBeforeSendRequest(request); + return executeRequestAsync( + this.itemSerializer, + req); + }); + }, finalRetryPolicyInstance) + .map(tFeedResponse -> { + this.fetchSchedulingMetrics.stop(); + this.fetchExecutionRangeAccumulator.endFetchRange(tFeedResponse.getActivityId(), + tFeedResponse.getResults().size(), + this.retries.get()); + ImmutablePair schedulingTimeSpanMap = + new ImmutablePair<>(DEFAULT_PARTITION_RANGE, this.fetchSchedulingMetrics.getElapsedTime()); + if (!StringUtils.isEmpty(tFeedResponse.getResponseHeaders().get(HttpConstants.HttpHeaders.QUERY_METRICS))) { + QueryMetrics qm = + BridgeInternal.createQueryMetricsFromDelimitedStringAndClientSideMetrics(tFeedResponse.getResponseHeaders() + .get(HttpConstants.HttpHeaders.QUERY_METRICS), + new ClientSideMetrics(this.retries.get(), + tFeedResponse.getRequestCharge(), + this.fetchExecutionRangeAccumulator.getExecutionRanges(), + Collections.singletonList(schedulingTimeSpanMap)), + tFeedResponse.getActivityId(), + tFeedResponse.getResponseHeaders().getOrDefault(HttpConstants.HttpHeaders.INDEX_UTILIZATION, null)); + String pkrId = tFeedResponse.getResponseHeaders().get(HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID); + String 
queryMetricKey = DEFAULT_PARTITION_RANGE + ",pkrId:" + pkrId; + BridgeInternal.putQueryMetricsIntoMap(tFeedResponse, queryMetricKey, qm); + } + return tFeedResponse; + }); } public RxDocumentServiceRequest createRequestAsync(String continuationToken, Integer maxPageSize) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java index 19524b8ac1af..65c1cd18176e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java @@ -19,6 +19,7 @@ import com.azure.cosmos.implementation.Utils; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.apachecommons.lang.tuple.ImmutablePair; +import com.azure.cosmos.implementation.caches.RxCollectionCache; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; import com.azure.cosmos.implementation.query.metrics.ClientSideMetrics; import com.azure.cosmos.implementation.query.metrics.FetchExecutionRangeAccumulator; @@ -49,6 +50,7 @@ * This is meant to be internally used only by our sdk. 
*/ class DocumentProducer { + private static final ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.CosmosQueryRequestOptionsAccessor qryOptionsAccessor = ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor(); @@ -142,14 +144,18 @@ public DocumentProducer( executeFeedOperationCore = (clientRetryPolicyFactory, request) -> { DocumentClientRetryPolicy finalRetryPolicy = clientRetryPolicyFactory.get(); return ObservableHelper.inlineIfPossibleAsObs( - () -> { - if(finalRetryPolicy != null) { - finalRetryPolicy.onBeforeSendRequest(request); - } - - ++retries; - return executeRequestFunc.apply(request); - }, finalRetryPolicy); + () -> Mono + .just(request) + .flatMap(req -> client.populateFeedRangeHeader(req)) + .flatMap(req -> client.addPartitionLevelUnavailableRegionsOnRequest(req, cosmosQueryRequestOptions)) + .flatMap(req -> { + + if(finalRetryPolicy != null) { + finalRetryPolicy.onBeforeSendRequest(req); + } + ++retries; + return executeRequestFunc.apply(req); + }), finalRetryPolicy); }; this.correlatedActivityId = correlatedActivityId; @@ -175,7 +181,8 @@ public DocumentProducer( return null; }, request, - executeFeedOperationCore); + executeFeedOperationCore, + collectionLink); }; this.lastResponseContinuationToken = initialContinuationToken; @@ -199,7 +206,9 @@ public Flux produceAsync() { pageSize, Paginator.getPreFetchCount(cosmosQueryRequestOptions, top, pageSize), qryOptionsAccessor.getImpl(cosmosQueryRequestOptions).getOperationContextAndListenerTuple(), - qryOptionsAccessor.getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions) + qryOptionsAccessor.getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions), + client.getGlobalEndpointManager(), + client.getGlobalPartitionEndpointManagerForCircuitBreaker() ) .map(rsp -> { this.lastResponseContinuationToken = rsp.getContinuationToken(); diff --git 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextBase.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextBase.java index f821f7aa1a9d..220f5cb0e394 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextBase.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextBase.java @@ -121,6 +121,10 @@ protected RxDocumentServiceRequest createDocumentServiceRequestWithFeedRange(Map ? this.createQueryDocumentServiceRequest(requestHeaders, querySpec) : this.createReadFeedDocumentServiceRequest(requestHeaders); request.requestContext.resolvedCollectionRid = collectionRid; + + qryOptAccessor.setCollectionRid(cosmosQueryRequestOptions, collectionRid); + + request.setResourceId(collectionRid); request.throughputControlGroupName = throughputControlGroupName; if (partitionKeyInternal != null) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java index 4e9d077ba473..a1b506391718 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java @@ -313,6 +313,7 @@ public static Flux> createDocume return collectionObs.single().flatMap(collectionValueHolder -> { queryRequestOptionsAccessor.setPartitionKeyDefinition(cosmosQueryRequestOptions, collectionValueHolder.v.getPartitionKey()); + queryRequestOptionsAccessor.setCollectionRid(cosmosQueryRequestOptions, collectionValueHolder.v.getResourceId()); Mono>, QueryInfo>> queryPlanTask = 
getPartitionKeyRangesAndQueryInfo(diagnosticsClientContext, @@ -460,7 +461,7 @@ public static Flux> createSpecia public static Flux> createReadManyQueryAsync( DiagnosticsClientContext diagnosticsClientContext, IDocumentQueryClient queryClient, String collectionResourceId, SqlQuerySpec sqlQuery, Map rangeQueryMap, CosmosQueryRequestOptions cosmosQueryRequestOptions, - String resourceId, String collectionLink, UUID activityId, Class klass, + DocumentCollection collection, String collectionLink, UUID activityId, Class klass, ResourceType resourceTypeEnum, final AtomicBoolean isQueryCancelledOnTimeout) { @@ -470,7 +471,7 @@ public static Flux> createReadMa sqlQuery, rangeQueryMap, cosmosQueryRequestOptions, - resourceId, + collection, collectionLink, activityId, klass, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java index 6a113b8209c1..3ede5f8b1736 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java @@ -4,6 +4,10 @@ package com.azure.cosmos.implementation.query; import com.azure.cosmos.CosmosDiagnostics; +import com.azure.cosmos.implementation.FeedOperationContextForCircuitBreaker; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; import com.azure.cosmos.models.FeedResponse; @@ -13,6 +17,7 @@ import reactor.core.publisher.Mono; import reactor.core.publisher.SignalType; +import java.net.URI; import java.util.List; import 
java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -24,6 +29,10 @@ abstract class Fetcher { private final static Logger logger = LoggerFactory.getLogger(Fetcher.class); + private final static + ImplementationBridgeHelpers.CosmosDiagnosticsHelper.CosmosDiagnosticsAccessor diagnosticsAccessor = + ImplementationBridgeHelpers.CosmosDiagnosticsHelper.getCosmosDiagnosticsAccessor(); + private final Function>> executeFunc; private final boolean isChangeFeed; private final OperationContextAndListenerTuple operationContext; @@ -33,6 +42,8 @@ abstract class Fetcher { private final AtomicInteger maxItemCount; private final AtomicInteger top; private final List cancelledRequestDiagnosticsTracker; + private final GlobalEndpointManager globalEndpointManager; + private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker; public Fetcher( Function>> executeFunc, @@ -40,7 +51,9 @@ public Fetcher( int top, int maxItemCount, OperationContextAndListenerTuple operationContext, - List cancelledRequestDiagnosticsTracker) { + List cancelledRequestDiagnosticsTracker, + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { checkNotNull(executeFunc, "Argument 'executeFunc' must not be null."); @@ -64,6 +77,8 @@ public Fetcher( } this.shouldFetchMore = new AtomicBoolean(true); this.cancelledRequestDiagnosticsTracker = cancelledRequestDiagnosticsTracker; + this.globalEndpointManager = globalEndpointManager; + this.globalPartitionEndpointManagerForCircuitBreaker = globalPartitionEndpointManagerForCircuitBreaker; } public final boolean shouldFetchMore() { @@ -154,7 +169,22 @@ private Mono> nextPage(RxDocumentServiceRequest request) { updateState(rsp, request); return rsp; }) - .doOnNext(response -> completed.set(true)) + .doOnNext(response -> { + completed.set(true); + + if 
(this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + + checkNotNull(request.requestContext, "Argument 'request.requestContext' must not be null!"); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker = request.requestContext.getFeedOperationContextForCircuitBreaker(); + + checkNotNull(feedOperationContextForCircuitBreaker, "Argument 'feedOperationContextForCircuitBreaker' must not be null!"); + + if (!feedOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(request); + feedOperationContextForCircuitBreaker.addPartitionKeyRangeWithSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId()); + } + } + }) .doOnError(throwable -> completed.set(true)) .doFinally(signalType -> { // If the signal type is not cancel(which means success or error), we do not need to tracking the diagnostics here @@ -169,9 +199,31 @@ private Mono> nextPage(RxDocumentServiceRequest request) { return; } + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + + checkNotNull(request.requestContext, "Argument 'request.requestContext' must not be null!"); + + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker = request.requestContext.getFeedOperationContextForCircuitBreaker(); + checkNotNull(feedOperationContextForCircuitBreaker, "Argument 'feedOperationContextForCircuitBreaker' must not be null!"); + + if (!feedOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { + if (this.globalEndpointManager != null) { + this.handleCancellationExceptionForPartitionKeyRange(request); + } + } + } + if (request.requestContext != null && request.requestContext.cosmosDiagnostics != null) { this.cancelledRequestDiagnosticsTracker.add(request.requestContext.cosmosDiagnostics); 
} }); } + + private void handleCancellationExceptionForPartitionKeyRange(RxDocumentServiceRequest failedRequest) { + URI firstContactedLocationEndpoint = diagnosticsAccessor.getFirstContactedLocationEndpoint(failedRequest.requestContext.cosmosDiagnostics); + + if (firstContactedLocationEndpoint != null) { + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationExceptionForPartitionKeyRange(failedRequest, firstContactedLocationEndpoint); + } + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java index 77f5d7b3c09a..75868822962f 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java @@ -4,6 +4,9 @@ import com.azure.cosmos.CosmosItemSerializer; import com.azure.cosmos.implementation.DocumentClientRetryPolicy; +import com.azure.cosmos.implementation.DocumentCollection; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.OperationType; import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.caches.IPartitionKeyRangeCache; @@ -43,13 +46,13 @@ public interface IDocumentQueryClient { /** * TODO: this should be async returning observable - * @return + * @return */ ConsistencyLevel getDefaultConsistencyLevelAsync(); /** * TODO: this should be async returning observable - * @return + * @return */ ConsistencyLevel getDesiredConsistencyLevelAsync(); @@ -62,7 +65,8 @@ Mono executeFeedOperationWithAvailabilityStrategy( final OperationType operationType, final Supplier retryPolicyFactory, final RxDocumentServiceRequest req, - final BiFunction, 
RxDocumentServiceRequest, Mono> feedOperation); + final BiFunction, RxDocumentServiceRequest, Mono> feedOperation, + final String collectionLink); CosmosItemSerializer getEffectiveItemSerializer(CosmosQueryRequestOptions queryRequestOptions); @@ -89,4 +93,12 @@ enum QueryCompatibilityMode { } Mono readFeedAsync(RxDocumentServiceRequest request); + + Mono populateFeedRangeHeader(RxDocumentServiceRequest request); + + Mono addPartitionLevelUnavailableRegionsOnRequest(RxDocumentServiceRequest request, CosmosQueryRequestOptions queryRequestOptions); + + GlobalEndpointManager getGlobalEndpointManager(); + + GlobalPartitionEndpointManagerForCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/NonStreamingOrderByDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/NonStreamingOrderByDocumentQueryExecutionContext.java index 0596e77dd2cd..568ed0e85f36 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/NonStreamingOrderByDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/NonStreamingOrderByDocumentQueryExecutionContext.java @@ -155,7 +155,9 @@ protected NonStreamingOrderByDocumentProducer createDocumentProducer( TriFunction createRequestFunc, Function>> executeFunc, Supplier createRetryPolicyFunc, - FeedRangeEpkImpl feedRange) { + FeedRangeEpkImpl feedRange, + String collectionLink) { + return new NonStreamingOrderByDocumentProducer( consumeComparer, client, @@ -164,7 +166,7 @@ protected NonStreamingOrderByDocumentProducer createDocumentProducer( createRequestFunc, executeFunc, feedRange, - collectionRid, + collectionLink, createRetryPolicyFunc, Document.class, correlatedActivityId, diff --git 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/OrderByDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/OrderByDocumentQueryExecutionContext.java index c8a110853a7d..f94731115d4d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/OrderByDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/OrderByDocumentQueryExecutionContext.java @@ -553,7 +553,9 @@ protected OrderByDocumentProducer createDocumentProducer( Map commonRequestHeaders, TriFunction createRequestFunc, Function>> executeFunc, - Supplier createRetryPolicyFunc, FeedRangeEpkImpl feedRange) { + Supplier createRetryPolicyFunc, FeedRangeEpkImpl feedRange, + String collectionLink) { + return new OrderByDocumentProducer(consumeComparer, client, collectionRid, @@ -561,7 +563,7 @@ protected OrderByDocumentProducer createDocumentProducer( createRequestFunc, executeFunc, feedRange, - collectionRid, + collectionLink, createRetryPolicyFunc, resourceType, correlatedActivityId, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java index 8df2e3d9c7c0..d8e220af4131 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java @@ -3,6 +3,8 @@ package com.azure.cosmos.implementation.query; import com.azure.cosmos.CosmosDiagnostics; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.RxDocumentClientImpl; import 
com.azure.cosmos.implementation.changefeed.common.ChangeFeedState; @@ -41,7 +43,9 @@ public static Flux> getPaginatedQueryResultAsObservable( CosmosQueryRequestOptions cosmosQueryRequestOptions, BiFunction createRequestFunc, Function>> executeFunc, - int maxPageSize) { + int maxPageSize, + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { int top = -1; return getPaginatedQueryResultAsObservable( @@ -52,7 +56,9 @@ public static Flux> getPaginatedQueryResultAsObservable( maxPageSize, getPreFetchCount(cosmosQueryRequestOptions, top, maxPageSize), qryOptAccessor.getImpl(cosmosQueryRequestOptions).getOperationContextAndListenerTuple(), - qryOptAccessor.getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions)); + qryOptAccessor.getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions), + globalEndpointManager, + globalPartitionEndpointManagerForCircuitBreaker); } public static Flux> getPaginatedQueryResultAsObservable( @@ -63,7 +69,9 @@ public static Flux> getPaginatedQueryResultAsObservable( int maxPageSize, int maxPreFetchCount, OperationContextAndListenerTuple operationContext, - List cancelledRequestDiagnosticsTracker) { + List cancelledRequestDiagnosticsTracker, + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { return getPaginatedQueryResultAsObservable( continuationToken, @@ -74,7 +82,9 @@ public static Flux> getPaginatedQueryResultAsObservable( maxPreFetchCount, false, operationContext, - cancelledRequestDiagnosticsTracker); + cancelledRequestDiagnosticsTracker, + globalEndpointManager, + globalPartitionEndpointManagerForCircuitBreaker); } public static Flux> getChangeFeedQueryResultAsObservable( @@ -99,7 +109,10 @@ public static Flux> getChangeFeedQueryResultAsObservable( top, maxPageSize, isSplitHandlingDisabled, - operationContext), + operationContext, + 
client.getGlobalEndpointManager(), + client.getGlobalPartitionEndpointManagerForCircuitBreaker() + ), preFetchCount); } @@ -137,7 +150,9 @@ private static Flux> getPaginatedQueryResultAsObservable( int preFetchCount, boolean isChangeFeed, OperationContextAndListenerTuple operationContext, - List cancelledRequestDiagnosticsTracker) { + List cancelledRequestDiagnosticsTracker, + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { return getPaginatedQueryResultAsObservable( () -> new ServerSideOnlyContinuationFetcherImpl<>( @@ -148,7 +163,9 @@ private static Flux> getPaginatedQueryResultAsObservable( top, maxPageSize, operationContext, - cancelledRequestDiagnosticsTracker), + cancelledRequestDiagnosticsTracker, + globalEndpointManager, + globalPartitionEndpointManagerForCircuitBreaker), preFetchCount); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContext.java index 6ed30d3a7fbe..d51f50c39b56 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContext.java @@ -128,7 +128,7 @@ public static Flux> createReadManyQueryA IDocumentQueryClient queryClient, SqlQuerySpec sqlQuery, Map rangeQueryMap, - CosmosQueryRequestOptions cosmosQueryRequestOptions, String collectionRid, String collectionLink, UUID activityId, Class klass, + CosmosQueryRequestOptions cosmosQueryRequestOptions, DocumentCollection collection, String collectionLink, UUID activityId, Class klass, ResourceType resourceTypeEnum, final AtomicBoolean isQueryCancelledOnTimeout) { @@ -145,7 +145,7 @@ public static Flux> createReadManyQueryA 
isQueryCancelledOnTimeout); context - .initializeReadMany(rangeQueryMap, cosmosQueryRequestOptions, collectionRid); + .initializeReadMany(rangeQueryMap, cosmosQueryRequestOptions, collection); return Flux.just(context); } @@ -495,13 +495,14 @@ protected DocumentProducer createDocumentProducer( Map commonRequestHeaders, TriFunction createRequestFunc, Function>> executeFunc, - Supplier createRetryPolicyFunc, FeedRangeEpkImpl feedRange) { + Supplier createRetryPolicyFunc, FeedRangeEpkImpl feedRange, + String collectionLink) { return new DocumentProducer<>(client, collectionRid, cosmosQueryRequestOptions, createRequestFunc, executeFunc, - collectionRid, + collectionLink, createRetryPolicyFunc, resourceType, correlatedActivityId, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java index 8bcacdf45078..629e24c5ef45 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java @@ -8,6 +8,7 @@ import com.azure.cosmos.implementation.DocumentClientRetryPolicy; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.HttpConstants; +import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentServiceRequest; @@ -86,6 +87,8 @@ protected void initialize( } } + ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().setPartitionKeyDefinition(cosmosQueryRequestOptions, collection.getPartitionKey()); + 
ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().setCollectionRid(cosmosQueryRequestOptions, collection.getResourceId()); return this.createDocumentServiceRequestWithFeedRange(headers, querySpecForInit, partitionKeyInternal, feedRange, collection.getResourceId(), cosmosQueryRequestOptions.getThroughputControlGroupName()); }; @@ -106,7 +109,8 @@ protected void initialize( createRequestFunc, executeFunc, () -> client.getResetSessionTokenRetryPolicy().getRequestPolicy(this.diagnosticsClientContext), - targetRange); + targetRange, + collection.getSelfLink()); documentProducers.add(dp); } @@ -122,7 +126,8 @@ abstract protected DocumentProducer createDocumentProducer(String collectionR Function>> executeFunc, Supplier createRetryPolicyFunc, - FeedRangeEpkImpl feedRange); + FeedRangeEpkImpl feedRange, + String collectionLink); @Override abstract public Flux> drainAsync(int maxPageSize); @@ -138,7 +143,7 @@ public void setTop(int newTop) { protected void initializeReadMany( Map rangeQueryMap, CosmosQueryRequestOptions cosmosQueryRequestOptions, - String collectionRid) { + DocumentCollection collection) { Map commonRequestHeaders = createCommonHeadersAsync(this.getFeedOptions(null, null)); for (Map.Entry entry : rangeQueryMap.entrySet()) { @@ -152,11 +157,13 @@ protected void initializeReadMany( headers.put(HttpConstants.HttpHeaders.CONTINUATION, continuationToken); headers.put(HttpConstants.HttpHeaders.PAGE_SIZE, Strings.toString(pageSize)); + ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().setPartitionKeyDefinition(cosmosQueryRequestOptions, collection.getPartitionKey()); + ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().setCollectionRid(cosmosQueryRequestOptions, collection.getResourceId()); return this.createDocumentServiceRequestWithFeedRange(headers, querySpec, null, partitionKeyRange, - collectionRid, + 
collection.getResourceId(), cosmosQueryRequestOptions.getThroughputControlGroupName()); }; @@ -166,7 +173,7 @@ protected void initializeReadMany( DocumentProducer dp = createDocumentProducer( - collectionRid, + collection.getResourceId(), null, -1, cosmosQueryRequestOptions, @@ -175,7 +182,8 @@ protected void initializeReadMany( createRequestFunc, executeFunc, () -> client.getResetSessionTokenRetryPolicy().getRequestPolicy(this.diagnosticsClientContext), - feedRangeEpk); + feedRangeEpk, + collection.getSelfLink()); documentProducers.add(dp); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/PipelinedQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/PipelinedQueryExecutionContext.java index 6c3fa2827216..9331409aba76 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/PipelinedQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/PipelinedQueryExecutionContext.java @@ -102,7 +102,7 @@ static Flux> createAsyncCore( public static Flux> createReadManyAsync( DiagnosticsClientContext diagnosticsClientContext, IDocumentQueryClient queryClient, SqlQuerySpec sqlQuery, Map rangeQueryMap, CosmosQueryRequestOptions cosmosQueryRequestOptions, - String resourceId, String collectionLink, UUID activityId, Class klass, + DocumentCollection collection, String collectionLink, UUID activityId, Class klass, ResourceType resourceTypeEnum, final AtomicBoolean isQueryCancelledOnTimeout) { @@ -110,7 +110,7 @@ public static Flux> createReadManyAsyn ParallelDocumentQueryExecutionContext.createReadManyQueryAsync(diagnosticsClientContext, queryClient, sqlQuery, rangeQueryMap, - cosmosQueryRequestOptions, resourceId, + cosmosQueryRequestOptions, collection, collectionLink, activityId, klass, resourceTypeEnum, isQueryCancelledOnTimeout); diff --git 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryPlanRetriever.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryPlanRetriever.java index 40024c332fea..640c1c72bb74 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryPlanRetriever.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryPlanRetriever.java @@ -8,6 +8,7 @@ import com.azure.cosmos.implementation.Configs; import com.azure.cosmos.implementation.DiagnosticsClientContext; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.implementation.PathsHelper; import com.azure.cosmos.implementation.routing.PartitionKeyInternal; import com.azure.cosmos.models.CosmosQueryRequestOptions; import com.azure.cosmos.models.ModelBridgeInternal; @@ -125,7 +126,7 @@ static Mono getQueryPlanThroughGatewayAsync(Diagn OperationType.QueryPlan, () -> queryClient.getResetSessionTokenRetryPolicy().getRequestPolicy(diagnosticsClientContext), queryPlanRequest, - executeFunc - ); + executeFunc, + PathsHelper.getCollectionPath(resourceLink)); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java index bb08e3b55235..017cfe460bae 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java @@ -5,6 +5,8 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.CosmosDiagnostics; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import 
com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; @@ -29,9 +31,11 @@ public ServerSideOnlyContinuationFetcherImpl(BiFunction cancelledRequestDiagnosticsTracker) { + List cancelledRequestDiagnosticsTracker, + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { - super(executeFunc, isChangeFeed, top, maxItemCount, operationContext, cancelledRequestDiagnosticsTracker); + super(executeFunc, isChangeFeed, top, maxItemCount, operationContext, cancelledRequestDiagnosticsTracker, globalEndpointManager, globalPartitionEndpointManagerForCircuitBreaker); checkNotNull(createRequestFunc, "Argument 'createRequestFunc' must not be null."); this.createRequestFunc = createRequestFunc; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java index 30ad1696aa5e..e42bd12847ec 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java @@ -212,10 +212,10 @@ public URI resolveServiceEndpoint(RxDocumentServiceRequest request) { } public UnmodifiableList getApplicableWriteEndpoints(RxDocumentServiceRequest request) { - return this.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); + return this.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions(), request.requestContext.getUnavailableRegionsForPartition()); } - public UnmodifiableList getApplicableWriteEndpoints(List excludedRegionsOnRequest) { + public UnmodifiableList getApplicableWriteEndpoints(List excludedRegionsOnRequest, List 
unavailableRegionsForPartition) { UnmodifiableList writeEndpoints = this.getWriteEndpoints(); Supplier excludedRegionsSupplier = this.connectionPolicy.getExcludedRegionsSupplier(); @@ -223,7 +223,7 @@ public UnmodifiableList getApplicableWriteEndpoints(List excludedRe List effectiveExcludedRegions = isExcludedRegionsSupplierConfigured(excludedRegionsSupplier) ? new ArrayList<>(excludedRegionsSupplier.get().getExcludedRegions()) : Collections.emptyList(); - if (!isExcludeRegionsConfigured(excludedRegionsOnRequest, effectiveExcludedRegions)) { + if (!isExcludeRegionsConfigured(excludedRegionsOnRequest, effectiveExcludedRegions) && (unavailableRegionsForPartition == null || unavailableRegionsForPartition.isEmpty())) { return writeEndpoints; } @@ -231,26 +231,32 @@ public UnmodifiableList getApplicableWriteEndpoints(List excludedRe effectiveExcludedRegions = excludedRegionsOnRequest; } + List effectiveExcludedRegionsWithPartitionUnavailableRegions = new ArrayList<>(effectiveExcludedRegions); + + if (unavailableRegionsForPartition != null) { + effectiveExcludedRegionsWithPartitionUnavailableRegions.addAll(unavailableRegionsForPartition); + } + // filter regions based on the exclude region config return this.getApplicableEndpoints( writeEndpoints, this.locationInfo.regionNameByWriteEndpoint, this.defaultEndpoint, - effectiveExcludedRegions); + effectiveExcludedRegionsWithPartitionUnavailableRegions); } public UnmodifiableList getApplicableReadEndpoints(RxDocumentServiceRequest request) { - return this.getApplicableReadEndpoints(request.requestContext.getExcludeRegions()); + return this.getApplicableReadEndpoints(request.requestContext.getExcludeRegions(), request.requestContext.getUnavailableRegionsForPartition()); } - public UnmodifiableList getApplicableReadEndpoints(List excludedRegionsOnRequest) { + public UnmodifiableList getApplicableReadEndpoints(List excludedRegionsOnRequest, List unavailableRegionsForPartition) { UnmodifiableList readEndpoints = 
this.getReadEndpoints(); Supplier excludedRegionsSupplier = this.connectionPolicy.getExcludedRegionsSupplier(); List effectiveExcludedRegions = isExcludedRegionsSupplierConfigured(excludedRegionsSupplier) ? new ArrayList<>(excludedRegionsSupplier.get().getExcludedRegions()) : Collections.emptyList(); - if (!isExcludeRegionsConfigured(excludedRegionsOnRequest, effectiveExcludedRegions)) { + if (!isExcludeRegionsConfigured(excludedRegionsOnRequest, effectiveExcludedRegions) && (unavailableRegionsForPartition == null || unavailableRegionsForPartition.isEmpty())) { return readEndpoints; } @@ -258,12 +264,18 @@ public UnmodifiableList getApplicableReadEndpoints(List excludedReg effectiveExcludedRegions = excludedRegionsOnRequest; } + List effectiveExcludedRegionsWithPartitionUnavailableRegions = new ArrayList<>(effectiveExcludedRegions); + + if (unavailableRegionsForPartition != null) { + effectiveExcludedRegionsWithPartitionUnavailableRegions.addAll(unavailableRegionsForPartition); + } + // filter regions based on the exclude region config return this.getApplicableEndpoints( readEndpoints, this.locationInfo.regionNameByReadEndpoint, this.locationInfo.writeEndpoints.get(0), // match the fallback region used in getPreferredAvailableEndpoints - effectiveExcludedRegions); + effectiveExcludedRegionsWithPartitionUnavailableRegions); } private UnmodifiableList getApplicableEndpoints( @@ -617,6 +629,8 @@ private UnmodifiableList getPreferredAvailableEndpoints(UnmodifiableMap(endpoints); } + + private UnmodifiableMap getEndpointByLocation(Iterable locations, Utils.ValueHolder> orderedLocations, Utils.ValueHolder> regionMap) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java index 6afe1805efa2..3772200f593d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java +++ 
b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java @@ -564,6 +564,22 @@ CosmosChangeFeedRequestOptionsImpl getImpl() { return this.actualRequestOptions; } + String getCollectionRid() { + return this.actualRequestOptions.getCollectionRid(); + } + + void setCollectionRid(String collectionRid) { + this.actualRequestOptions.setCollectionRid(collectionRid); + } + + PartitionKeyDefinition getPartitionKeyDefinition() { + return this.actualRequestOptions.getPartitionKeyDefinition(); + } + + void setPartitionKeyDefinition(PartitionKeyDefinition partitionKeyDefinition) { + this.actualRequestOptions.setPartitionKeyDefinition(partitionKeyDefinition); + } + /////////////////////////////////////////////////////////////////////////////////////////// // the following helper/accessor only helps to access this class outside of this package.// /////////////////////////////////////////////////////////////////////////////////////////// @@ -623,6 +639,26 @@ public CosmosChangeFeedRequestOptions createForProcessingFromContinuation( public CosmosChangeFeedRequestOptions clone(CosmosChangeFeedRequestOptions toBeCloned) { return new CosmosChangeFeedRequestOptions(toBeCloned); } + + @Override + public String getCollectionRid(CosmosChangeFeedRequestOptions changeFeedRequestOptions) { + return changeFeedRequestOptions.getCollectionRid(); + } + + @Override + public void setCollectionRid(CosmosChangeFeedRequestOptions changeFeedRequestOptions, String collectionRid) { + changeFeedRequestOptions.setCollectionRid(collectionRid); + } + + @Override + public PartitionKeyDefinition getPartitionKeyDefinition(CosmosChangeFeedRequestOptions changeFeedRequestOptions) { + return changeFeedRequestOptions.getPartitionKeyDefinition(); + } + + @Override + public void setPartitionKeyDefinition(CosmosChangeFeedRequestOptions changeFeedRequestOptions, PartitionKeyDefinition partitionKeyDefinition) { + 
changeFeedRequestOptions.setPartitionKeyDefinition(partitionKeyDefinition); + } }); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java index c61747aa0e8a..c252ebfb5d3b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java @@ -522,15 +522,6 @@ CosmosQueryRequestOptions setPartitionKeyRangeIdInternal(String partitionKeyRang return this; } - PartitionKeyDefinition getPartitionKeyDefinition() { - return this.actualRequestOptions.getPartitionKeyDefinition(); - } - - CosmosQueryRequestOptions setPartitionKeyDefinition(PartitionKeyDefinition partitionKeyDefinition) { - this.actualRequestOptions.setPartitionKeyDefinition(partitionKeyDefinition); - return this; - } - /** * Sets the custom ids. * @@ -651,13 +642,22 @@ public Integer getMaxItemCountForVectorSearch(CosmosQueryRequestOptions options) @Override public void setPartitionKeyDefinition(CosmosQueryRequestOptions options, PartitionKeyDefinition partitionKeyDefinition) { - options.setPartitionKeyDefinition(partitionKeyDefinition); + options.actualRequestOptions.setPartitionKeyDefinition(partitionKeyDefinition); } @Override public PartitionKeyDefinition getPartitionKeyDefinition(CosmosQueryRequestOptions options) { - return options.getPartitionKeyDefinition(); + return options.actualRequestOptions.getPartitionKeyDefinition(); + } + @Override + public void setCollectionRid(CosmosQueryRequestOptions options, String collectionRid) { + options.actualRequestOptions.setCollectionRid(collectionRid); + } + + @Override + public String getCollectionRid(CosmosQueryRequestOptions options) { + return options.actualRequestOptions.getCollectionRid(); } }); } diff --git 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java index 92ba73f78271..0978d5fb2f20 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java @@ -81,6 +81,10 @@ public class FeedResponse implements ContinuablePage { this(results, header, true, nochanges, new ConcurrentHashMap<>()); } + FeedResponse(List results, Map header, boolean nochanges, CosmosDiagnostics diagnostics) { + this(results, header, true, nochanges, new ConcurrentHashMap<>(), diagnostics); + } + FeedResponse(List results, Map headers, CosmosDiagnostics diagnostics) { this(results, headers); @@ -116,6 +120,23 @@ private FeedResponse( this.cosmosDiagnostics = BridgeInternal.createCosmosDiagnostics(queryMetricsMap); } + private FeedResponse( + List results, + Map header, + boolean useEtagAsContinuation, + boolean nochanges, + ConcurrentMap queryMetricsMap, + CosmosDiagnostics diagnostics) { + this.results = results; + this.header = header; + this.usageHeaders = new HashMap<>(); + this.quotaHeaders = new HashMap<>(); + this.useEtagAsContinuation = useEtagAsContinuation; + this.nochanges = nochanges; + this.queryMetricsMap = new ConcurrentHashMap<>(queryMetricsMap); + this.cosmosDiagnostics = diagnostics; + } + private FeedResponse( List transformedResults, FeedResponse toBeCloned) { @@ -594,6 +615,13 @@ public FeedResponse createChangeFeedResponse(RxDocumentServiceResponse re response.getResponseHeaders(), noChanges(response)); } + @Override + public FeedResponse createChangeFeedResponse(RxDocumentServiceResponse response, CosmosItemSerializer itemSerializer, Class cls, CosmosDiagnostics diagnostics) { + return new FeedResponse<>( + noChanges(response) ? 
Collections.emptyList() : response.getQueryResponse(itemSerializer, cls), + response.getResponseHeaders(), noChanges(response), diagnostics); + } + @Override public boolean getNoChanges(FeedResponse feedResponse) { return feedResponse.getNoChanges(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/module-info.java b/sdk/cosmos/azure-cosmos/src/main/java/module-info.java index 013d29ce5ab4..059bc624e6bc 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/module-info.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/module-info.java @@ -69,6 +69,7 @@ opens com.azure.cosmos.util to com.fasterxml.jackson.databind; opens com.azure.cosmos.implementation.throughputControl to com.fasterxml.jackson.databind; opens com.azure.cosmos.implementation.throughputControl.controller.group.global to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.circuitBreaker to com.fasterxml.jackson.databind; // exporting packages specifically for cosmos test exports com.azure.cosmos.implementation.faultinjection to com.azure.cosmos.test; diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index a564450f6fec..933a3eeb6322 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -8,6 +8,9 @@ "-Pdirect": "Direct", "-Pmulti-master": "MultiMaster", "-Pflaky-multi-master": "FlakyMultiMaster", + "-Pcircuit-breaker-misc-direct": "CircuitBreakerMiscDirect", + "-Pcircuit-breaker-misc-gateway": "CircuitBreakerMiscGateway", + "-Pcircuit-breaker-read-all-read-many": "CircuitBreakerReadAllAndReadMany", "-Pmulti-region": "MultiRegion", "-Plong": "Long", "-DargLine=\"-Dazure.cosmos.directModeProtocol=Tcp\"": "TCP", @@ -101,7 +104,7 @@ } }, "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pmulti-master", "-Pflaky-multi-master", "-Pfast", "-Pdirect" ], + "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pmulti-master", "-Pflaky-multi-master", "-Pcircuit-breaker-misc-direct", 
"-Pcircuit-breaker-misc-gateway", "-Pcircuit-breaker-read-all-read-many", "-Pfast", "-Pdirect" ], "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } diff --git a/sdk/cosmos/tests.yml b/sdk/cosmos/tests.yml index ffa8784d6ce9..e98292e25c7c 100644 --- a/sdk/cosmos/tests.yml +++ b/sdk/cosmos/tests.yml @@ -29,7 +29,7 @@ extends: groupId: com.azure - name: azure-cosmos-benchmark groupId: com.azure - TimeoutInMinutes: 120 + TimeoutInMinutes: 180 MaxParallel: 20 PreSteps: - template: /eng/pipelines/templates/steps/install-reporting-tools.yml