Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
85 commits
Select commit Hold shift + click to select a range
a47452e
sync PPAF
simorenoh Jun 15, 2025
b8228e7
async changes
simorenoh Jun 16, 2025
151a2fa
Update test_per_partition_automatic_failover_async.py
simorenoh Jun 16, 2025
b9e0a08
CI fixes
simorenoh Jun 16, 2025
e4d7046
changelog
simorenoh Jun 16, 2025
09e7163
broken link
simorenoh Jun 16, 2025
4e28f66
Update test_location_cache.py
simorenoh Jun 16, 2025
c5319e8
change PPAF detection logic
simorenoh Jun 16, 2025
eba6093
Update _global_partition_endpoint_manager_circuit_breaker_core.py
simorenoh Jun 16, 2025
2ec5c5d
Update _global_partition_endpoint_manager_circuit_breaker_core.py
simorenoh Jun 17, 2025
62d7be0
fix tests and remove environment variable
tvaron3 Jun 18, 2025
b57949d
Merge branch 'main' of https://github.com/Azure/azure-sdk-for-python …
tvaron3 Jun 18, 2025
24b8415
fix tests
tvaron3 Jun 23, 2025
9595327
revert excluded locations change
tvaron3 Jul 2, 2025
8911ef5
fix analyze
tvaron3 Jul 3, 2025
25dbeb3
test excluded locations
tvaron3 Jul 7, 2025
d61a9a9
Add different error handling for 503 and 408s, update README
tvaron3 Jul 8, 2025
3f8ac23
Merge branch 'main' into cosmos-ppaf
simorenoh Jul 30, 2025
f1c69ed
mypy, cspell, pylint
simorenoh Jul 31, 2025
9306d15
remove tag from tests since config is service based
simorenoh Jul 31, 2025
bd07d83
add threshold-based retries for 408, 5xx errors
simorenoh Aug 7, 2025
80cc824
Merge branch 'main' into cosmos-ppaf
simorenoh Aug 8, 2025
2e5838c
update constant use, rollback session token PR change
simorenoh Aug 8, 2025
8b7d181
threshold based retries
simorenoh Aug 18, 2025
f25b660
Merge branch 'main' into cosmos-ppaf
simorenoh Aug 18, 2025
d8ed980
Update _base.py
simorenoh Aug 19, 2025
fcd5c60
cspell, test fixes
simorenoh Aug 19, 2025
93c76ad
Merge branch 'main' into cosmos-ppaf
simorenoh Aug 19, 2025
467a95d
Update _service_unavailable_retry_policy.py
simorenoh Aug 19, 2025
b9aa01c
mypy, pylint
simorenoh Aug 19, 2025
64f95e3
503 behavior change, use regional contexts
simorenoh Aug 21, 2025
d05fc5e
mypy, pylint, tests
simorenoh Aug 21, 2025
85b2007
special-casing 503s
simorenoh Aug 21, 2025
f8fa70a
small fix
simorenoh Aug 21, 2025
e5c5ac5
exclude region tests
simorenoh Aug 21, 2025
ccd9def
session retry tests
simorenoh Aug 22, 2025
1dccc5d
pylint, cspell
simorenoh Aug 22, 2025
ebf0b0d
Merge branch 'main' into cosmos-ppaf
simorenoh Aug 22, 2025
c2bb93a
change errors since 503 is now retried directly
simorenoh Aug 25, 2025
c3879d8
Update sdk/cosmos/azure-cosmos/README.md
simorenoh Aug 26, 2025
1d57bf2
address comments
simorenoh Aug 26, 2025
eec77e7
Update _service_unavailable_retry_policy.py
simorenoh Aug 26, 2025
4c2bf32
small test updates for 503 behavior
simorenoh Aug 26, 2025
05654a9
further comments
simorenoh Aug 27, 2025
f982d21
Update test_per_partition_circuit_breaker_sm_mrr.py
simorenoh Aug 27, 2025
d9ca7a4
test fixes
simorenoh Aug 27, 2025
f1dce5d
Update test_excluded_locations.py
simorenoh Aug 27, 2025
1582cf3
small improvement to region-finding
simorenoh Aug 29, 2025
8f7ec0c
pylint
simorenoh Aug 29, 2025
1c10349
Merge branch 'main' into cosmos-ppaf
simorenoh Aug 29, 2025
effb6d1
Update _global_partition_endpoint_manager_per_partition_automatic_fai…
simorenoh Aug 29, 2025
1e773f5
address comments, add threshold lock
simorenoh Aug 29, 2025
24a44d9
add more comments
simorenoh Aug 29, 2025
d07610a
Merge branch 'main' into cosmos-ppaf
simorenoh Sep 2, 2025
f984204
Merge branch 'main' into cosmos-ppaf
simorenoh Sep 4, 2025
c772092
edge cases
simorenoh Sep 19, 2025
143cf17
Merge branch 'main' into cosmos-ppaf
simorenoh Sep 19, 2025
ef9f73a
Merge branch 'main' into cosmos-ppaf
simorenoh Oct 2, 2025
3acda24
changes from testing
simorenoh Oct 7, 2025
9a6b17b
pylint
simorenoh Oct 7, 2025
c3e0035
Merge branch 'main' into cosmos-ppaf
simorenoh Oct 8, 2025
8f75444
fixes pylint/mypy
simorenoh Oct 8, 2025
0ccd9bf
mypy complaining about assigning str to none
simorenoh Oct 8, 2025
f4e4d65
testing changes - will roll back later
simorenoh Oct 8, 2025
4e276e1
Merge branch 'cosmos-ppaf' of https://github.com/Azure/azure-sdk-for-…
simorenoh Oct 8, 2025
8f87b13
Update _endpoint_discovery_retry_policy.py
simorenoh Oct 9, 2025
3e1f6be
Update _asynchronous_request.py
simorenoh Oct 17, 2025
42817fc
add user agent feature flags
simorenoh Oct 17, 2025
23f3b0d
Merge branch 'main' into cosmos-ppaf
simorenoh Oct 20, 2025
65f9e01
Update test_per_partition_automatic_failover_async.py
simorenoh Oct 20, 2025
e15e43d
move user agent logic
simorenoh Oct 24, 2025
0d7e887
sync and async match, remove print statements
simorenoh Oct 29, 2025
aa3b641
leftover timer
simorenoh Oct 29, 2025
799f6de
Update _retry_utility.py
simorenoh Oct 30, 2025
36249b4
use constants
simorenoh Oct 30, 2025
f5cd24b
Merge branch 'main' into cosmos-ppaf
simorenoh Oct 31, 2025
0495c7b
pylint
simorenoh Oct 31, 2025
335e10e
Merge branch 'main' into cosmos-ppaf
simorenoh Nov 17, 2025
2f004b7
Merge branch 'main' into cosmos-ppaf
simorenoh Nov 17, 2025
8639093
Update CHANGELOG.md
simorenoh Nov 17, 2025
5b3815f
react to comments
simorenoh Nov 19, 2025
e31d674
Update _retry_utility.py
simorenoh Nov 19, 2025
e55871c
mypy pylint
simorenoh Nov 19, 2025
0463a3f
test fixes
simorenoh Nov 20, 2025
cdfdc01
add lock to failure additions
simorenoh Nov 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class _Constants:
DatabaseAccountEndpoint: Literal["databaseAccountEndpoint"] = "databaseAccountEndpoint"
DefaultEndpointsRefreshTime: int = 5 * 60 * 1000 # milliseconds
UnavailableEndpointDBATimeouts: int = 1 # seconds
EnablePerPartitionFailoverBehavior: Literal["enablePerPartitionFailoverBehavior"] = "enablePerPartitionFailoverBehavior" #pylint: disable=line-too-long

# ServiceDocument Resource
EnableMultipleWritableLocations: Literal["enableMultipleWriteLocations"] = "enableMultipleWriteLocations"
Expand All @@ -53,6 +54,8 @@ class _Constants:
MAX_ITEM_BUFFER_VS_CONFIG_DEFAULT: int = 50000
CIRCUIT_BREAKER_ENABLED_CONFIG: str = "AZURE_COSMOS_ENABLE_CIRCUIT_BREAKER"
CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT: str = "False"
PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG: str = "AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"
PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG_DEFAULT: str = "False"
# Only applicable when circuit breaker is enabled -------------------------
CONSECUTIVE_ERROR_COUNT_TOLERATED_FOR_READ: str = "AZURE_COSMOS_CONSECUTIVE_ERROR_COUNT_TOLERATED_FOR_READ"
CONSECUTIVE_ERROR_COUNT_TOLERATED_FOR_READ_DEFAULT: int = 10
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
HttpResponse # pylint: disable=no-legacy-azure-core-http-response-import

from . import _base as base
from ._global_partition_endpoint_manager_circuit_breaker import _GlobalPartitionEndpointManagerForCircuitBreaker
from ._global_partition_endpoint_manager_per_partition_automatic_failover import _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover # pylint: disable=line-too-long
from . import _query_iterable as query_iterable
from . import _runtime_constants as runtime_constants
from . import _session
Expand Down Expand Up @@ -168,7 +168,7 @@ def __init__( # pylint: disable=too-many-statements
self.last_response_headers: CaseInsensitiveDict = CaseInsensitiveDict()

self.UseMultipleWriteLocations = False
self._global_endpoint_manager = _GlobalPartitionEndpointManagerForCircuitBreaker(self)
self._global_endpoint_manager = _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover(self)

retry_policy = None
if isinstance(self.connection_policy.ConnectionRetryConfiguration, HTTPPolicy):
Expand Down Expand Up @@ -2621,12 +2621,16 @@ def GetDatabaseAccount(
database_account._ReadableLocations = result[Constants.ReadableLocations]
if Constants.EnableMultipleWritableLocations in result:
database_account._EnableMultipleWritableLocations = result[
Constants.EnableMultipleWritableLocations
]
Constants.EnableMultipleWritableLocations]

self.UseMultipleWriteLocations = (
self.connection_policy.UseMultipleWriteLocations and database_account._EnableMultipleWritableLocations
)

# TODO: Verify that this is the correct variable from the service
if Constants.EnablePerPartitionFailoverBehavior in result:
database_account._EnablePerPartitionFailoverBehavior = result[Constants.EnablePerPartitionFailoverBehavior]

if response_hook:
response_hook(last_response_headers, result)
return database_account
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ class EndpointDiscoveryRetryPolicy(object):
Max_retry_attempt_count = 120
Retry_after_in_milliseconds = 1000

def __init__(self, connection_policy, global_endpoint_manager, *args):
def __init__(self, connection_policy, global_endpoint_manager, pk_range_wrapper, *args):
self.global_endpoint_manager = global_endpoint_manager
self.pk_range_wrapper = pk_range_wrapper
self._max_retry_attempt_count = EndpointDiscoveryRetryPolicy.Max_retry_attempt_count
self.failover_retry_count = 0
self.retry_after_in_milliseconds = EndpointDiscoveryRetryPolicy.Retry_after_in_milliseconds
Expand Down Expand Up @@ -85,6 +86,14 @@ def ShouldRetry(self, exception): # pylint: disable=unused-argument
# refreshed with new writable and readable locations
self.global_endpoint_manager.refresh_needed = True

# If per partition automatic failover is applicable, we mark the current endpoint as unavailable
# and resolve the service endpoint for the partition range - otherwise, continue with the default retry logic
if self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[self.pk_range_wrapper]
partition_level_info.unavailable_regional_endpoints.add(self.request.location_endpoint_to_route)
self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
return True

# clear previous location-based routing directive
self.request.clear_route_to_location()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def record_failure(
if pk_range_wrapper:
self.global_partition_endpoint_manager_core.record_failure(request, pk_range_wrapper)

def resolve_service_endpoint_for_partition(
def _resolve_service_endpoint_for_partition_circuit_breaker(
self,
request: RequestObject,
pk_range_wrapper: Optional[PartitionKeyRangeWrapper]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ def is_circuit_breaker_applicable(self, request: RequestObject) -> bool:
if not request:
return False

circuit_breaker_enabled = os.environ.get(Constants.CIRCUIT_BREAKER_ENABLED_CONFIG,
Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT) == "True"
circuit_breaker_enabled = os.environ.get(Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG,
os.environ.get(Constants.CIRCUIT_BREAKER_ENABLED_CONFIG,
Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT)).lower() == "true"
if not circuit_breaker_enabled:
return False

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# The MIT License (MIT)
# Copyright (c) 2025 Microsoft Corporation

"""Class for global endpoint manager for per partition automatic failover. This class inherits the circuit breaker
endpoint manager, since enabling per partition automatic failover also enables the circuit breaker logic.
"""
import logging
import os
import threading

from typing import Dict, Set, TYPE_CHECKING, Optional

from azure.cosmos.http_constants import ResourceType
from azure.cosmos._constants import _Constants as Constants
from azure.cosmos._global_partition_endpoint_manager_circuit_breaker import \
_GlobalPartitionEndpointManagerForCircuitBreaker
from azure.cosmos.documents import _OperationType

from azure.cosmos._request_object import RequestObject
from azure.cosmos._routing.routing_range import PartitionKeyRangeWrapper

if TYPE_CHECKING:
from azure.cosmos.aio._cosmos_client_connection_async import CosmosClientConnection

logger = logging.getLogger("azure.cosmos._GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover")

# pylint: disable=name-too-long, protected-access

class PartitionLevelFailoverInfo:
    """Tracks partition level regional failover state.

    Holds the regional endpoints that have been marked unavailable for a partition key range, together with
    the regional endpoint that requests for that partition are currently being routed to.
    """
    def __init__(self):
        # Regional endpoints that callers have marked as failed for this partition.
        self.unavailable_regional_endpoints: Set[str] = set()
        # Endpoint currently in use for this partition; None until one has been recorded.
        self.current_regional_endpoint: Optional[str] = None
        # Serializes the failover decision made in try_move_to_next_location.
        self._lock = threading.Lock()

    def try_move_to_next_location(
            self,
            available_account_regional_endpoints: Set[str],
            request: RequestObject
    ) -> bool:
        """Route the request away from the regional endpoint it last failed against.

        :param Set[str] available_account_regional_endpoints: Candidate regional endpoints for the request.
        :param RequestObject request: The request to re-route; its ``location_endpoint_to_route`` is treated as
            the endpoint that failed.
        :return: True if the request was routed to another endpoint, False if no candidate is left.
        :rtype: bool
        """
        with self._lock:
            failed_regional_endpoint = request.location_endpoint_to_route
            # The partition has already been moved off the failed endpoint, so reuse the endpoint it moved to.
            # The None check keeps us from "routing" the request to an endpoint that was never recorded.
            if (self.current_regional_endpoint is not None
                    and failed_regional_endpoint != self.current_regional_endpoint):
                logger.info("Moving to next available regional endpoint: %s", self.current_regional_endpoint)
                request.route_to_location(self.current_regional_endpoint)
                return True

            for regional_endpoint in available_account_regional_endpoints:
                # Skip the endpoint that just failed and any endpoint already marked unavailable.
                if (regional_endpoint == self.current_regional_endpoint
                        or regional_endpoint in self.unavailable_regional_endpoints):
                    continue

                self.current_regional_endpoint = regional_endpoint
                logger.info("Moving to next available regional endpoint: %s", self.current_regional_endpoint)
                request.route_to_location(self.current_regional_endpoint)
                return True

            return False

class _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover(_GlobalPartitionEndpointManagerForCircuitBreaker):
    """
    This internal class implements per partition automatic failover on top of the circuit breaker
    endpoint manager. It keeps failover state per partition key range and falls back to the circuit
    breaker endpoint resolution whenever per partition automatic failover does not apply to a request.
    """
    def __init__(self, client: "CosmosClientConnection"):
        super(_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover, self).__init__(client)
        # Failover state keyed by partition key range; entries are created on first endpoint resolution.
        self.partition_range_to_failover_info: Dict[PartitionKeyRangeWrapper, PartitionLevelFailoverInfo] = {}

    def is_per_partition_automatic_failover_applicable(self, request: RequestObject) -> bool:
        """Check whether per partition automatic failover should be used for the request.

        :param RequestObject request: The request being evaluated.
        :return: True only when every eligibility check below passes.
        :rtype: bool
        """
        if not request:
            return False

        # Requests that can use multiple write locations, and read-only operations, are not eligible.
        if (self.location_cache.can_use_multiple_write_locations_for_request(request)
                or _OperationType.IsReadOnlyOperation(request.operation_type)):
            return False

        per_partition_automatic_failover_config_enabled = (
            os.environ.get(Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG,
                           Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG_DEFAULT).lower() == "true")

        # TODO: This check here needs to be verified once we test against a live account with the config enabled.
        # Both the environment variable and the flag cached from the database account must be enabled.
        if (not per_partition_automatic_failover_config_enabled or
                not self._database_account_cache._EnablePerPartitionFailoverBehavior):
            return False

        # if we have at most one region available in the account, we cannot do per partition automatic failover
        available_regions = self.compute_available_preferred_regions(request)
        if len(available_regions) <= 1:
            return False

        # only document requests or stored procedure executions are eligible; anything else returns False
        if (request.resource_type != ResourceType.Document and
                request.operation_type != _OperationType.ExecuteJavaScript):
            return False

        return True

    def resolve_service_endpoint_for_partition(
        self,
        request: RequestObject,
        pk_range_wrapper: Optional[PartitionKeyRangeWrapper]
    ) -> str:
        """Resolve the service endpoint for the request, applying per partition failover state when applicable.

        When per partition automatic failover applies and a partition key range is given, the request may be
        re-routed based on the failover state tracked for that range. Otherwise the circuit breaker endpoint
        resolution is used.

        :param RequestObject request: The request to resolve an endpoint for; its routing may be updated.
        :param pk_range_wrapper: The partition key range targeted by the request, if known.
        :type pk_range_wrapper: Optional[PartitionKeyRangeWrapper]
        :return: The endpoint the request should be sent to.
        :rtype: str
        """
        if self.is_per_partition_automatic_failover_applicable(request) and pk_range_wrapper:
            # If per partition automatic failover is applicable, we check partition unavailability
            if pk_range_wrapper in self.partition_range_to_failover_info:
                logger.info("Resolving service endpoint for partition with per partition automatic failover enabled.")
                partition_failover_info = self.partition_range_to_failover_info[pk_range_wrapper]
                if request.location_endpoint_to_route is not None:
                    if request.location_endpoint_to_route in partition_failover_info.unavailable_regional_endpoints:
                        # If the current region is unavailable, we try to move to the next available region
                        if not partition_failover_info.try_move_to_next_location(
                                self.compute_available_preferred_regions(request),
                                request):
                            logger.info("All available regions for partition are unavailable. Refreshing cache.")
                            # If no other region is available, we invalidate the cache and start once again from our
                            # main write region in the account configurations
                            self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
                            request.clear_route_to_location()
                            return self._resolve_service_endpoint(request)
                    else:
                        # Update the current regional endpoint to whatever the request is routing to
                        partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
            else:
                # First request seen for this partition key range: start tracking it
                partition_failover_info = PartitionLevelFailoverInfo()
                partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
                self.partition_range_to_failover_info[pk_range_wrapper] = partition_failover_info
            return self._resolve_service_endpoint(request)
        return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)

    def compute_available_preferred_regions(
        self,
        request: RequestObject
    ) -> Set[str]:
        """
        Computes the available regional endpoints for the request based on customer-set preferred and excluded regions.
        Preferred regions that have no entry in the account's read regional routing contexts are skipped.
        :param RequestObject request: The request object containing the routing context.
        :return: A set of available regional endpoints.
        :rtype: Set[str]
        """
        # Either excluded locations list may be unset (None), so normalize before combining them.
        excluded_locations = set(request.excluded_locations or [])
        excluded_locations.update(self.location_cache.connection_policy.ExcludedLocations or [])
        routing_contexts_by_location = self.location_cache.account_read_regional_routing_contexts_by_location
        # A preferred region may not exist in the account; skip it rather than raising a KeyError.
        return {
            routing_contexts_by_location[region].primary_endpoint
            for region in self.PreferredLocations
            if region not in excluded_locations and region in routing_contexts_by_location
        }
8 changes: 3 additions & 5 deletions sdk/cosmos/azure-cosmos/azure/cosmos/_location_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,13 +210,11 @@ def get_ordered_read_locations(self):
def _get_configured_excluded_locations(self, request: RequestObject) -> List[str]:
# If excluded locations were configured on request, use request level excluded locations.
excluded_locations = request.excluded_locations
if excluded_locations is None:
if len(excluded_locations) == 0:
if self.connection_policy.ExcludedLocations:
# If excluded locations were only configured on client(connection_policy), use client level
# make copy of excluded locations to avoid modifying the original list
excluded_locations = list(self.connection_policy.ExcludedLocations)
else:
excluded_locations = []
for excluded_location in request.excluded_locations_circuit_breaker:
if excluded_location not in excluded_locations:
excluded_locations.append(excluded_location)
Expand Down Expand Up @@ -445,7 +443,7 @@ def update_location_cache(self, write_locations=None, read_locations=None, enabl
)

def get_preferred_regional_routing_contexts(
self, endpoints_by_location, orderedLocations, expected_available_operation, fallback_endpoint
self, endpoints_by_location, ordered_locations, expected_available_operation, fallback_endpoint
):
regional_endpoints = []
# if enableEndpointDiscovery is false, we always use the defaultEndpoint that
Expand Down Expand Up @@ -475,7 +473,7 @@ def get_preferred_regional_routing_contexts(

regional_endpoints.extend(unavailable_endpoints)
else:
for location in orderedLocations:
for location in ordered_locations:
if location and location in endpoints_by_location:
# location is empty during manual failover
regional_endpoint = endpoints_by_location[location]
Expand Down
2 changes: 1 addition & 1 deletion sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def __init__(
self.location_index_to_route: Optional[int] = None
self.location_endpoint_to_route: Optional[str] = None
self.last_routed_location_endpoint_within_region: Optional[str] = None
self.excluded_locations: Optional[List[str]] = None
self.excluded_locations: List[str] = []
self.excluded_locations_circuit_breaker: List[str] = []
self.healthy_tentative_location: Optional[str] = None

Expand Down
5 changes: 3 additions & 2 deletions sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,12 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
:rtype: tuple of (dict, dict)
"""
pk_range_wrapper = None
if args and global_endpoint_manager.is_circuit_breaker_applicable(args[0]):
if args and (global_endpoint_manager.is_per_partition_automatic_failover_applicable(args[0]) or
global_endpoint_manager.is_circuit_breaker_applicable(args[0])):
pk_range_wrapper = global_endpoint_manager.create_pk_range_wrapper(args[0])
# instantiate all retry policies here to be applied for each request execution
endpointDiscovery_retry_policy = _endpoint_discovery_retry_policy.EndpointDiscoveryRetryPolicy(
client.connection_policy, global_endpoint_manager, *args
client.connection_policy, global_endpoint_manager, pk_range_wrapper, *args
)
database_account_retry_policy = _database_account_retry_policy.DatabaseAccountRetryPolicy(
client.connection_policy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,9 @@ def _Request(global_endpoint_manager, request_params, connection_policy, pipelin
base_url = request_params.endpoint_override
else:
pk_range_wrapper = None
if global_endpoint_manager.is_circuit_breaker_applicable(request_params):
# Circuit breaker is applicable, so we need to use the endpoint from the request
if (global_endpoint_manager.is_circuit_breaker_applicable(request_params) or
global_endpoint_manager.is_per_partition_automatic_failover_applicable(request_params)):
# Circuit breaker or per-partition failover are applicable, so we need to use the endpoint from the request
pk_range_wrapper = global_endpoint_manager.create_pk_range_wrapper(request_params)
base_url = global_endpoint_manager.resolve_service_endpoint_for_partition(request_params, pk_range_wrapper)
if not request.url.startswith(base_url):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ def ShouldRetry(self, _exception):
:returns: a boolean stating whether the request should be retried
:rtype: bool
"""
# we don't retry on write operations for timeouts or any internal server errors
if self.request and (not _OperationType.IsReadOnlyOperation(self.request.operation_type)):
if self.request and (not _OperationType.IsReadOnlyOperation(self.request.operation_type) and
not self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request)):
return False

if not self.connection_policy.EnableEndpointDiscovery:
Expand All @@ -46,6 +46,14 @@ def ShouldRetry(self, _exception):

# This function prepares the request to go to the next region
def resolve_next_region_service_endpoint(self):
if self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
# If per partition automatic failover is applicable, we mark the current endpoint as unavailable
# and resolve the service endpoint for the partition range - otherwise, continue with default retry logic
partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[self.pk_range_wrapper]
partition_level_info.unavailable_regional_endpoints.add(self.request.location_endpoint_to_route)
return self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request,
self.pk_range_wrapper)

# clear previous location-based routing directive
self.request.clear_route_to_location()
# clear the last routed endpoint within same region since we are going to a new region now
Expand Down
Loading
Loading