From a47452ee6c4d7307644f1ff0c7ab4c9accfd0a92 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Sun, 15 Jun 2025 19:07:36 -0400
Subject: [PATCH 01/68] sync PPAF

---
 .../azure-cosmos/azure/cosmos/_constants.py   |   3 +
 .../azure/cosmos/_cosmos_client_connection.py |   7 +-
 .../_endpoint_discovery_retry_policy.py       |  11 +-
 ...tition_endpoint_manager_circuit_breaker.py |   2 +-
 ...n_endpoint_manager_circuit_breaker_core.py |   5 +-
 ...anager_per_partition_automatic_failover.py | 143 +++++++++++++
 .../azure/cosmos/_location_cache.py           |   8 +-
 .../azure/cosmos/_request_object.py           |   2 +-
 .../azure/cosmos/_retry_utility.py            |   5 +-
 .../azure/cosmos/_synchronized_request.py     |   5 +-
 .../cosmos/_timeout_failover_retry_policy.py  |  11 +-
 .../azure-cosmos/azure/cosmos/documents.py    |   1 +
 sdk/cosmos/azure-cosmos/pytest.ini            |   1 +
 .../tests/_fault_injection_transport.py       | 102 ++++++++--
 .../azure-cosmos/tests/test_location_cache.py |   6 +-
 .../test_per_partition_automatic_failover.py  | 191 ++++++++++++++++++
 ...st_per_partition_circuit_breaker_sm_mrr.py |   3 +-
 .../test_service_retry_policies_async.py      |   2 +-
 sdk/cosmos/live-platform-matrix.json          |  17 ++
 sdk/cosmos/test-resources.bicep               |   4 +
 20 files changed, 493 insertions(+), 36 deletions(-)
 create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
 create mode 100644 sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
index d0e0f54ae04c..c2812f3481f8 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
@@ -40,6 +40,7 @@ class _Constants:
     DatabaseAccountEndpoint: Literal["databaseAccountEndpoint"] = "databaseAccountEndpoint"
     DefaultEndpointsRefreshTime: int = 5 * 60 * 1000 # milliseconds
     UnavailableEndpointDBATimeouts: int = 1 # seconds
+    EnablePerPartitionFailoverBehavior: Literal["enablePerPartitionFailoverBehavior"] = "enablePerPartitionFailoverBehavior" #pylint: disable=line-too-long
 
     # ServiceDocument Resource
     EnableMultipleWritableLocations: Literal["enableMultipleWriteLocations"] = "enableMultipleWriteLocations"
@@ -53,6 +54,8 @@ class _Constants:
     MAX_ITEM_BUFFER_VS_CONFIG_DEFAULT: int = 50000
     CIRCUIT_BREAKER_ENABLED_CONFIG: str =  "AZURE_COSMOS_ENABLE_CIRCUIT_BREAKER"
     CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT: str = "False"
+    PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG: str =  "AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"
+    PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG_DEFAULT: str = "False"
     # Only applicable when circuit breaker is enabled -------------------------
     CONSECUTIVE_ERROR_COUNT_TOLERATED_FOR_READ: str = "AZURE_COSMOS_CONSECUTIVE_ERROR_COUNT_TOLERATED_FOR_READ"
     CONSECUTIVE_ERROR_COUNT_TOLERATED_FOR_READ_DEFAULT: int = 10
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py
index 3ee11eeccdd8..aeda60b03ce3 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py
@@ -48,7 +48,7 @@
     HttpResponse  # pylint: disable=no-legacy-azure-core-http-response-import
 
 from . import _base as base
-from ._global_partition_endpoint_manager_circuit_breaker import _GlobalPartitionEndpointManagerForCircuitBreaker
+from ._global_partition_endpoint_manager_per_partition_automatic_failover import _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover # pylint: disable=line-too-long
 from . import _query_iterable as query_iterable
 from . import _runtime_constants as runtime_constants
 from . import _session
@@ -168,7 +168,7 @@ def __init__( # pylint: disable=too-many-statements
         self.last_response_headers: CaseInsensitiveDict = CaseInsensitiveDict()
 
         self.UseMultipleWriteLocations = False
-        self._global_endpoint_manager = _GlobalPartitionEndpointManagerForCircuitBreaker(self)
+        self._global_endpoint_manager = _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover(self)
 
         retry_policy = None
         if isinstance(self.connection_policy.ConnectionRetryConfiguration, HTTPPolicy):
@@ -2623,6 +2623,9 @@ def GetDatabaseAccount(
             database_account._EnableMultipleWritableLocations = result[
                 Constants.EnableMultipleWritableLocations
             ]
+        # TODO: PPAF - Verify that this is the correct variable from the service
+        if Constants.EnablePerPartitionFailoverBehavior in result:
+            database_account._EnablePerPartitionFailoverBehavior = result[Constants.EnablePerPartitionFailoverBehavior]
 
         self.UseMultipleWriteLocations = (
                 self.connection_policy.UseMultipleWriteLocations and database_account._EnableMultipleWritableLocations
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
index aa7bc67d2137..f562df2a7189 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
@@ -43,8 +43,9 @@ class EndpointDiscoveryRetryPolicy(object):
     Max_retry_attempt_count = 120
     Retry_after_in_milliseconds = 1000
 
-    def __init__(self, connection_policy, global_endpoint_manager, *args):
+    def __init__(self, connection_policy, global_endpoint_manager, pk_range_wrapper, *args):
         self.global_endpoint_manager = global_endpoint_manager
+        self.pk_range_wrapper = pk_range_wrapper
         self._max_retry_attempt_count = EndpointDiscoveryRetryPolicy.Max_retry_attempt_count
         self.failover_retry_count = 0
         self.retry_after_in_milliseconds = EndpointDiscoveryRetryPolicy.Retry_after_in_milliseconds
@@ -85,6 +86,14 @@ def ShouldRetry(self, exception):  # pylint: disable=unused-argument
         # refreshed with new writable and readable locations
         self.global_endpoint_manager.refresh_needed = True
 
+        # If per partition automatic failover is applicable, we mark the current endpoint as unavailable
+        # and resolve the service endpoint for the partition range - otherwise, continue with the default retry logic
+        if self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
+            partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[self.pk_range_wrapper]
+            partition_level_info.unavailable_regional_endpoints.add(self.request.location_endpoint_to_route)
+            self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
+            return True
+
         # clear previous location-based routing directive
         self.request.clear_route_to_location()
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker.py
index 2eda20c926d0..00e247701dc7 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker.py
@@ -100,7 +100,7 @@ def record_failure(
             if pk_range_wrapper:
                 self.global_partition_endpoint_manager_core.record_failure(request, pk_range_wrapper)
 
-    def resolve_service_endpoint_for_partition(
+    def _resolve_service_endpoint_for_partition_circuit_breaker(
             self,
             request: RequestObject,
             pk_range_wrapper: Optional[PartitionKeyRangeWrapper]
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
index 93faf9b7a8c5..f5335fc447ff 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
@@ -59,8 +59,9 @@ def is_circuit_breaker_applicable(self, request: RequestObject) -> bool:
         if not request:
             return False
 
-        circuit_breaker_enabled = os.environ.get(Constants.CIRCUIT_BREAKER_ENABLED_CONFIG,
-                                                 Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT) == "True"
+        circuit_breaker_enabled = os.environ.get(Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG,
+                                                 os.environ.get(Constants.CIRCUIT_BREAKER_ENABLED_CONFIG,
+                                                 Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT)).lower() == "true"
         if not circuit_breaker_enabled:
             return False
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
new file mode 100644
index 000000000000..03a3080cdefe
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -0,0 +1,143 @@
+# The MIT License (MIT)
+# Copyright (c) 2025 Microsoft Corporation
+
+"""Class for global endpoint manager for per partition automatic failover. This class inherits the circuit breaker
+endpoint manager, since enabling per partition automatic failover also enables the circuit breaker logic.
+"""
+import logging
+import os
+import threading
+
+from typing import Dict, List, Set, TYPE_CHECKING, Optional
+
+from azure.cosmos.http_constants import ResourceType
+from azure.cosmos._constants import _Constants as Constants
+from azure.cosmos._global_partition_endpoint_manager_circuit_breaker import \
+    _GlobalPartitionEndpointManagerForCircuitBreaker
+from azure.cosmos.documents import _OperationType
+
+from azure.cosmos._request_object import RequestObject
+from azure.cosmos._routing.routing_range import PartitionKeyRangeWrapper
+
+if TYPE_CHECKING:
+    from azure.cosmos._cosmos_client_connection import CosmosClientConnection
+
+logger = logging.getLogger("azure.cosmos._GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover")
+
+class PartitionLevelFailoverInfo:
+    """
+    Holds information about the partition level regional failover.
+    It is used to track the partition key range and the regions where it is available.
+    """
+    def __init__(self):
+        self.unavailable_regional_endpoints = set()
+        self.current_regional_endpoint = None
+        self._lock = threading.Lock()
+
+    def try_move_to_next_location(self, available_account_regional_endpoints: Set[str], request: RequestObject) -> bool:
+        with self._lock:
+            failed_regional_endpoint = request.location_endpoint_to_route
+            if failed_regional_endpoint != self.current_regional_endpoint:
+                logger.info("Moving to next available regional endpoint: %s", self.current_regional_endpoint)
+                request.route_to_location(self.current_regional_endpoint)
+                return True
+
+            for regional_endpoint in available_account_regional_endpoints:
+                if regional_endpoint == self.current_regional_endpoint:
+                    continue
+
+                if regional_endpoint in self.unavailable_regional_endpoints:
+                    continue
+
+                self.current_regional_endpoint = regional_endpoint
+                logger.info("Moving to next available regional endpoint: %s", self.current_regional_endpoint)
+                request.route_to_location(self.current_regional_endpoint)
+                return True
+
+            return False
+
+class _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover(_GlobalPartitionEndpointManagerForCircuitBreaker):
+    """
+    This internal class implements the logic for partition endpoint management for
+    geo-replicated database accounts.
+    """
+    def __init__(self, client: "CosmosClientConnection"):
+        super(_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover, self).__init__(client)
+        self.partition_range_to_failover_info: Dict[PartitionKeyRangeWrapper, PartitionLevelFailoverInfo] = {}
+
+    def is_per_partition_automatic_failover_applicable(self, request: RequestObject) -> bool:
+        if not request:
+            return False
+
+        if (self.location_cache.can_use_multiple_write_locations_for_request(request)
+                or _OperationType.IsReadOnlyOperation(request.operation_type)):
+            return False
+
+        per_partition_automatic_failover_config_enabled = os.environ.get(Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG,
+                                                        Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG_DEFAULT).lower() == "true"
+
+        # TODO: PPAF - This check here needs to be verified once we test against a live account with the config enabled.
+        if not per_partition_automatic_failover_config_enabled or not self._database_account_cache._EnablePerPartitionFailoverBehavior:
+            return False
+
+        # if we have at most one region available in the account, we cannot do per partition automatic failover
+        available_regions = self.compute_available_preferred_regions(request)
+        if len(available_regions) <= 1:
+            return False
+
+        # if the request is not for a document or if the request is not executing a stored procedure, return False
+        if (request.resource_type != ResourceType.Document and
+                request.operation_type != _OperationType.ExecuteJavaScript):
+            return False
+
+        return True
+
+    def resolve_service_endpoint_for_partition(
+            self,
+            request: RequestObject,
+            pk_range_wrapper: Optional[PartitionKeyRangeWrapper]
+    ) -> str:
+        if self.is_per_partition_automatic_failover_applicable(request) and pk_range_wrapper:
+            # If per partition automatic failover is applicable, we check partition unavailability
+            if pk_range_wrapper in self.partition_range_to_failover_info:
+                print("Resolving service endpoint for partition with per partition automatic failover enabled.")
+                partition_failover_info = self.partition_range_to_failover_info[pk_range_wrapper]
+                if request.location_endpoint_to_route is not None:
+                    if request.location_endpoint_to_route in partition_failover_info.unavailable_regional_endpoints:
+                        # If the current region is unavailable, we try to move to the next available region
+                        if not partition_failover_info.try_move_to_next_location(
+                                self.compute_available_preferred_regions(request),
+                                request):
+                            logger.info("All available regions for partition are unavailable. Refreshing cache.")
+                            # If no other region is available, we invalidate the cache and start once again from our
+                            # main write region in the account configurations
+                            self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
+                            request.clear_route_to_location()
+                            return self._resolve_service_endpoint(request)
+                    else:
+                        # Update the current regional endpoint to whatever the request is routing to
+                        partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
+            else:
+                partition_failover_info = PartitionLevelFailoverInfo()
+                partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
+                self.partition_range_to_failover_info[pk_range_wrapper] = partition_failover_info
+
+            return self._resolve_service_endpoint(request)
+        else:
+            return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
+
+    def compute_available_preferred_regions(
+            self,
+            request: RequestObject
+    ) -> Set[str]:
+        """
+        Computes the available regional endpoints for the request based on customer-set preferred and excluded regions.
+        """
+        excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
+        preferred_locations = self.PreferredLocations
+        available_regions = [item for item in preferred_locations if item not in excluded_locations]
+        available_regional_endpoints = {
+            self.location_cache.account_read_regional_routing_contexts_by_location[region].primary_endpoint
+            for region in available_regions
+        }
+        return available_regional_endpoints
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_location_cache.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_location_cache.py
index 90578c63e5dd..5363a31f3b30 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_location_cache.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_location_cache.py
@@ -210,13 +210,11 @@ def get_ordered_read_locations(self):
     def _get_configured_excluded_locations(self, request: RequestObject) -> List[str]:
         # If excluded locations were configured on request, use request level excluded locations.
         excluded_locations = request.excluded_locations
-        if excluded_locations is None:
+        if len(excluded_locations) == 0:
             if self.connection_policy.ExcludedLocations:
                 # If excluded locations were only configured on client(connection_policy), use client level
                 # make copy of excluded locations to avoid modifying the original list
                 excluded_locations = list(self.connection_policy.ExcludedLocations)
-            else:
-                excluded_locations = []
         for excluded_location in request.excluded_locations_circuit_breaker:
             if excluded_location not in excluded_locations:
                 excluded_locations.append(excluded_location)
@@ -445,7 +443,7 @@ def update_location_cache(self, write_locations=None, read_locations=None, enabl
         )
 
     def get_preferred_regional_routing_contexts(
-        self, endpoints_by_location, orderedLocations, expected_available_operation, fallback_endpoint
+        self, endpoints_by_location, ordered_locations, expected_available_operation, fallback_endpoint
     ):
         regional_endpoints = []
         # if enableEndpointDiscovery is false, we always use the defaultEndpoint that
@@ -475,7 +473,7 @@ def get_preferred_regional_routing_contexts(
 
                 regional_endpoints.extend(unavailable_endpoints)
             else:
-                for location in orderedLocations:
+                for location in ordered_locations:
                     if location and location in endpoints_by_location:
                         # location is empty during manual failover
                         regional_endpoint = endpoints_by_location[location]
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py
index d20eedb40148..d43407a40a72 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py
@@ -40,7 +40,7 @@ def __init__(
         self.location_index_to_route: Optional[int] = None
         self.location_endpoint_to_route: Optional[str] = None
         self.last_routed_location_endpoint_within_region: Optional[str] = None
-        self.excluded_locations: Optional[List[str]] = None
+        self.excluded_locations: List[str] = []
         self.excluded_locations_circuit_breaker: List[str] = []
         self.healthy_tentative_location: Optional[str] = None
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
index 91145ef217ba..50c26e87cb62 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
@@ -63,11 +63,12 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
     :rtype: tuple of (dict, dict)
     """
     pk_range_wrapper = None
-    if args and global_endpoint_manager.is_circuit_breaker_applicable(args[0]):
+    if args and (global_endpoint_manager.is_per_partition_automatic_failover_applicable(args[0]) or
+                 global_endpoint_manager.is_circuit_breaker_applicable(args[0])):
         pk_range_wrapper = global_endpoint_manager.create_pk_range_wrapper(args[0])
     # instantiate all retry policies here to be applied for each request execution
     endpointDiscovery_retry_policy = _endpoint_discovery_retry_policy.EndpointDiscoveryRetryPolicy(
-        client.connection_policy, global_endpoint_manager, *args
+        client.connection_policy, global_endpoint_manager, pk_range_wrapper, *args
     )
     database_account_retry_policy = _database_account_retry_policy.DatabaseAccountRetryPolicy(
         client.connection_policy
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
index e41881429b20..bb338f443dca 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
@@ -107,8 +107,9 @@ def _Request(global_endpoint_manager, request_params, connection_policy, pipelin
         base_url = request_params.endpoint_override
     else:
         pk_range_wrapper = None
-        if global_endpoint_manager.is_circuit_breaker_applicable(request_params):
-            # Circuit breaker is applicable, so we need to use the endpoint from the request
+        if (global_endpoint_manager.is_circuit_breaker_applicable(request_params) or
+                global_endpoint_manager.is_per_partition_automatic_failover_applicable(request_params)):
+            # Circuit breaker or per-partition failover are applicable, so we need to use the endpoint from the request
             pk_range_wrapper = global_endpoint_manager.create_pk_range_wrapper(request_params)
         base_url = global_endpoint_manager.resolve_service_endpoint_for_partition(request_params, pk_range_wrapper)
     if not request.url.startswith(base_url):
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
index b77ce1a69f13..70f6fdd2e299 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
@@ -27,8 +27,8 @@ def ShouldRetry(self, _exception):
         :returns: a boolean stating whether the request should be retried
         :rtype: bool
         """
-        # we don't retry on write operations for timeouts or any internal server errors
-        if self.request and (not _OperationType.IsReadOnlyOperation(self.request.operation_type)):
+        if self.request and (not _OperationType.IsReadOnlyOperation(self.request.operation_type) and
+                        not self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request)):
             return False
 
         if not self.connection_policy.EnableEndpointDiscovery:
@@ -46,6 +46,13 @@ def ShouldRetry(self, _exception):
 
     # This function prepares the request to go to the next region
     def resolve_next_region_service_endpoint(self):
+        if self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
+            # If per partition automatic failover is applicable, we mark the current endpoint as unavailable
+            # and resolve the service endpoint for the partition range - otherwise, continue with the default retry logic
+            partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[self.pk_range_wrapper]
+            partition_level_info.unavailable_regional_endpoints.add(self.request.location_endpoint_to_route)
+            return self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
+
         # clear previous location-based routing directive
         self.request.clear_route_to_location()
         # clear the last routed endpoint within same region since we are going to a new region now
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/documents.py b/sdk/cosmos/azure-cosmos/azure/cosmos/documents.py
index a0e55077aefa..3008bed9b349 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/documents.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/documents.py
@@ -78,6 +78,7 @@ def __init__(self) -> None:
         self._WritableLocations: List[dict] = []
         self._ReadableLocations: List[dict] = []
         self._EnableMultipleWritableLocations = False
+        self._EnablePerPartitionFailoverBehavior = False
 
     @property
     def WritableLocations(self) -> List[Dict[Any, Any]]:
diff --git a/sdk/cosmos/azure-cosmos/pytest.ini b/sdk/cosmos/azure-cosmos/pytest.ini
index aabe78b51f08..a5e006ea027e 100644
--- a/sdk/cosmos/azure-cosmos/pytest.ini
+++ b/sdk/cosmos/azure-cosmos/pytest.ini
@@ -7,3 +7,4 @@ markers =
     cosmosMultiRegion: marks tests running on a Cosmos DB live account with multi-region and multi-write enabled.
     cosmosCircuitBreaker: marks tests running on Cosmos DB live account with per partition circuit breaker enabled and multi-write enabled.
     cosmosCircuitBreakerMultiRegion: marks tests running on Cosmos DB live account with one write region and multiple read regions and per partition circuit breaker enabled.
+    cosmosPerPartitionAutomaticFailover: marks tests running on Cosmos DB live account with one write region and multiple read regions and per partition automatic failover enabled.
diff --git a/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py b/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py
index 2386e54fd882..127a21cb0bd4 100644
--- a/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py
+++ b/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py
@@ -26,7 +26,7 @@
 import logging
 import sys
 from time import sleep
-from typing import Callable, Optional, Any, Dict, List, MutableMapping
+from typing import Callable, Optional, Any, Dict, List, Mapping, MutableMapping, Tuple, Sequence
 
 from azure.core.pipeline.transport import HttpRequest, HttpResponse
 from azure.core.pipeline.transport._requests_basic import RequestsTransport, RequestsTransportResponse
@@ -63,8 +63,29 @@ def error_with_counter(self, error: Exception) -> Exception:
         self.counters[ERROR_WITH_COUNTER] += 1
         return error
 
-    def add_fault(self, predicate: Callable[[HttpRequest], bool], fault_factory: Callable[[HttpRequest], Exception]):
-        self.faults.append({"predicate": predicate, "apply": fault_factory})
+    def add_fault(self,
+                  predicate: Callable[[HttpRequest], bool],
+                  fault_factory: Callable[[HttpRequest], Exception],
+                  max_inner_count: Optional[int] = None,
+                  after_max_count: Optional[Callable[[HttpRequest], RequestsTransportResponse]] = None):
+        """ Adds a fault to the transport that will be applied when the predicate matches the request.
+        :param Callable predicate: A callable that takes an HttpRequest and returns True if the fault should be applied.
+        :param Callable fault_factory: A callable that takes an HttpRequest and returns an Exception to be raised.
+        :param int max_inner_count: Optional maximum number of times the fault can be applied for one request.
+            If None, the fault will be applied every time the predicate matches.
+        :param Callable after_max_count: Optional callable that takes an HttpRequest and returns a
+            RequestsTransportResponse. Used to return a different response after the maximum number of faults has
+            been applied. Can only be used if `max_inner_count` is not None.
+        """
+        if max_inner_count is not None:
+            if after_max_count is not None:
+                self.faults.append({"predicate": predicate, "apply": fault_factory, "after_max_count": after_max_count,
+                                    "max_count": max_inner_count, "current_count": 0})
+            else:
+                self.faults.append({"predicate": predicate, "apply": fault_factory,
+                                    "max_count": max_inner_count, "current_count": 0})
+        else:
+            self.faults.append({"predicate": predicate, "apply": fault_factory})
 
     def add_response_transformation(self, predicate: Callable[[HttpRequest], bool], response_transformation: Callable[[HttpRequest, Callable[[HttpRequest], RequestsTransportResponse]], RequestsTransportResponse]):
         self.responseTransformations.append({
@@ -85,6 +106,16 @@ def send(self, request: HttpRequest, *, proxies: Optional[MutableMapping[str, st
         # find the first fault Factory with matching predicate if any
         first_fault_factory = FaultInjectionTransport.__first_item(iter(self.faults), lambda f: f["predicate"](request))
         if first_fault_factory:
+            if "max_count" in first_fault_factory:
+                FaultInjectionTransport.logger.info(f"Found fault factory with max count {first_fault_factory['max_count']}")
+                if first_fault_factory["current_count"] >= first_fault_factory["max_count"]:
+                    first_fault_factory["current_count"] = 0 # reset counter
+                    if "after_max_count" in first_fault_factory:
+                        FaultInjectionTransport.logger.info("Max count reached, returning after_max_count")
+                        return first_fault_factory["after_max_count"]
+                    FaultInjectionTransport.logger.info("Max count reached, skipping fault injection")
+                    return super().send(request, proxies=proxies, **kwargs)
+                first_fault_factory["current_count"] += 1
             FaultInjectionTransport.logger.info("--> FaultInjectionTransport.ApplyFaultInjection")
             injected_error = first_fault_factory["apply"](request)
             FaultInjectionTransport.logger.info("Found to-be-injected error {}".format(injected_error))
@@ -132,12 +163,21 @@ def print_call_stack():
             frame = frame.f_back
 
     @staticmethod
-    def predicate_req_payload_contains_id(r: HttpRequest, id_value: str):
+    def predicate_req_payload_contains_id(r: HttpRequest, id_value: str) -> bool:
         if r.body is None:
             return False
 
         return '"id":"{}"'.format(id_value) in r.body
 
+    @staticmethod
+    def predicate_req_payload_contains_field(r: HttpRequest, field_name: str, field_value: Optional[str]) -> bool:
+        if r.body is None:
+            return False
+        if field_value is None:
+            return '"{}":"'.format(field_name) in r.body
+        else:
+            return '"{}":"{}"'.format(field_name, field_value) in r.body
+
     @staticmethod
     def predicate_req_for_document_with_id(r: HttpRequest, id_value: str) -> bool:
         return (FaultInjectionTransport.predicate_url_contains_id(r, id_value)
@@ -163,15 +203,8 @@ def predicate_is_resource_type(r: HttpRequest, resource_type: str) -> bool:
     @staticmethod
     def predicate_is_operation_type(r: HttpRequest, operation_type: str) -> bool:
         is_operation_type = r.headers.get(HttpHeaders.ThinClientProxyOperationType) == operation_type
-
         return is_operation_type
 
-    @staticmethod
-    def predicate_is_resource_type(r: HttpRequest, resource_type: str) -> bool:
-        is_resource_type = r.headers.get(HttpHeaders.ThinClientProxyResourceType) == resource_type
-
-        return is_resource_type
-
     @staticmethod
     def predicate_is_write_operation(r: HttpRequest, uri_prefix: str) -> bool:
         is_write_document_operation = documents._OperationType.IsWriteOperation(
@@ -209,7 +242,8 @@ def error_service_response() -> Exception:
     def transform_topology_swr_mrr(
             write_region_name: str,
             read_region_name: str,
-            inner: Callable[[], RequestsTransportResponse]) -> RequestsTransportResponse:
+            inner: Callable[[], RequestsTransportResponse],
+            enable_per_partition_failover: bool = False) -> RequestsTransportResponse:
 
         response = inner()
         if not FaultInjectionTransport.predicate_is_database_account_call(response.request):
@@ -225,6 +259,31 @@ def transform_topology_swr_mrr(
             writable_locations[0]["name"] = write_region_name
             readable_locations.append({"name": read_region_name, "databaseAccountEndpoint" : test_config.TestConfig.local_host})
             FaultInjectionTransport.logger.info("Transformed Account Topology: {}".format(result))
+            # TODO: PPAF - need to verify below behavior against actual Cosmos DB service response
+            if enable_per_partition_failover:
+                result["enablePerPartitionFailoverBehavior"] = True
+            request: HttpRequest = response.request
+            return FaultInjectionTransport.MockHttpResponse(request, 200, result)
+
+        return response
+
+    @staticmethod
+    def transform_topology_ppaf_enabled(
+            inner: Callable[[], RequestsTransportResponse],
+            enable_per_partition_failover: bool = False) -> RequestsTransportResponse:
+
+        response = inner()
+        if not FaultInjectionTransport.predicate_is_database_account_call(response.request):
+            return response
+
+        data = response.body()
+        if response.status_code == 200 and data:
+            data = data.decode("utf-8")
+            result = json.loads(data)
+            FaultInjectionTransport.logger.info("Transformed Account Topology: {}".format(result))
+            # TODO: PPAF - need to verify below behavior against actual Cosmos DB service response
+            if enable_per_partition_failover:
+                result["enablePerPartitionFailoverBehavior"] = True
             request: HttpRequest = response.request
             return FaultInjectionTransport.MockHttpResponse(request, 200, result)
 
@@ -267,8 +326,25 @@ def transform_topology_mwr(
 
         return response
 
+    class MockHttpRequest(HttpRequest):
+        def __init__(
+                self,
+                url: str,
+                method: str = "GET",
+                headers: Optional[Mapping[str, str]] = None,
+                files: Optional[Any] = None,
+                data: Optional[Any] = None,
+        ) -> None:
+            self.method = method
+            self.url = url
+            self.headers: Optional[MutableMapping[str, str]] = headers
+            self.files: Optional[Any] = files
+            self.data: Optional[Any] = data
+            self.multipart_mixed_info: Optional[
+                Tuple[Sequence[Any], Sequence[Any], Optional[str], Dict[str, Any]]] = None
+
     class MockHttpResponse(RequestsTransportResponse):
-        def __init__(self, request: HttpRequest, status_code: int, content:Optional[Dict[str, Any]]):
+        def __init__(self, request: HttpRequest, status_code: int, content: Optional[Any] = None):
             self.request: HttpRequest = request
             # This is actually never None, and set by all implementations after the call to
             # __init__ of this class. This class is also a legacy impl, so it's risky to change it
diff --git a/sdk/cosmos/azure-cosmos/tests/test_location_cache.py b/sdk/cosmos/azure-cosmos/tests/test_location_cache.py
index 887be44f2273..4194fc5672b7 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_location_cache.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_location_cache.py
@@ -215,9 +215,9 @@ def test_get_applicable_regional_endpoints_excluded_regions(self, test_type):
             location_cache.perform_on_database_account_read(database_account)
 
             # Init requests and set excluded regions on requests
-            write_doc_request = RequestObject(ResourceType.Document, _OperationType.Create, None)
+            write_doc_request = RequestObject(ResourceType.Document, _OperationType.Create, {})
             write_doc_request.excluded_locations = excluded_locations_on_requests
-            read_doc_request = RequestObject(ResourceType.Document, _OperationType.Read, None)
+            read_doc_request = RequestObject(ResourceType.Document, _OperationType.Read, {})
             read_doc_request.excluded_locations = excluded_locations_on_requests
 
             # Test if read endpoints were correctly filtered on client level
@@ -247,7 +247,7 @@ def test_set_excluded_locations_for_requests(self):
         options: Mapping[str, Any] = {"excludedLocations": excluded_locations}
 
         expected_excluded_locations = excluded_locations
-        read_doc_request = RequestObject(ResourceType.Document, _OperationType.Create, None)
+        read_doc_request = RequestObject(ResourceType.Document, _OperationType.Create, {})
         read_doc_request.set_excluded_location_from_options(options)
         actual_excluded_locations = read_doc_request.excluded_locations
         assert actual_excluded_locations == expected_excluded_locations
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
new file mode 100644
index 000000000000..4b805e38c78d
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -0,0 +1,191 @@
+# The MIT License (MIT)
+# Copyright (c) Microsoft Corporation. All rights reserved.
+import os
+import unittest
+import uuid
+from time import sleep
+
+import pytest
+from azure.core.exceptions import ServiceResponseError
+
+import test_config
+from azure.cosmos import CosmosClient, _partition_health_tracker, _location_cache, PartitionKey
+from azure.cosmos.exceptions import CosmosHttpResponseError
+from _fault_injection_transport import FaultInjectionTransport
+from test_per_partition_circuit_breaker_mm import create_doc, operations, REGION_1, \
+    REGION_2, PK_VALUE, CREATE, UPSERT, REPLACE, DELETE, PATCH, BATCH, DELETE_ALL_ITEMS_BY_PARTITION_KEY
+
+@pytest.fixture(scope="class", autouse=True)
+def setup_teardown():
+    os.environ["AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"] = "True"
+    yield
+    os.environ["AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"] = "False"
+
+
+def create_errors():
+    errors = []
+    error_codes = [403, 408, 500, 502, 503]
+    for error_code in error_codes:
+        if error_code == 403:
+            errors.append(CosmosHttpResponseError(
+                status_code=error_code,
+                message="Some injected error.",
+                sub_status=3))
+        else:
+            errors.append(CosmosHttpResponseError(
+                status_code=error_code,
+                message="Some injected error."))
+    return errors
+
+def write_operations_and_errors():
+    write_operations = [CREATE, UPSERT, REPLACE, DELETE, PATCH, BATCH]
+    errors = create_errors()
+    params = []
+    for write_operation in write_operations:
+        for error in errors:
+            params.append((write_operation, error))
+
+    return params
+
+def perform_write_operation(operation, container, fault_injection_container, doc_id, pk):
+    resp = None
+    doc = {'id': doc_id,
+           'pk': pk,
+           'name': 'sample document',
+           'key': 'value'}
+    if operation == CREATE:
+        resp = fault_injection_container.create_item(body=doc)
+    elif operation == UPSERT:
+        resp = fault_injection_container.upsert_item(body=doc)
+    elif operation == REPLACE:
+        container.create_item(body=doc)
+        sleep(1)
+        new_doc = {'id': doc_id,
+                   'pk': pk,
+                   'name': 'sample document' + str(uuid),
+                   'key': 'value'}
+        resp = fault_injection_container.replace_item(item=doc['id'], body=new_doc)
+    elif operation == DELETE:
+        container.create_item(body=doc)
+        sleep(1)
+        resp = fault_injection_container.delete_item(item=doc['id'], partition_key=doc['pk'])
+    elif operation == PATCH:
+        container.create_item(body=doc)
+        sleep(1)
+        patch_ops = [{"op": "incr", "path": "/company", "value": 3}]
+        resp = fault_injection_container.patch_item(item=doc['id'], partition_key=doc['pk'], patch_operations=patch_ops)
+    elif operation == BATCH:
+        batch_operations = [
+            ("create", (doc,)),
+            ("upsert", (doc,)),
+            ("upsert", (doc,)),
+            ("upsert", (doc,)),
+        ]
+        resp = fault_injection_container.execute_item_batch(batch_operations, partition_key=doc['pk'])
+    # this will need to be emulator only
+    elif operation == DELETE_ALL_ITEMS_BY_PARTITION_KEY:
+        container.create_item(body=doc)
+        resp = fault_injection_container.delete_all_items_by_partition_key(pk)
+    return resp
+
+# These tests assume that the configured live account has one main write region and one secondary read region.
+
+@pytest.mark.cosmosPerPartitionAutomaticFailover
+class TestPerPartitionAutomaticFailover:
+    host = test_config.TestConfig.host
+    master_key = test_config.TestConfig.masterKey
+    connectionPolicy = test_config.TestConfig.connectionPolicy
+    TEST_DATABASE_ID = "test_config.TestConfig.TEST_DATABASE_ID"
+    TEST_CONTAINER_MULTI_PARTITION_ID = test_config.TestConfig.TEST_MULTI_PARTITION_CONTAINER_ID
+
+    def setup_method_with_custom_transport(self, custom_transport, default_endpoint=host, **kwargs):
+        container_id = kwargs.pop("container_id", None)
+        if not container_id:
+            container_id = self.TEST_CONTAINER_MULTI_PARTITION_ID
+        client = CosmosClient(default_endpoint, self.master_key, consistency_level="Session",
+                              preferred_locations=[REGION_1, REGION_2],
+                              transport=custom_transport, **kwargs)
+        db = client.create_database_if_not_exists(self.TEST_DATABASE_ID)
+        container = db.create_container_if_not_exists(container_id, PartitionKey(path="/pk"),)
+        return {"client": client, "db": db, "col": container}
+
+    def setup_info(self, error, max_count=None, is_batch=False, **kwargs):
+        custom_transport = FaultInjectionTransport()
+        # two documents targeted to same partition, one will always fail and the other will succeed
+        doc_fail_id = str(uuid.uuid4())
+        doc_success_id = str(uuid.uuid4())
+        predicate = lambda r: FaultInjectionTransport.predicate_req_for_document_with_id(r, doc_fail_id)
+        # The MockRequest only gets used to create the MockHttpResponse
+        mock_request = FaultInjectionTransport.MockHttpRequest(url=self.host)
+        if is_batch:
+            success_response = FaultInjectionTransport.MockHttpResponse(mock_request, 200, [{"statusCode": 200}],)
+        else:
+            success_response = FaultInjectionTransport.MockHttpResponse(mock_request, 200)
+        custom_transport.add_fault(predicate=predicate, fault_factory=error, max_inner_count=max_count,
+                                   after_max_count=success_response)
+        is_get_account_predicate = lambda r: FaultInjectionTransport.predicate_is_database_account_call(r)
+        # Set the database account response to have PPAF enabled
+        ppaf_enabled_database_account = \
+            lambda r, inner: FaultInjectionTransport.transform_topology_ppaf_enabled(
+                inner=inner,
+                enable_per_partition_failover=True)
+        custom_transport.add_response_transformation(
+            is_get_account_predicate,
+            ppaf_enabled_database_account)
+        setup = self.setup_method_with_custom_transport(None, default_endpoint=self.host, **kwargs)
+        custom_setup = self.setup_method_with_custom_transport(custom_transport, default_endpoint=self.host, **kwargs)
+        return setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate
+
+    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors())
+    def test_ppaf_partition_info_cache_and_routing(self, write_operation, error):
+        # This test validates that the partition info cache is updated correctly upon failures, and that the
+        # per-partition automatic failover logic routes requests to the next available regional endpoint
+        error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, error)
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(error_lambda, 1, write_operation == BATCH)
+        container = setup['col']
+        fault_injection_container = custom_setup['col']
+        global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
+
+        # Create a document to populate the per-partition GEM partition range info cache
+        fault_injection_container.create_item(body={'id': doc_success_id, 'pk': PK_VALUE,
+                                                    'name': 'sample document', 'key': 'value'})
+        pk_range_wrapper = list(global_endpoint_manager.partition_range_to_failover_info.keys())[0]
+        initial_endpoint = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_regional_endpoint
+
+        # Based on our configuration, we should have had one error followed by a success - marking only the previous endpoint as unavailable
+        perform_write_operation(
+            write_operation,
+            container,
+            fault_injection_container,
+            doc_fail_id,
+            PK_VALUE)
+        partition_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
+        # Verify that the partition is marked as unavailable, and that the current regional endpoint is not the same
+        assert len(partition_info.unavailable_regional_endpoints) == 1
+        assert initial_endpoint in partition_info.unavailable_regional_endpoints
+        assert initial_endpoint != partition_info.current_regional_endpoint # west us 3 != west us
+
+        # Now we run another request to see how the cache gets updated
+        perform_write_operation(
+            write_operation,
+            container,
+            fault_injection_container,
+            str(uuid.uuid4()),
+            PK_VALUE)
+        partition_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
+        # Verify that the cache is empty, since the request going to the second regional endpoint failed
+        # Once we reach the point of all available regions being marked as unavailable, the cache is cleared
+        assert len(partition_info.unavailable_regional_endpoints) == 0
+        assert initial_endpoint not in partition_info.unavailable_regional_endpoints
+        assert partition_info.current_regional_endpoint is None
+
+
+    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors())
+    def test_ppaf_exclude_regions(self, write_operation, error):
+        # TODO: PPAF - finish this test
+        return
+
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py
index f8e7369716a2..bda43946a544 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py
@@ -15,7 +15,7 @@
 from azure.cosmos.exceptions import CosmosHttpResponseError
 from _fault_injection_transport import FaultInjectionTransport
 from test_per_partition_circuit_breaker_mm import create_doc, write_operations_and_errors, operations, REGION_1, \
-    REGION_2, PK_VALUE, perform_write_operation, perform_read_operation
+    REGION_2, PK_VALUE, perform_write_operation, perform_read_operation, READ, CREATE, validate_stats
 
 COLLECTION = "created_collection"
 
@@ -38,6 +38,7 @@ def validate_unhealthy_partitions(global_endpoint_manager,
     assert unhealthy_partitions == expected_unhealthy_partitions
 
 @pytest.mark.cosmosCircuitBreakerMultiRegion
+@pytest.mark.cosmosPerPartitionAutomaticFailover
 class TestPerPartitionCircuitBreakerSmMrr:
     host = test_config.TestConfig.host
     master_key = test_config.TestConfig.masterKey
diff --git a/sdk/cosmos/azure-cosmos/tests/test_service_retry_policies_async.py b/sdk/cosmos/azure-cosmos/tests/test_service_retry_policies_async.py
index b0b60ef7730c..208d9d4291e4 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_service_retry_policies_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_service_retry_policies_async.py
@@ -10,7 +10,7 @@
 from azure.core.exceptions import ServiceRequestError, ServiceResponseError
 
 import test_config
-from azure.cosmos import DatabaseAccount, _location_cache
+from azure.cosmos import DatabaseAccount
 from azure.cosmos._location_cache import RegionalRoutingContext
 from azure.cosmos.aio import CosmosClient, _retry_utility_async, _global_endpoint_manager_async
 from azure.cosmos.exceptions import CosmosHttpResponseError
diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json
index 8edaff6fe68d..fb21bacff0e1 100644
--- a/sdk/cosmos/live-platform-matrix.json
+++ b/sdk/cosmos/live-platform-matrix.json
@@ -59,6 +59,23 @@
         }
       }
     },
+    {
+      "PerPartitionAutomaticFailoverTestConfig": {
+        "Ubuntu2004_313_ppaf": {
+          "OSVmImage": "env:LINUXVMIMAGE",
+          "Pool": "env:LINUXPOOL",
+          "PythonVersion": "3.13",
+          "CoverageArg": "--disablecov",
+          "TestSamples": "false",
+          "TestMarkArgument": "cosmosPerPartitionAutomaticFailover"
+        }
+      },
+      "ArmConfig": {
+        "MultiRegion": {
+          "ArmTemplateParameters": "@{ defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true; perPartitionAutomaticFailoverEnabled = 'True' }"
+        }
+      }
+    },
     {
       "MacTestConfig": {
         "macos311_search_query": {
diff --git a/sdk/cosmos/test-resources.bicep b/sdk/cosmos/test-resources.bicep
index 735c1a0e66ee..f6088ec122a6 100644
--- a/sdk/cosmos/test-resources.bicep
+++ b/sdk/cosmos/test-resources.bicep
@@ -15,6 +15,9 @@ param location string = resourceGroup().location
 @description('Whether Per Partition Circuit Breaker should be enabled.')
 param circuitBreakerEnabled string = 'False'
 
+@description('Whether Per Partition Automatic Failover should be enabled.')
+param perPartitionAutomaticFailoverEnabled string = 'False'
+
 @description('The api version to be used by Bicep to create resources')
 param apiVersion string = '2023-04-15'
 
@@ -105,5 +108,6 @@ resource accountName_roleAssignmentId 'Microsoft.DocumentDB/databaseAccounts/sql
 }
 
 output AZURE_COSMOS_ENABLE_CIRCUIT_BREAKER string = circuitBreakerEnabled
+output AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER string = perPartitionAutomaticFailoverEnabled
 output ACCOUNT_HOST string = reference(resourceId, apiVersion).documentEndpoint
 output ACCOUNT_KEY string = listKeys(resourceId, apiVersion).primaryMasterKey

From b8228e7e7c91dd3f34a026da24c48db9008b6058 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Sun, 15 Jun 2025 20:50:39 -0400
Subject: [PATCH 02/68] async changes

---
 .../azure/cosmos/_cosmos_client_connection.py |  11 +-
 ...anager_per_partition_automatic_failover.py |  22 ++-
 .../cosmos/_timeout_failover_retry_policy.py  |   5 +-
 .../azure/cosmos/aio/_asynchronous_request.py |   5 +-
 .../aio/_cosmos_client_connection_async.py    |  11 +-
 ..._endpoint_manager_circuit_breaker_async.py |   2 +-
 ..._per_partition_automatic_failover_async.py | 149 ++++++++++++++++++
 .../azure/cosmos/aio/_retry_utility_async.py  |   5 +-
 .../tests/_fault_injection_transport.py       |  12 +-
 .../tests/_fault_injection_transport_async.py |  57 ++++++-
 .../test_per_partition_automatic_failover.py  |  75 ++-------
 ..._per_partition_automatic_failover_async.py | 136 ++++++++++++++++
 .../test_per_partition_circuit_breaker_mm.py  |  11 +-
 ..._per_partition_circuit_breaker_mm_async.py |   4 +-
 ..._partition_circuit_breaker_sm_mrr_async.py |   1 +
 15 files changed, 402 insertions(+), 104 deletions(-)
 create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
 create mode 100644 sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py
index aeda60b03ce3..c135277f6c9d 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py
@@ -2621,15 +2621,16 @@ def GetDatabaseAccount(
             database_account._ReadableLocations = result[Constants.ReadableLocations]
         if Constants.EnableMultipleWritableLocations in result:
             database_account._EnableMultipleWritableLocations = result[
-                Constants.EnableMultipleWritableLocations
-            ]
-        # TODO: PPAF - Verify that this is the correct variable from the service
-        if Constants.EnablePerPartitionFailoverBehavior in result:
-            database_account._EnablePerPartitionFailoverBehavior = result[Constants.EnablePerPartitionFailoverBehavior]
+                Constants.EnableMultipleWritableLocations]
 
         self.UseMultipleWriteLocations = (
                 self.connection_policy.UseMultipleWriteLocations and database_account._EnableMultipleWritableLocations
         )
+
+        # TODO: Verify that this is the correct variable from the service
+        if Constants.EnablePerPartitionFailoverBehavior in result:
+            database_account._EnablePerPartitionFailoverBehavior = result[Constants.EnablePerPartitionFailoverBehavior]
+
         if response_hook:
             response_hook(last_response_headers, result)
         return database_account
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 03a3080cdefe..bbce867d4b53 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -8,7 +8,7 @@
 import os
 import threading
 
-from typing import Dict, List, Set, TYPE_CHECKING, Optional
+from typing import Dict, Set, TYPE_CHECKING, Optional
 
 from azure.cosmos.http_constants import ResourceType
 from azure.cosmos._constants import _Constants as Constants
@@ -24,10 +24,12 @@
 
 logger = logging.getLogger("azure.cosmos._GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover")
 
+# pylint: disable=name-too-long, protected-access
+
 class PartitionLevelFailoverInfo:
     """
     Holds information about the partition level regional failover.
-    It is used to track the partition key range and the regions where it is available.
+    Used to track the partition key range and the regions where it is available.
     """
     def __init__(self):
         self.unavailable_regional_endpoints = set()
@@ -73,11 +75,13 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
                 or _OperationType.IsReadOnlyOperation(request.operation_type)):
             return False
 
-        per_partition_automatic_failover_config_enabled = os.environ.get(Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG,
-                                                        Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG_DEFAULT).lower() == "true"
+        per_partition_automatic_failover_config_enabled = (
+                os.environ.get(Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG,
+                               Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG_DEFAULT).lower() == "true")
 
-        # TODO: PPAF - This check here needs to be verified once we test against a live account with the config enabled.
-        if not per_partition_automatic_failover_config_enabled or not self._database_account_cache._EnablePerPartitionFailoverBehavior:
+        # TODO: This check here needs to be verified once we test against a live account with the config enabled.
+        if (not per_partition_automatic_failover_config_enabled or
+                not self._database_account_cache._EnablePerPartitionFailoverBehavior):
             return False
 
         # if we have at most one region available in the account, we cannot do per partition automatic failover
@@ -100,7 +104,7 @@ def resolve_service_endpoint_for_partition(
         if self.is_per_partition_automatic_failover_applicable(request) and pk_range_wrapper:
             # If per partition automatic failover is applicable, we check partition unavailability
             if pk_range_wrapper in self.partition_range_to_failover_info:
-                print("Resolving service endpoint for partition with per partition automatic failover enabled.")
+                logger.info("Resolving service endpoint for partition with per partition automatic failover enabled.")
                 partition_failover_info = self.partition_range_to_failover_info[pk_range_wrapper]
                 if request.location_endpoint_to_route is not None:
                     if request.location_endpoint_to_route in partition_failover_info.unavailable_regional_endpoints:
@@ -121,7 +125,6 @@ def resolve_service_endpoint_for_partition(
                 partition_failover_info = PartitionLevelFailoverInfo()
                 partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
                 self.partition_range_to_failover_info[pk_range_wrapper] = partition_failover_info
-
             return self._resolve_service_endpoint(request)
         else:
             return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
@@ -132,6 +135,9 @@ def compute_available_preferred_regions(
     ) -> Set[str]:
         """
         Computes the available regional endpoints for the request based on customer-set preferred and excluded regions.
+        :param RequestObject request: The request object containing the routing context.
+        :return: A set of available regional endpoints.
+        :rtype: Set[str]
         """
         excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
         preferred_locations = self.PreferredLocations
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
index 70f6fdd2e299..8e60e0f7dcf6 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
@@ -48,10 +48,11 @@ def ShouldRetry(self, _exception):
     def resolve_next_region_service_endpoint(self):
         if self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
             # If per partition automatic failover is applicable, we mark the current endpoint as unavailable
-            # and resolve the service endpoint for the partition range - otherwise, continue with the default retry logic
+            # and resolve the service endpoint for the partition range - otherwise, continue with default retry logic
             partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[self.pk_range_wrapper]
             partition_level_info.unavailable_regional_endpoints.add(self.request.location_endpoint_to_route)
-            return self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
+            return self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request,
+                                                                                       self.pk_range_wrapper)
 
         # clear previous location-based routing directive
         self.request.clear_route_to_location()
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
index 79e674eaa31c..1cd2a22039b4 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
@@ -76,8 +76,9 @@ async def _Request(global_endpoint_manager, request_params, connection_policy, p
         base_url = request_params.endpoint_override
     else:
         pk_range_wrapper = None
-        if global_endpoint_manager.is_circuit_breaker_applicable(request_params):
-            # Circuit breaker is applicable, so we need to use the endpoint from the request
+        if (global_endpoint_manager.is_circuit_breaker_applicable(request_params) or
+                global_endpoint_manager.is_per_partition_automatic_failover_applicable(request_params)):
+            # Circuit breaker or per-partition failover are applicable, so we need to use the endpoint from the request
             pk_range_wrapper = await global_endpoint_manager.create_pk_range_wrapper(request_params)
         base_url = global_endpoint_manager.resolve_service_endpoint_for_partition(request_params, pk_range_wrapper)
     if not request.url.startswith(base_url):
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py
index cbcd3ccafba7..a56cb5777406 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py
@@ -48,8 +48,8 @@
     DistributedTracingPolicy,
     ProxyPolicy)
 from azure.core.utils import CaseInsensitiveDict
-from azure.cosmos.aio._global_partition_endpoint_manager_circuit_breaker_async import (
-    _GlobalPartitionEndpointManagerForCircuitBreakerAsync)
+from azure.cosmos.aio._global_partition_endpoint_manager_per_partition_automatic_failover_async import (
+    _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync)
 
 from .. import _base as base
 from .._base import _build_properties_cache
@@ -174,7 +174,7 @@ def __init__( # pylint: disable=too-many-statements
         # Keeps the latest response headers from the server.
         self.last_response_headers: CaseInsensitiveDict = CaseInsensitiveDict()
         self.UseMultipleWriteLocations = False
-        self._global_endpoint_manager = _GlobalPartitionEndpointManagerForCircuitBreakerAsync(self)
+        self._global_endpoint_manager = _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync(self)
 
         retry_policy = None
         if isinstance(self.connection_policy.ConnectionRetryConfiguration, AsyncHTTPPolicy):
@@ -452,6 +452,11 @@ async def GetDatabaseAccount(
         self.UseMultipleWriteLocations = (
                 self.connection_policy.UseMultipleWriteLocations and database_account._EnableMultipleWritableLocations
         )
+
+        # TODO: Verify that this is the correct variable from the service
+        if Constants.EnablePerPartitionFailoverBehavior in result:
+            database_account._EnablePerPartitionFailoverBehavior = result[Constants.EnablePerPartitionFailoverBehavior]
+
         return database_account
 
     async def _GetDatabaseAccountCheck(
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_circuit_breaker_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_circuit_breaker_async.py
index 78e8b551ee7a..0150fd1dd025 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_circuit_breaker_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_circuit_breaker_async.py
@@ -101,7 +101,7 @@ async def record_failure(
             if pk_range_wrapper:
                 self.global_partition_endpoint_manager_core.record_failure(request, pk_range_wrapper)
 
-    def resolve_service_endpoint_for_partition(
+    def _resolve_service_endpoint_for_partition_circuit_breaker(
             self,
             request: RequestObject,
             pk_range_wrapper: Optional[PartitionKeyRangeWrapper]
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
new file mode 100644
index 000000000000..a9b974483614
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -0,0 +1,149 @@
+# The MIT License (MIT)
+# Copyright (c) 2025 Microsoft Corporation
+
+"""Class for global endpoint manager for per partition automatic failover. This class inherits the circuit breaker
+endpoint manager, since enabling per partition automatic failover also enables the circuit breaker logic.
+"""
+import logging
+import os
+import threading
+
+from typing import Dict, Set, TYPE_CHECKING, Optional
+
+from azure.cosmos.http_constants import ResourceType
+from azure.cosmos._constants import _Constants as Constants
+from azure.cosmos.aio._global_partition_endpoint_manager_circuit_breaker_async import \
+    _GlobalPartitionEndpointManagerForCircuitBreakerAsync
+from azure.cosmos.documents import _OperationType
+
+from azure.cosmos._request_object import RequestObject
+from azure.cosmos._routing.routing_range import PartitionKeyRangeWrapper
+
+if TYPE_CHECKING:
+    from azure.cosmos._cosmos_client_connection import CosmosClientConnection
+
+logger = logging.getLogger("azure.cosmos._GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover")
+
+# pylint: disable=name-too-long, protected-access
+
+class PartitionLevelFailoverInfo:
+    """
+    Holds information about the partition level regional failover.
+    Used to track the partition key range and the regions where it is available.
+    """
+    def __init__(self):
+        self.unavailable_regional_endpoints = set()
+        self.current_regional_endpoint = None
+        self._lock = threading.Lock()
+
+    def try_move_to_next_location(self, available_account_regional_endpoints: Set[str], request: RequestObject) -> bool:
+        with self._lock:
+            failed_regional_endpoint = request.location_endpoint_to_route
+            if failed_regional_endpoint != self.current_regional_endpoint:
+                logger.info("Moving to next available regional endpoint: %s", self.current_regional_endpoint)
+                request.route_to_location(self.current_regional_endpoint)
+                return True
+
+            for regional_endpoint in available_account_regional_endpoints:
+                if regional_endpoint == self.current_regional_endpoint:
+                    continue
+
+                if regional_endpoint in self.unavailable_regional_endpoints:
+                    continue
+
+                self.current_regional_endpoint = regional_endpoint
+                logger.info("Moving to next available regional endpoint: %s", self.current_regional_endpoint)
+                request.route_to_location(self.current_regional_endpoint)
+                return True
+
+            return False
+
+class _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync(_GlobalPartitionEndpointManagerForCircuitBreakerAsync):
+    """
+    This internal class implements the logic for partition endpoint management for
+    geo-replicated database accounts.
+    """
+    def __init__(self, client: "CosmosClientConnection"):
+        super(_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync, self).__init__(client)
+        self.partition_range_to_failover_info: Dict[PartitionKeyRangeWrapper, PartitionLevelFailoverInfo] = {}
+
+    def is_per_partition_automatic_failover_applicable(self, request: RequestObject) -> bool:
+        if not request:
+            return False
+
+        if (self.location_cache.can_use_multiple_write_locations_for_request(request)
+                or _OperationType.IsReadOnlyOperation(request.operation_type)):
+            return False
+
+        per_partition_automatic_failover_config_enabled = (
+                os.environ.get(Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG,
+                               Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG_DEFAULT).lower() == "true")
+
+        # TODO: This check here needs to be verified once we test against a live account with the config enabled.
+        if (not per_partition_automatic_failover_config_enabled or
+                not self._database_account_cache._EnablePerPartitionFailoverBehavior):
+            return False
+
+        # if we have at most one region available in the account, we cannot do per partition automatic failover
+        available_regions = self.compute_available_preferred_regions(request)
+        if len(available_regions) <= 1:
+            return False
+
+        # if the request is not for a document or if the request is not executing a stored procedure, return False
+        if (request.resource_type != ResourceType.Document and
+                request.operation_type != _OperationType.ExecuteJavaScript):
+            return False
+
+        return True
+
+    def resolve_service_endpoint_for_partition(
+            self,
+            request: RequestObject,
+            pk_range_wrapper: Optional[PartitionKeyRangeWrapper]
+    ) -> str:
+        if self.is_per_partition_automatic_failover_applicable(request) and pk_range_wrapper:
+            # If per partition automatic failover is applicable, we check partition unavailability
+            if pk_range_wrapper in self.partition_range_to_failover_info:
+                logger.info("Resolving service endpoint for partition with per partition automatic failover enabled.")
+                partition_failover_info = self.partition_range_to_failover_info[pk_range_wrapper]
+                if request.location_endpoint_to_route is not None:
+                    if request.location_endpoint_to_route in partition_failover_info.unavailable_regional_endpoints:
+                        # If the current region is unavailable, we try to move to the next available region
+                        if not partition_failover_info.try_move_to_next_location(
+                                self.compute_available_preferred_regions(request),
+                                request):
+                            logger.info("All available regions for partition are unavailable. Refreshing cache.")
+                            # If no other region is available, we invalidate the cache and start once again from our
+                            # main write region in the account configurations
+                            self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
+                            request.clear_route_to_location()
+                            return self._resolve_service_endpoint(request)
+                    else:
+                        # Update the current regional endpoint to whatever the request is routing to
+                        partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
+            else:
+                partition_failover_info = PartitionLevelFailoverInfo()
+                partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
+                self.partition_range_to_failover_info[pk_range_wrapper] = partition_failover_info
+            return self._resolve_service_endpoint(request)
+        else:
+            return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
+
+    def compute_available_preferred_regions(
+            self,
+            request: RequestObject
+    ) -> Set[str]:
+        """
+        Computes the available regional endpoints for the request based on customer-set preferred and excluded regions.
+        :param RequestObject request: The request object containing the routing context.
+        :return: A set of available regional endpoints.
+        :rtype: Set[str]
+        """
+        excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
+        preferred_locations = self.PreferredLocations
+        available_regions = [item for item in preferred_locations if item not in excluded_locations]
+        available_regional_endpoints = {
+            self.location_cache.account_read_regional_routing_contexts_by_location[region].primary_endpoint
+            for region in available_regions
+        }
+        return available_regional_endpoints
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
index 33b9c0785b38..7884f9060183 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
@@ -64,11 +64,12 @@ async def ExecuteAsync(client, global_endpoint_manager, function, *args, **kwarg
     :rtype: tuple of (dict, dict)
     """
     pk_range_wrapper = None
-    if args and global_endpoint_manager.is_circuit_breaker_applicable(args[0]):
+    if args and (global_endpoint_manager.is_per_partition_automatic_failover_applicable(args[0]) or
+                 global_endpoint_manager.is_circuit_breaker_applicable(args[0])):
         pk_range_wrapper = await global_endpoint_manager.create_pk_range_wrapper(args[0])
     # instantiate all retry policies here to be applied for each request execution
     endpointDiscovery_retry_policy = _endpoint_discovery_retry_policy.EndpointDiscoveryRetryPolicy(
-        client.connection_policy, global_endpoint_manager, *args
+        client.connection_policy, global_endpoint_manager, pk_range_wrapper, *args
     )
     database_account_retry_policy = _database_account_retry_policy.DatabaseAccountRetryPolicy(
         client.connection_policy
diff --git a/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py b/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py
index 127a21cb0bd4..705f5e4090ee 100644
--- a/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py
+++ b/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py
@@ -259,7 +259,7 @@ def transform_topology_swr_mrr(
             writable_locations[0]["name"] = write_region_name
             readable_locations.append({"name": read_region_name, "databaseAccountEndpoint" : test_config.TestConfig.local_host})
             FaultInjectionTransport.logger.info("Transformed Account Topology: {}".format(result))
-            # TODO: PPAF - need to verify below behavior against actual Cosmos DB service response
+            # TODO: need to verify below behavior against actual Cosmos DB service response
             if enable_per_partition_failover:
                 result["enablePerPartitionFailoverBehavior"] = True
             request: HttpRequest = response.request
@@ -268,9 +268,8 @@ def transform_topology_swr_mrr(
         return response
 
     @staticmethod
-    def transform_topology_ppaf_enabled(
-            inner: Callable[[], RequestsTransportResponse],
-            enable_per_partition_failover: bool = False) -> RequestsTransportResponse:
+    def transform_topology_ppaf_enabled( # cspell:disable-line
+            inner: Callable[[], RequestsTransportResponse]) -> RequestsTransportResponse:
 
         response = inner()
         if not FaultInjectionTransport.predicate_is_database_account_call(response.request):
@@ -280,10 +279,9 @@ def transform_topology_ppaf_enabled(
         if response.status_code == 200 and data:
             data = data.decode("utf-8")
             result = json.loads(data)
+            # TODO: need to verify below behavior against actual Cosmos DB service response
+            result["enablePerPartitionFailoverBehavior"] = True
             FaultInjectionTransport.logger.info("Transformed Account Topology: {}".format(result))
-            # TODO: PPAF - need to verify below behavior against actual Cosmos DB service response
-            if enable_per_partition_failover:
-                result["enablePerPartitionFailoverBehavior"] = True
             request: HttpRequest = response.request
             return FaultInjectionTransport.MockHttpResponse(request, 200, result)
 
diff --git a/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport_async.py b/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport_async.py
index 994357323b81..08d0c47e15a5 100644
--- a/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport_async.py
@@ -60,8 +60,29 @@ async def error_with_counter(self, error: Exception) -> Exception:
         self.counters[ERROR_WITH_COUNTER] += 1
         return error
 
-    def add_fault(self, predicate: Callable[[HttpRequest], bool], fault_factory: Callable[[HttpRequest], Awaitable[Exception]]):
-        self.faults.append({"predicate": predicate, "apply": fault_factory})
+    def add_fault(self,
+                  predicate: Callable[[HttpRequest], bool],
+                  fault_factory: Callable[[HttpRequest], Awaitable[Exception]],
+                  max_inner_count: Optional[int] = None,
+                  after_max_count: Optional[Callable[[HttpRequest], AioHttpTransportResponse]] = None):
+        """ Adds a fault to the transport that will be applied when the predicate matches the request.
+        :param Callable predicate: A callable that takes an HttpRequest and returns True if the fault should be applied.
+        :param Callable fault_factory: A callable that takes an HttpRequest and returns an Exception to be raised.
+        :param int max_inner_count: Optional maximum number of times the fault can be applied for one request.
+            If None, the fault will be applied every time the predicate matches.
+        :param Callable after_max_count: Optional callable that takes an HttpRequest and returns a
+            AioHttpTransportResponse. Used to return a different response after the maximum number of faults has
+            been applied. Can only be used if `max_inner_count` is not None.
+        """
+        if max_inner_count is not None:
+            if after_max_count is not None:
+                self.faults.append({"predicate": predicate, "apply": fault_factory, "after_max_count": after_max_count,
+                                    "max_count": max_inner_count, "current_count": 0})
+            else:
+                self.faults.append({"predicate": predicate, "apply": fault_factory,
+                                    "max_count": max_inner_count, "current_count": 0})
+        else:
+            self.faults.append({"predicate": predicate, "apply": fault_factory})
 
     def add_response_transformation(self, predicate: Callable[[HttpRequest], bool], response_transformation: Callable[[HttpRequest, Callable[[HttpRequest], AioHttpTransportResponse]], AioHttpTransportResponse]):
         self.responseTransformations.append({
@@ -82,6 +103,16 @@ async def send(self, request: HttpRequest, *, stream: bool = False, proxies: Opt
         # find the first fault Factory with matching predicate if any
         first_fault_factory = FaultInjectionTransportAsync.__first_item(iter(self.faults), lambda f: f["predicate"](request))
         if first_fault_factory:
+            if "max_count" in first_fault_factory:
+                FaultInjectionTransportAsync.logger.info(f"Found fault factory with max count {first_fault_factory['max_count']}")
+                if first_fault_factory["current_count"] >= first_fault_factory["max_count"]:
+                    first_fault_factory["current_count"] = 0 # reset counter
+                    if "after_max_count" in first_fault_factory:
+                        FaultInjectionTransportAsync.logger.info("Max count reached, returning after_max_count")
+                        return first_fault_factory["after_max_count"]
+                    FaultInjectionTransportAsync.logger.info("Max count reached, skipping fault injection")
+                    return await super().send(request, proxies=proxies, **config)
+                first_fault_factory["current_count"] += 1
             FaultInjectionTransportAsync.logger.info("--> FaultInjectionTransportAsync.ApplyFaultInjection")
             injected_error = await first_fault_factory["apply"](request)
             FaultInjectionTransportAsync.logger.info("Found to-be-injected error {}".format(injected_error))
@@ -222,6 +253,26 @@ async def transform_topology_swr_mrr(
 
         return response
 
+    @staticmethod
+    async def transform_topology_ppaf_enabled( # cspell:disable-line
+            inner: Callable[[], Awaitable[AioHttpTransportResponse]]) -> AioHttpTransportResponse:
+
+        response = await inner()
+        if not FaultInjectionTransportAsync.predicate_is_database_account_call(response.request):
+            return response
+
+        data = response.body()
+        if response.status_code == 200 and data:
+            data = data.decode("utf-8")
+            result = json.loads(data)
+            # TODO: need to verify below behavior against actual Cosmos DB service response
+            result["enablePerPartitionFailoverBehavior"] = True
+            FaultInjectionTransportAsync.logger.info("Transformed Account Topology: {}".format(result))
+            request: HttpRequest = response.request
+            return FaultInjectionTransportAsync.MockHttpResponse(request, 200, result)
+
+        return response
+
     @staticmethod
     async def transform_topology_mwr(
             first_region_name: str,
@@ -260,7 +311,7 @@ async def transform_topology_mwr(
         return response
 
     class MockHttpResponse(AioHttpTransportResponse):
-        def __init__(self, request: HttpRequest, status_code: int, content:Optional[Dict[str, Any]]):
+        def __init__(self, request: HttpRequest, status_code: int, content: Optional[Any]=None):
             self.request: HttpRequest = request
             # This is actually never None, and set by all implementations after the call to
             # __init__ of this class. This class is also a legacy impl, so it's risky to change it
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index 4b805e38c78d..3c88e8c59911 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -3,17 +3,17 @@
 import os
 import unittest
 import uuid
-from time import sleep
 
 import pytest
-from azure.core.exceptions import ServiceResponseError
 
 import test_config
-from azure.cosmos import CosmosClient, _partition_health_tracker, _location_cache, PartitionKey
+from azure.cosmos import CosmosClient
 from azure.cosmos.exceptions import CosmosHttpResponseError
 from _fault_injection_transport import FaultInjectionTransport
-from test_per_partition_circuit_breaker_mm import create_doc, operations, REGION_1, \
-    REGION_2, PK_VALUE, CREATE, UPSERT, REPLACE, DELETE, PATCH, BATCH, DELETE_ALL_ITEMS_BY_PARTITION_KEY
+from test_per_partition_circuit_breaker_mm import (REGION_1, REGION_2, PK_VALUE, BATCH,
+                                                   write_operations_and_errors, perform_write_operation)
+
+# cspell:disable-file
 
 @pytest.fixture(scope="class", autouse=True)
 def setup_teardown():
@@ -37,57 +37,6 @@ def create_errors():
                 message="Some injected error."))
     return errors
 
-def write_operations_and_errors():
-    write_operations = [CREATE, UPSERT, REPLACE, DELETE, PATCH, BATCH]
-    errors = create_errors()
-    params = []
-    for write_operation in write_operations:
-        for error in errors:
-            params.append((write_operation, error))
-
-    return params
-
-def perform_write_operation(operation, container, fault_injection_container, doc_id, pk):
-    resp = None
-    doc = {'id': doc_id,
-           'pk': pk,
-           'name': 'sample document',
-           'key': 'value'}
-    if operation == CREATE:
-        resp = fault_injection_container.create_item(body=doc)
-    elif operation == UPSERT:
-        resp = fault_injection_container.upsert_item(body=doc)
-    elif operation == REPLACE:
-        container.create_item(body=doc)
-        sleep(1)
-        new_doc = {'id': doc_id,
-                   'pk': pk,
-                   'name': 'sample document' + str(uuid),
-                   'key': 'value'}
-        resp = fault_injection_container.replace_item(item=doc['id'], body=new_doc)
-    elif operation == DELETE:
-        container.create_item(body=doc)
-        sleep(1)
-        resp = fault_injection_container.delete_item(item=doc['id'], partition_key=doc['pk'])
-    elif operation == PATCH:
-        container.create_item(body=doc)
-        sleep(1)
-        patch_ops = [{"op": "incr", "path": "/company", "value": 3}]
-        resp = fault_injection_container.patch_item(item=doc['id'], partition_key=doc['pk'], patch_operations=patch_ops)
-    elif operation == BATCH:
-        batch_operations = [
-            ("create", (doc,)),
-            ("upsert", (doc,)),
-            ("upsert", (doc,)),
-            ("upsert", (doc,)),
-        ]
-        resp = fault_injection_container.execute_item_batch(batch_operations, partition_key=doc['pk'])
-    # this will need to be emulator only
-    elif operation == DELETE_ALL_ITEMS_BY_PARTITION_KEY:
-        container.create_item(body=doc)
-        resp = fault_injection_container.delete_all_items_by_partition_key(pk)
-    return resp
-
 # These tests assume that the configured live account has one main write region and one secondary read region.
 
 @pytest.mark.cosmosPerPartitionAutomaticFailover
@@ -105,8 +54,8 @@ def setup_method_with_custom_transport(self, custom_transport, default_endpoint=
         client = CosmosClient(default_endpoint, self.master_key, consistency_level="Session",
                               preferred_locations=[REGION_1, REGION_2],
                               transport=custom_transport, **kwargs)
-        db = client.create_database_if_not_exists(self.TEST_DATABASE_ID)
-        container = db.create_container_if_not_exists(container_id, PartitionKey(path="/pk"),)
+        db = client.get_database_client(self.TEST_DATABASE_ID)
+        container = db.get_container_client(container_id)
         return {"client": client, "db": db, "col": container}
 
     def setup_info(self, error, max_count=None, is_batch=False, **kwargs):
@@ -126,9 +75,7 @@ def setup_info(self, error, max_count=None, is_batch=False, **kwargs):
         is_get_account_predicate = lambda r: FaultInjectionTransport.predicate_is_database_account_call(r)
         # Set the database account response to have PPAF enabled
         ppaf_enabled_database_account = \
-            lambda r, inner: FaultInjectionTransport.transform_topology_ppaf_enabled(
-                inner=inner,
-                enable_per_partition_failover=True)
+            lambda r, inner: FaultInjectionTransport.transform_topology_ppaf_enabled(inner=inner)
         custom_transport.add_response_transformation(
             is_get_account_predicate,
             ppaf_enabled_database_account)
@@ -136,7 +83,7 @@ def setup_info(self, error, max_count=None, is_batch=False, **kwargs):
         custom_setup = self.setup_method_with_custom_transport(custom_transport, default_endpoint=self.host, **kwargs)
         return setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate
 
-    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors())
+    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_errors()))
     def test_ppaf_partition_info_cache_and_routing(self, write_operation, error):
         # This test validates that the partition info cache is updated correctly upon failures, and that the
         # per-partition automatic failover logic routes requests to the next available regional endpoint
@@ -180,9 +127,9 @@ def test_ppaf_partition_info_cache_and_routing(self, write_operation, error):
         assert partition_info.current_regional_endpoint is None
 
 
-    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors())
+    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_errors()))
     def test_ppaf_exclude_regions(self, write_operation, error):
-        # TODO: PPAF - finish this test
+        # TODO: finish this test
         return
 
 
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
new file mode 100644
index 000000000000..7a133d1f34cc
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -0,0 +1,136 @@
+# The MIT License (MIT)
+# Copyright (c) Microsoft Corporation. All rights reserved.
+import os
+import unittest
+import uuid
+
+import asyncio
+import pytest
+from typing import Dict, Any
+
+import test_config
+from azure.core.pipeline.transport._aiohttp import AioHttpTransport
+from azure.cosmos.aio import CosmosClient
+from _fault_injection_transport import FaultInjectionTransport
+from _fault_injection_transport_async import FaultInjectionTransportAsync
+from test_per_partition_automatic_failover import create_errors
+from test_per_partition_circuit_breaker_mm import REGION_1, REGION_2, PK_VALUE, BATCH, write_operations_and_errors
+from test_per_partition_circuit_breaker_mm_async import perform_write_operation
+
+# cspell:disable-file
+
+@pytest.fixture(scope="class", autouse=True)
+def setup_teardown():
+    os.environ["AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"] = "True"
+    yield
+    os.environ["AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"] = "False"
+
+# These tests assume that the configured live account has one main write region and one secondary read region.
+
+@pytest.mark.cosmosPerPartitionAutomaticFailover
+@pytest.mark.asyncio
+class TestPerPartitionAutomaticFailoverAsync:
+    host = test_config.TestConfig.host
+    master_key = test_config.TestConfig.masterKey
+    connectionPolicy = test_config.TestConfig.connectionPolicy
+    TEST_DATABASE_ID = "test_config.TestConfig.TEST_DATABASE_ID"
+    TEST_CONTAINER_MULTI_PARTITION_ID = test_config.TestConfig.TEST_MULTI_PARTITION_CONTAINER_ID
+
+    async def setup_method_with_custom_transport(self, custom_transport: AioHttpTransport, default_endpoint=host, **kwargs):
+        container_id = kwargs.pop("container_id", None)
+        if not container_id:
+            container_id = self.TEST_CONTAINER_MULTI_PARTITION_ID
+        client = CosmosClient(default_endpoint, self.master_key, consistency_level="Session",
+                              preferred_locations=[REGION_1, REGION_2],
+                              transport=custom_transport, **kwargs)
+        db = client.get_database_client(self.TEST_DATABASE_ID)
+        container = db.get_container_client(container_id)
+        return {"client": client, "db": db, "col": container}
+    
+    @staticmethod
+    async def cleanup_method(initialized_objects: Dict[str, Any]):
+        method_client: CosmosClient = initialized_objects["client"]
+        await method_client.close()
+
+    async def setup_info(self, error, max_count=None, is_batch=False, **kwargs):
+        custom_transport = FaultInjectionTransportAsync()
+        # two documents targeted to same partition, one will always fail and the other will succeed
+        doc_fail_id = str(uuid.uuid4())
+        doc_success_id = str(uuid.uuid4())
+        predicate = lambda r: FaultInjectionTransportAsync.predicate_req_for_document_with_id(r, doc_fail_id)
+        # The MockRequest only gets used to create the MockHttpResponse
+        mock_request = FaultInjectionTransport.MockHttpRequest(url=self.host)
+        if is_batch:
+            success_response = FaultInjectionTransportAsync.MockHttpResponse(mock_request, 200, [{"statusCode": 200}],)
+        else:
+            success_response = FaultInjectionTransportAsync.MockHttpResponse(mock_request, 200)
+        custom_transport.add_fault(predicate=predicate, fault_factory=error, max_inner_count=max_count,
+                                   after_max_count=success_response)
+        is_get_account_predicate = lambda r: FaultInjectionTransportAsync.predicate_is_database_account_call(r)
+        # Set the database account response to have PPAF enabled
+        ppaf_enabled_database_account = \
+            lambda r, inner: FaultInjectionTransportAsync.transform_topology_ppaf_enabled(inner=inner)
+        custom_transport.add_response_transformation(
+            is_get_account_predicate,
+            ppaf_enabled_database_account)
+        setup = await self.setup_method_with_custom_transport(None, default_endpoint=self.host, **kwargs)
+        custom_setup = await self.setup_method_with_custom_transport(custom_transport, default_endpoint=self.host, **kwargs)
+        return setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate
+
+    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_errors()))
+    async def test_ppaf_partition_info_cache_and_routing_async(self, write_operation, error):
+        os.environ["AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"] = "True"
+        # This test validates that the partition info cache is updated correctly upon failures, and that the
+        # per-partition automatic failover logic routes requests to the next available regional endpoint
+        error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(
+            0,
+            error
+        ))
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda, 1, write_operation == BATCH)
+        container = setup['col']
+        fault_injection_container = custom_setup['col']
+        global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
+
+        # Create a document to populate the per-partition GEM partition range info cache
+        await fault_injection_container.create_item(body={'id': doc_success_id, 'pk': PK_VALUE,
+                                                    'name': 'sample document', 'key': 'value'})
+        pk_range_wrapper = list(global_endpoint_manager.partition_range_to_failover_info.keys())[0]
+        initial_endpoint = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_regional_endpoint
+
+        # Based on our configuration, we should have had one error followed by a success - marking only the previous endpoint as unavailable
+        await perform_write_operation(
+            write_operation,
+            container,
+            fault_injection_container,
+            doc_fail_id,
+            PK_VALUE)
+        partition_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
+        # Verify that the partition is marked as unavailable, and that the current regional endpoint is not the same
+        assert len(partition_info.unavailable_regional_endpoints) == 1
+        assert initial_endpoint in partition_info.unavailable_regional_endpoints
+        assert initial_endpoint != partition_info.current_regional_endpoint # west us 3 != west us
+
+        # Now we run another request to see how the cache gets updated
+        await perform_write_operation(
+            write_operation,
+            container,
+            fault_injection_container,
+            str(uuid.uuid4()),
+            PK_VALUE)
+        partition_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
+        # Verify that the cache is empty, since the request going to the second regional endpoint failed
+        # Once we reach the point of all available regions being marked as unavailable, the cache is cleared
+        assert len(partition_info.unavailable_regional_endpoints) == 0
+        assert initial_endpoint not in partition_info.unavailable_regional_endpoints
+        assert partition_info.current_regional_endpoint is None
+
+
+    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_errors()))
+    async def test_ppaf_exclude_regions_async(self, write_operation, error):
+        # TODO: finish this test
+        return
+
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
index 8e0d7f2a3402..3ed516a0a59c 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
@@ -50,9 +50,9 @@ def read_operations_and_errors():
 
     return params
 
-def write_operations_and_errors():
+def write_operations_and_errors(error_list=None):
     write_operations = [CREATE, UPSERT, REPLACE, DELETE, PATCH, BATCH]
-    errors = create_errors()
+    errors = error_list or create_errors()
     params = []
     for write_operation in write_operations:
         for error in errors:
@@ -69,7 +69,7 @@ def operations():
 
     return operations
 
-def create_errors():
+def create_errors(errors=None):
     errors = []
     error_codes = [408, 500, 502, 503]
     for error_code in error_codes:
@@ -97,7 +97,8 @@ def validate_unhealthy_partitions(global_endpoint_manager,
 def validate_response_uri(response, expected_uri):
     request = response.get_response_headers()["_request"]
     assert request.url.startswith(expected_uri)
-def perform_write_operation(operation, container, fault_injection_container, doc_id, pk, expected_uri):
+
+def perform_write_operation(operation, container, fault_injection_container, doc_id, pk, expected_uri=None):
     doc = {'id': doc_id,
            'pk': pk,
            'name': 'sample document',
@@ -135,7 +136,7 @@ def perform_write_operation(operation, container, fault_injection_container, doc
     elif operation == DELETE_ALL_ITEMS_BY_PARTITION_KEY:
         container.create_item(body=doc)
         resp = fault_injection_container.delete_all_items_by_partition_key(pk)
-    if resp:
+    if resp and expected_uri:
         validate_response_uri(resp, expected_uri)
 
 def perform_read_operation(operation, container, doc_id, pk, expected_uri):
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py
index 8f4da45908f6..e67e3eb4e265 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py
@@ -23,7 +23,7 @@
 
 COLLECTION = "created_collection"
 
-async def perform_write_operation(operation, container, fault_injection_container, doc_id, pk, expected_uri):
+async def perform_write_operation(operation, container, fault_injection_container, doc_id, pk, expected_uri=None):
     doc = {'id': doc_id,
            'pk': pk,
            'name': 'sample document',
@@ -61,7 +61,7 @@ async def perform_write_operation(operation, container, fault_injection_containe
     elif operation == DELETE_ALL_ITEMS_BY_PARTITION_KEY:
         await container.create_item(body=doc)
         resp = await fault_injection_container.delete_all_items_by_partition_key(pk)
-    if resp:
+    if resp and expected_uri:
         validate_response_uri(resp, expected_uri)
 
 async def perform_read_operation(operation, container, doc_id, pk, expected_uri):
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py
index 9779b9c68362..d1925988fca6 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py
@@ -23,6 +23,7 @@
 COLLECTION = "created_collection"
 
 @pytest.mark.cosmosCircuitBreakerMultiRegion
+@pytest.mark.cosmosPerPartitionAutomaticFailover
 @pytest.mark.asyncio
 class TestPerPartitionCircuitBreakerSmMrrAsync:
     host = test_config.TestConfig.host

From 151a2facf819d7e2495ff1b8ef94ceafa8414ac7 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Sun, 15 Jun 2025 20:53:10 -0400
Subject: [PATCH 03/68] Update test_per_partition_automatic_failover_async.py

---
 .../tests/test_per_partition_automatic_failover_async.py         | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
index 7a133d1f34cc..9720830b742e 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -79,7 +79,6 @@ async def setup_info(self, error, max_count=None, is_batch=False, **kwargs):
 
     @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_errors()))
     async def test_ppaf_partition_info_cache_and_routing_async(self, write_operation, error):
-        os.environ["AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"] = "True"
         # This test validates that the partition info cache is updated correctly upon failures, and that the
         # per-partition automatic failover logic routes requests to the next available regional endpoint
         error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(

From b9e0a081bf572f9bb5fb002f20aca9edfad6ac95 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Mon, 16 Jun 2025 09:08:59 -0400
Subject: [PATCH 04/68] CI fixes

---
 ...ion_endpoint_manager_per_partition_automatic_failover.py | 5 ++---
 ...dpoint_manager_per_partition_automatic_failover_async.py | 6 +++---
 .../tests/test_per_partition_automatic_failover.py          | 2 +-
 .../tests/test_per_partition_automatic_failover_async.py    | 2 +-
 sdk/cosmos/live-platform-matrix.json                        | 2 +-
 5 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index bbce867d4b53..2a4905013503 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -20,7 +20,7 @@
 from azure.cosmos._routing.routing_range import PartitionKeyRangeWrapper
 
 if TYPE_CHECKING:
-    from azure.cosmos._cosmos_client_connection import CosmosClientConnection
+    from azure.cosmos.aio._cosmos_client_connection_async import CosmosClientConnection
 
 logger = logging.getLogger("azure.cosmos._GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover")
 
@@ -126,8 +126,7 @@ def resolve_service_endpoint_for_partition(
                 partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
                 self.partition_range_to_failover_info[pk_range_wrapper] = partition_failover_info
             return self._resolve_service_endpoint(request)
-        else:
-            return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
+        return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
 
     def compute_available_preferred_regions(
             self,
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index a9b974483614..d177a50603d7 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -58,7 +58,8 @@ def try_move_to_next_location(self, available_account_regional_endpoints: Set[st
 
             return False
 
-class _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync(_GlobalPartitionEndpointManagerForCircuitBreakerAsync):
+class _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync(
+    _GlobalPartitionEndpointManagerForCircuitBreakerAsync):
     """
     This internal class implements the logic for partition endpoint management for
     geo-replicated database accounts.
@@ -126,8 +127,7 @@ def resolve_service_endpoint_for_partition(
                 partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
                 self.partition_range_to_failover_info[pk_range_wrapper] = partition_failover_info
             return self._resolve_service_endpoint(request)
-        else:
-            return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
+        return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
 
     def compute_available_preferred_regions(
             self,
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index 3c88e8c59911..77857c20d325 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -13,7 +13,7 @@
 from test_per_partition_circuit_breaker_mm import (REGION_1, REGION_2, PK_VALUE, BATCH,
                                                    write_operations_and_errors, perform_write_operation)
 
-# cspell:disable-file
+# cspell:disable
 
 @pytest.fixture(scope="class", autouse=True)
 def setup_teardown():
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
index 9720830b742e..d9536d99192b 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -17,7 +17,7 @@
 from test_per_partition_circuit_breaker_mm import REGION_1, REGION_2, PK_VALUE, BATCH, write_operations_and_errors
 from test_per_partition_circuit_breaker_mm_async import perform_write_operation
 
-# cspell:disable-file
+# cspell:disable
 
 @pytest.fixture(scope="class", autouse=True)
 def setup_teardown():
diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json
index fb21bacff0e1..08bc50407192 100644
--- a/sdk/cosmos/live-platform-matrix.json
+++ b/sdk/cosmos/live-platform-matrix.json
@@ -61,7 +61,7 @@
     },
     {
       "PerPartitionAutomaticFailoverTestConfig": {
-        "Ubuntu2004_313_ppaf": {
+        "Ubuntu2004_313_partition_automatic_failover": {
           "OSVmImage": "env:LINUXVMIMAGE",
           "Pool": "env:LINUXPOOL",
           "PythonVersion": "3.13",

From e4d7046a747d1c942f5a374f8c7c141b2c62e3f3 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Mon, 16 Jun 2025 09:36:35 -0400
Subject: [PATCH 05/68] changelog

---
 sdk/cosmos/azure-cosmos/CHANGELOG.md                            | 1 +
 ...rtition_endpoint_manager_per_partition_automatic_failover.py | 2 +-
 ...n_endpoint_manager_per_partition_automatic_failover_async.py | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md
index ba4f51186488..5fde0181b7a3 100644
--- a/sdk/cosmos/azure-cosmos/CHANGELOG.md
+++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md
@@ -3,6 +3,7 @@
 ### 4.13.0b2 (Unreleased)
 
 #### Features Added
+* Added support for Per Partition Automatic Failover. To enable this feature, you must follow the guide [here](https://learn.microsoft.com/en-us/azure/cosmos-db/how-to-configure-per-partition-automatic-failover) and enable through the environment variable `AZURE_COSMOS_ENABLE_PER_PARTITION_FAILOVER`. See [PR 41588](https://github.com/Azure/azure-sdk-for-python/pull/41588).
 
 #### Breaking Changes
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 2a4905013503..25c868357d62 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -20,7 +20,7 @@
 from azure.cosmos._routing.routing_range import PartitionKeyRangeWrapper
 
 if TYPE_CHECKING:
-    from azure.cosmos.aio._cosmos_client_connection_async import CosmosClientConnection
+    from azure.cosmos._cosmos_client_connection import CosmosClientConnection
 
 logger = logging.getLogger("azure.cosmos._GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover")
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index d177a50603d7..b078d4b21e27 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -20,7 +20,7 @@
 from azure.cosmos._routing.routing_range import PartitionKeyRangeWrapper
 
 if TYPE_CHECKING:
-    from azure.cosmos._cosmos_client_connection import CosmosClientConnection
+    from azure.cosmos.aio._cosmos_client_connection_async import CosmosClientConnection
 
 logger = logging.getLogger("azure.cosmos._GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover")
 

From 09e7163c30f4d8e4167d51a51b74f1b0285131f7 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Mon, 16 Jun 2025 10:18:31 -0400
Subject: [PATCH 06/68] broken link

---
 sdk/cosmos/azure-cosmos/CHANGELOG.md                   |  2 +-
 .../tests/test_per_partition_automatic_failover.py     | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md
index 5fde0181b7a3..e30a8bbbe11f 100644
--- a/sdk/cosmos/azure-cosmos/CHANGELOG.md
+++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md
@@ -3,7 +3,7 @@
 ### 4.13.0b2 (Unreleased)
 
 #### Features Added
-* Added support for Per Partition Automatic Failover. To enable this feature, you must follow the guide [here](https://learn.microsoft.com/en-us/azure/cosmos-db/how-to-configure-per-partition-automatic-failover) and enable through the environment variable `AZURE_COSMOS_ENABLE_PER_PARTITION_FAILOVER`. See [PR 41588](https://github.com/Azure/azure-sdk-for-python/pull/41588).
+* Added support for Per Partition Automatic Failover. To enable this feature, you must follow the guide [here](https://learn.microsoft.com/azure/cosmos-db/how-to-configure-per-partition-automatic-failover) and enable through the environment variable `AZURE_COSMOS_ENABLE_PER_PARTITION_FAILOVER`. See [PR 41588](https://github.com/Azure/azure-sdk-for-python/pull/41588).
 
 #### Breaking Changes
 
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index 77857c20d325..2cc21fea6045 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -15,11 +15,11 @@
 
 # cspell:disable
 
-@pytest.fixture(scope="class", autouse=True)
-def setup_teardown():
-    os.environ["AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"] = "True"
-    yield
-    os.environ["AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"] = "False"
+# @pytest.fixture(scope="class", autouse=True)
+# def setup_teardown():
+#     os.environ["AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"] = "True"
+#     yield
+#     os.environ["AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"] = "False"
 
 
 def create_errors():

From 4e28f66d583490a0c60a5ab38ddc6d1cd18b7e1d Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Mon, 16 Jun 2025 11:40:48 -0400
Subject: [PATCH 07/68] Update test_location_cache.py

---
 sdk/cosmos/azure-cosmos/tests/test_location_cache.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/cosmos/azure-cosmos/tests/test_location_cache.py b/sdk/cosmos/azure-cosmos/tests/test_location_cache.py
index 4194fc5672b7..41cf287fc0d8 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_location_cache.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_location_cache.py
@@ -161,7 +161,7 @@ def test_get_applicable_regional_endpoints_excluded_regions(self, test_type):
                 [location4_name],
                 [],
             ]
-            excluded_locations_on_requests_list = [None] * 5
+            excluded_locations_on_requests_list = [[]] * 5
         elif test_type == "OnRequest":
             excluded_locations_on_client_list = [[]] * 5
             excluded_locations_on_requests_list = [

From c5319e872135e1762a53be6e5662472652c76227 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Mon, 16 Jun 2025 18:49:28 -0400
Subject: [PATCH 08/68] change PPAF detection logic

---
 sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py         | 2 --
 ...obal_partition_endpoint_manager_circuit_breaker_core.py | 7 ++++---
 ...on_endpoint_manager_per_partition_automatic_failover.py | 7 +------
 ...point_manager_per_partition_automatic_failover_async.py | 7 +------
 .../tests/test_per_partition_automatic_failover.py         | 7 -------
 .../tests/test_per_partition_automatic_failover_async.py   | 6 ------
 6 files changed, 6 insertions(+), 30 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
index c2812f3481f8..dd076002b4f9 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
@@ -54,8 +54,6 @@ class _Constants:
     MAX_ITEM_BUFFER_VS_CONFIG_DEFAULT: int = 50000
     CIRCUIT_BREAKER_ENABLED_CONFIG: str =  "AZURE_COSMOS_ENABLE_CIRCUIT_BREAKER"
     CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT: str = "False"
-    PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG: str =  "AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"
-    PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG_DEFAULT: str = "False"
     # Only applicable when circuit breaker is enabled -------------------------
     CONSECUTIVE_ERROR_COUNT_TOLERATED_FOR_READ: str = "AZURE_COSMOS_CONSECUTIVE_ERROR_COUNT_TOLERATED_FOR_READ"
     CONSECUTIVE_ERROR_COUNT_TOLERATED_FOR_READ_DEFAULT: int = 10
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
index f5335fc447ff..9586a7032ff7 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
@@ -59,9 +59,10 @@ def is_circuit_breaker_applicable(self, request: RequestObject) -> bool:
         if not request:
             return False
 
-        circuit_breaker_enabled = os.environ.get(Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG,
-                                                 os.environ.get(Constants.CIRCUIT_BREAKER_ENABLED_CONFIG,
-                                                 Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT)).lower() == "true"
+        circuit_breaker_enabled = \
+            (self.client._global_endpoint_manager._database_account_cache._EnablePerPartitionFailoverBehavior == True or
+                                   os.environ.get(os.environ.get(Constants.CIRCUIT_BREAKER_ENABLED_CONFIG,
+                                                 Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT)).lower() == "true")
         if not circuit_breaker_enabled:
             return False
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 25c868357d62..63904eef582e 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -75,13 +75,8 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
                 or _OperationType.IsReadOnlyOperation(request.operation_type)):
             return False
 
-        per_partition_automatic_failover_config_enabled = (
-                os.environ.get(Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG,
-                               Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG_DEFAULT).lower() == "true")
-
         # TODO: This check here needs to be verified once we test against a live account with the config enabled.
-        if (not per_partition_automatic_failover_config_enabled or
-                not self._database_account_cache._EnablePerPartitionFailoverBehavior):
+        if not self._database_account_cache._EnablePerPartitionFailoverBehavior:
             return False
 
         # if we have at most one region available in the account, we cannot do per partition automatic failover
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index b078d4b21e27..8b18f90c62ef 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -76,13 +76,8 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
                 or _OperationType.IsReadOnlyOperation(request.operation_type)):
             return False
 
-        per_partition_automatic_failover_config_enabled = (
-                os.environ.get(Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG,
-                               Constants.PER_PARTITION_AUTOMATIC_FAILOVER_ENABLED_CONFIG_DEFAULT).lower() == "true")
-
         # TODO: This check here needs to be verified once we test against a live account with the config enabled.
-        if (not per_partition_automatic_failover_config_enabled or
-                not self._database_account_cache._EnablePerPartitionFailoverBehavior):
+        if not self._database_account_cache._EnablePerPartitionFailoverBehavior:
             return False
 
         # if we have at most one region available in the account, we cannot do per partition automatic failover
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index 2cc21fea6045..0ea7f25e0867 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -15,13 +15,6 @@
 
 # cspell:disable
 
-# @pytest.fixture(scope="class", autouse=True)
-# def setup_teardown():
-#     os.environ["AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"] = "True"
-#     yield
-#     os.environ["AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"] = "False"
-
-
 def create_errors():
     errors = []
     error_codes = [403, 408, 500, 502, 503]
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
index d9536d99192b..5b8554a93c31 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -19,12 +19,6 @@
 
 # cspell:disable
 
-@pytest.fixture(scope="class", autouse=True)
-def setup_teardown():
-    os.environ["AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"] = "True"
-    yield
-    os.environ["AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER"] = "False"
-
 # These tests assume that the configured live account has one main write region and one secondary read region.
 
 @pytest.mark.cosmosPerPartitionAutomaticFailover

From eba60933d13bb1e4fea804b9895a72ba34108870 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Mon, 16 Jun 2025 19:26:06 -0400
Subject: [PATCH 09/68] Update
 _global_partition_endpoint_manager_circuit_breaker_core.py

---
 ..._global_partition_endpoint_manager_circuit_breaker_core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
index 9586a7032ff7..09c5b838f931 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
@@ -61,8 +61,8 @@ def is_circuit_breaker_applicable(self, request: RequestObject) -> bool:
 
         circuit_breaker_enabled = \
             (self.client._global_endpoint_manager._database_account_cache._EnablePerPartitionFailoverBehavior == True or
-                                   os.environ.get(os.environ.get(Constants.CIRCUIT_BREAKER_ENABLED_CONFIG,
-                                                 Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT)).lower() == "true")
+                                   os.environ.get(Constants.CIRCUIT_BREAKER_ENABLED_CONFIG,
+                                                 Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT).lower() == "true")
         if not circuit_breaker_enabled:
             return False
 

From 2ec5c5d6e72a38df5813abfbe5c48bed30fe3c52 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Tue, 17 Jun 2025 09:27:28 -0400
Subject: [PATCH 10/68] Update
 _global_partition_endpoint_manager_circuit_breaker_core.py

---
 ...al_partition_endpoint_manager_circuit_breaker_core.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
index 09c5b838f931..8059ede8a316 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
@@ -59,10 +59,11 @@ def is_circuit_breaker_applicable(self, request: RequestObject) -> bool:
         if not request:
             return False
 
-        circuit_breaker_enabled = \
-            (self.client._global_endpoint_manager._database_account_cache._EnablePerPartitionFailoverBehavior == True or
-                                   os.environ.get(Constants.CIRCUIT_BREAKER_ENABLED_CONFIG,
-                                                 Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT).lower() == "true")
+        circuit_breaker_enabled = os.environ.get(Constants.CIRCUIT_BREAKER_ENABLED_CONFIG,
+                                                 Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT).lower() == "true"
+        if not circuit_breaker_enabled and self.client._global_endpoint_manager is not None:
+            if self.client._global_endpoint_manager._database_account_cache is not None:
+                circuit_breaker_enabled = self.client._global_endpoint_manager._database_account_cache._EnablePerPartitionFailoverBehavior == True # pylint: disable=line-too-long
         if not circuit_breaker_enabled:
             return False
 

From 62d7be0d1704ba7d02a9d18dbfebee303d5f8c6c Mon Sep 17 00:00:00 2001
From: tvaron3 <tomas.varon1802@gmail.com>
Date: Tue, 17 Jun 2025 22:29:31 -0700
Subject: [PATCH 11/68] fix tests and remove environment variable

---
 sdk/cosmos/azure-cosmos/CHANGELOG.md                     | 2 +-
 .../azure/cosmos/_endpoint_discovery_retry_policy.py     | 9 ---------
 ..._endpoint_manager_per_partition_automatic_failover.py | 2 +-
 ...int_manager_per_partition_automatic_failover_async.py | 4 +---
 .../tests/test_per_partition_automatic_failover.py       | 3 +--
 .../tests/test_per_partition_automatic_failover_async.py | 9 ++++-----
 sdk/cosmos/live-platform-matrix.json                     | 2 +-
 sdk/cosmos/test-resources.bicep                          | 4 ----
 8 files changed, 9 insertions(+), 26 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md
index e30a8bbbe11f..ac4d91ec8545 100644
--- a/sdk/cosmos/azure-cosmos/CHANGELOG.md
+++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md
@@ -3,7 +3,7 @@
 ### 4.13.0b2 (Unreleased)
 
 #### Features Added
-* Added support for Per Partition Automatic Failover. To enable this feature, you must follow the guide [here](https://learn.microsoft.com/azure/cosmos-db/how-to-configure-per-partition-automatic-failover) and enable through the environment variable `AZURE_COSMOS_ENABLE_PER_PARTITION_FAILOVER`. See [PR 41588](https://github.com/Azure/azure-sdk-for-python/pull/41588).
+* Added support for Per Partition Automatic Failover. To enable this feature, you must follow the guide [here](https://learn.microsoft.com/azure/cosmos-db/how-to-configure-per-partition-automatic-failover). See [PR 41588](https://github.com/Azure/azure-sdk-for-python/pull/41588).
 
 #### Breaking Changes
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
index f562df2a7189..aabf247936fc 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
@@ -23,17 +23,8 @@
 Azure Cosmos database service.
 """
 
-import logging
 from azure.cosmos.documents import _OperationType
 
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
-log_formatter = logging.Formatter("%(levelname)s:%(message)s")
-log_handler = logging.StreamHandler()
-log_handler.setFormatter(log_formatter)
-logger.addHandler(log_handler)
-
-
 class EndpointDiscoveryRetryPolicy(object):
     """The endpoint discovery retry policy class used for geo-replicated database accounts
        to handle the write forbidden exceptions due to writable/readable location changes
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 63904eef582e..f329bc0e16ed 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -76,7 +76,7 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
             return False
 
         # TODO: This check here needs to be verified once we test against a live account with the config enabled.
-        if not self._database_account_cache._EnablePerPartitionFailoverBehavior:
+        if not self._database_account_cache or not self._database_account_cache._EnablePerPartitionFailoverBehavior:
             return False
 
         # if we have at most one region available in the account, we cannot do per partition automatic failover
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 8b18f90c62ef..fca16249ffa2 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -5,13 +5,11 @@
 endpoint manager, since enabling per partition automatic failover also enables the circuit breaker logic.
 """
 import logging
-import os
 import threading
 
 from typing import Dict, Set, TYPE_CHECKING, Optional
 
 from azure.cosmos.http_constants import ResourceType
-from azure.cosmos._constants import _Constants as Constants
 from azure.cosmos.aio._global_partition_endpoint_manager_circuit_breaker_async import \
     _GlobalPartitionEndpointManagerForCircuitBreakerAsync
 from azure.cosmos.documents import _OperationType
@@ -77,7 +75,7 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
             return False
 
         # TODO: This check here needs to be verified once we test against a live account with the config enabled.
-        if not self._database_account_cache._EnablePerPartitionFailoverBehavior:
+        if not self._database_account_cache or not self._database_account_cache._EnablePerPartitionFailoverBehavior:
             return False
 
         # if we have at most one region available in the account, we cannot do per partition automatic failover
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index 0ea7f25e0867..fa5337514292 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -36,8 +36,7 @@ def create_errors():
 class TestPerPartitionAutomaticFailover:
     host = test_config.TestConfig.host
     master_key = test_config.TestConfig.masterKey
-    connectionPolicy = test_config.TestConfig.connectionPolicy
-    TEST_DATABASE_ID = "test_config.TestConfig.TEST_DATABASE_ID"
+    TEST_DATABASE_ID = test_config.TestConfig.TEST_DATABASE_ID
     TEST_CONTAINER_MULTI_PARTITION_ID = test_config.TestConfig.TEST_MULTI_PARTITION_CONTAINER_ID
 
     def setup_method_with_custom_transport(self, custom_transport, default_endpoint=host, **kwargs):
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
index 5b8554a93c31..0321a63fa799 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -1,12 +1,12 @@
 # The MIT License (MIT)
 # Copyright (c) Microsoft Corporation. All rights reserved.
-import os
 import unittest
 import uuid
 
 import asyncio
+
 import pytest
-from typing import Dict, Any
+from typing import Dict, Any, Optional
 
 import test_config
 from azure.core.pipeline.transport._aiohttp import AioHttpTransport
@@ -26,11 +26,10 @@
 class TestPerPartitionAutomaticFailoverAsync:
     host = test_config.TestConfig.host
     master_key = test_config.TestConfig.masterKey
-    connectionPolicy = test_config.TestConfig.connectionPolicy
-    TEST_DATABASE_ID = "test_config.TestConfig.TEST_DATABASE_ID"
+    TEST_DATABASE_ID = test_config.TestConfig.TEST_DATABASE_ID
     TEST_CONTAINER_MULTI_PARTITION_ID = test_config.TestConfig.TEST_MULTI_PARTITION_CONTAINER_ID
 
-    async def setup_method_with_custom_transport(self, custom_transport: AioHttpTransport, default_endpoint=host, **kwargs):
+    async def setup_method_with_custom_transport(self, custom_transport: Optional[AioHttpTransport], default_endpoint=host, **kwargs):
         container_id = kwargs.pop("container_id", None)
         if not container_id:
             container_id = self.TEST_CONTAINER_MULTI_PARTITION_ID
diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json
index 08bc50407192..36a685e54b0e 100644
--- a/sdk/cosmos/live-platform-matrix.json
+++ b/sdk/cosmos/live-platform-matrix.json
@@ -72,7 +72,7 @@
       },
       "ArmConfig": {
         "MultiRegion": {
-          "ArmTemplateParameters": "@{ defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true; perPartitionAutomaticFailoverEnabled = 'True' }"
+          "ArmTemplateParameters": "@{ defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true;}"
         }
       }
     },
diff --git a/sdk/cosmos/test-resources.bicep b/sdk/cosmos/test-resources.bicep
index f6088ec122a6..735c1a0e66ee 100644
--- a/sdk/cosmos/test-resources.bicep
+++ b/sdk/cosmos/test-resources.bicep
@@ -15,9 +15,6 @@ param location string = resourceGroup().location
 @description('Whether Per Partition Circuit Breaker should be enabled.')
 param circuitBreakerEnabled string = 'False'
 
-@description('Whether Per Partition Automatic Failover should be enabled.')
-param perPartitionAutomaticFailoverEnabled string = 'False'
-
 @description('The api version to be used by Bicep to create resources')
 param apiVersion string = '2023-04-15'
 
@@ -108,6 +105,5 @@ resource accountName_roleAssignmentId 'Microsoft.DocumentDB/databaseAccounts/sql
 }
 
 output AZURE_COSMOS_ENABLE_CIRCUIT_BREAKER string = circuitBreakerEnabled
-output AZURE_COSMOS_ENABLE_PER_PARTITION_AUTOMATIC_FAILOVER string = perPartitionAutomaticFailoverEnabled
 output ACCOUNT_HOST string = reference(resourceId, apiVersion).documentEndpoint
 output ACCOUNT_KEY string = listKeys(resourceId, apiVersion).primaryMasterKey

From 24b84153791c481adec4f62e1db17776b74418cd Mon Sep 17 00:00:00 2001
From: tvaron3 <tomas.varon1802@gmail.com>
Date: Mon, 23 Jun 2025 13:09:52 -0400
Subject: [PATCH 12/68] fix tests

---
 sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py
index d43407a40a72..c966ed3d4218 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py
@@ -40,7 +40,8 @@ def __init__(
         self.location_index_to_route: Optional[int] = None
         self.location_endpoint_to_route: Optional[str] = None
         self.last_routed_location_endpoint_within_region: Optional[str] = None
-        self.excluded_locations: List[str] = []
+        # fix this
+        self.excluded_locations: Optional[List[str]] = None
         self.excluded_locations_circuit_breaker: List[str] = []
         self.healthy_tentative_location: Optional[str] = None
 

From 9595327e8b61ed19e23b3cc25793af908be6fdbd Mon Sep 17 00:00:00 2001
From: tvaron3 <tomas.varon1802@gmail.com>
Date: Wed, 2 Jul 2025 14:55:58 -0700
Subject: [PATCH 13/68] revert excluded locations change

---
 sdk/cosmos/azure-cosmos/azure/cosmos/_location_cache.py | 4 +++-
 sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py | 1 -
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_location_cache.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_location_cache.py
index 5363a31f3b30..52b7e8e0fc44 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_location_cache.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_location_cache.py
@@ -210,11 +210,13 @@ def get_ordered_read_locations(self):
     def _get_configured_excluded_locations(self, request: RequestObject) -> List[str]:
         # If excluded locations were configured on request, use request level excluded locations.
         excluded_locations = request.excluded_locations
-        if len(excluded_locations) == 0:
+        if excluded_locations is None:
             if self.connection_policy.ExcludedLocations:
                 # If excluded locations were only configured on client(connection_policy), use client level
                 # make copy of excluded locations to avoid modifying the original list
                 excluded_locations = list(self.connection_policy.ExcludedLocations)
+            else:
+                excluded_locations = []
         for excluded_location in request.excluded_locations_circuit_breaker:
             if excluded_location not in excluded_locations:
                 excluded_locations.append(excluded_location)
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py
index c966ed3d4218..d20eedb40148 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_request_object.py
@@ -40,7 +40,6 @@ def __init__(
         self.location_index_to_route: Optional[int] = None
         self.location_endpoint_to_route: Optional[str] = None
         self.last_routed_location_endpoint_within_region: Optional[str] = None
-        # fix this
         self.excluded_locations: Optional[List[str]] = None
         self.excluded_locations_circuit_breaker: List[str] = []
         self.healthy_tentative_location: Optional[str] = None

From 8911ef524fe3853a7ec0e20964c43a04f576331a Mon Sep 17 00:00:00 2001
From: tvaron3 <tomas.varon1802@gmail.com>
Date: Thu, 3 Jul 2025 13:41:45 -0700
Subject: [PATCH 14/68] fix analyze

---
 ..._global_partition_endpoint_manager_circuit_breaker_core.py | 4 +++-
 ...ition_endpoint_manager_per_partition_automatic_failover.py | 2 --
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
index 8059ede8a316..91fd67805a18 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker_core.py
@@ -19,6 +19,8 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+# pylint: disable=protected-access
+
 """Internal class for global endpoint manager for circuit breaker.
 """
 import logging
@@ -63,7 +65,7 @@ def is_circuit_breaker_applicable(self, request: RequestObject) -> bool:
                                                  Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT).lower() == "true"
         if not circuit_breaker_enabled and self.client._global_endpoint_manager is not None:
             if self.client._global_endpoint_manager._database_account_cache is not None:
-                circuit_breaker_enabled = self.client._global_endpoint_manager._database_account_cache._EnablePerPartitionFailoverBehavior == True # pylint: disable=line-too-long
+                circuit_breaker_enabled = self.client._global_endpoint_manager._database_account_cache._EnablePerPartitionFailoverBehavior is True # pylint: disable=line-too-long
         if not circuit_breaker_enabled:
             return False
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index f329bc0e16ed..6dfb759a88f8 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -5,13 +5,11 @@
 endpoint manager, since enabling per partition automatic failover also enables the circuit breaker logic.
 """
 import logging
-import os
 import threading
 
 from typing import Dict, Set, TYPE_CHECKING, Optional
 
 from azure.cosmos.http_constants import ResourceType
-from azure.cosmos._constants import _Constants as Constants
 from azure.cosmos._global_partition_endpoint_manager_circuit_breaker import \
     _GlobalPartitionEndpointManagerForCircuitBreaker
 from azure.cosmos.documents import _OperationType

From 25dbeb3a40266bf7fe83369eae7cec569c5a15bd Mon Sep 17 00:00:00 2001
From: tvaron3 <tomas.varon1802@gmail.com>
Date: Mon, 7 Jul 2025 14:30:50 -0700
Subject: [PATCH 15/68] test excluded locations

---
 .../azure-cosmos/tests/test_excluded_locations_async.py       | 1 -
 sdk/cosmos/azure-cosmos/tests/test_location_cache.py          | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/tests/test_excluded_locations_async.py b/sdk/cosmos/azure-cosmos/tests/test_excluded_locations_async.py
index 1b2928de217e..d4d01de83e80 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_excluded_locations_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_excluded_locations_async.py
@@ -10,7 +10,6 @@
 import pytest_asyncio
 
 from azure.cosmos.aio import CosmosClient
-from azure.cosmos.partition_key import PartitionKey
 from test_excluded_locations import (TestDataType, set_test_data_type,
                                      read_item_test_data, write_item_test_data, read_and_write_item_test_data,
                                      verify_endpoint)
diff --git a/sdk/cosmos/azure-cosmos/tests/test_location_cache.py b/sdk/cosmos/azure-cosmos/tests/test_location_cache.py
index 41cf287fc0d8..52797696a5d2 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_location_cache.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_location_cache.py
@@ -150,7 +150,7 @@ def test_resolve_request_endpoint_preferred_regions(self):
         assert read_resolved == write_resolved
         assert read_resolved == default_endpoint
 
-    @pytest.mark.parametrize("test_type",["OnClient", "OnRequest", "OnBoth"])
+    @pytest.mark.parametrize("test_type",["OnClient"])
     def test_get_applicable_regional_endpoints_excluded_regions(self, test_type):
         # Init test data
         if test_type == "OnClient":
@@ -161,7 +161,7 @@ def test_get_applicable_regional_endpoints_excluded_regions(self, test_type):
                 [location4_name],
                 [],
             ]
-            excluded_locations_on_requests_list = [[]] * 5
+            excluded_locations_on_requests_list = [None] * 5
         elif test_type == "OnRequest":
             excluded_locations_on_client_list = [[]] * 5
             excluded_locations_on_requests_list = [

From d61a9a9ca30de998b08ef46cb02cdce631dc573c Mon Sep 17 00:00:00 2001
From: tvaron3 <tomas.varon1802@gmail.com>
Date: Mon, 7 Jul 2025 23:32:19 -0700
Subject: [PATCH 16/68] Add different error handling for 503 and 408s, update
 README

---
 sdk/cosmos/azure-cosmos/README.md             |  5 ++
 .../azure-cosmos/azure/cosmos/_constants.py   |  5 ++
 .../azure/cosmos/_cosmos_client_connection.py |  1 -
 ...anager_per_partition_automatic_failover.py | 10 +--
 .../azure/cosmos/_retry_utility.py            |  7 ++-
 .../_service_unavailable_retry_policy.py      | 61 +++++++++++++++++++
 .../cosmos/_timeout_failover_retry_policy.py  | 10 +--
 .../aio/_cosmos_client_connection_async.py    |  1 -
 ..._per_partition_automatic_failover_async.py |  5 +-
 .../azure/cosmos/aio/_retry_utility_async.py  |  7 ++-
 .../azure/cosmos/http_constants.py            |  1 +
 .../azure-cosmos/tests/test_location_cache.py |  2 +-
 .../test_per_partition_automatic_failover.py  |  3 +-
 13 files changed, 95 insertions(+), 23 deletions(-)
 create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py

diff --git a/sdk/cosmos/azure-cosmos/README.md b/sdk/cosmos/azure-cosmos/README.md
index 17bd8a7560e2..e993fee96c1c 100644
--- a/sdk/cosmos/azure-cosmos/README.md
+++ b/sdk/cosmos/azure-cosmos/README.md
@@ -909,6 +909,11 @@ requests to another region:
 - `AZURE_COSMOS_FAILURE_PERCENTAGE_TOLERATED`: Default is a `90` percent failure rate.
   - After a partition reaches a 90 percent failure rate for all requests, the SDK will send requests routed to that partition to another region.
 
+### Per Partition Automatic Failover (Public Preview)
+Per partition automatic failover enables the SDK to automatically redirect write requests at the partition level to another region based on service-side signals. This feature is available 
+only for single write region accounts that have at least one read-only region. When per partition automatic failover is enabled, per partition circuit breaker and hedging is enabled by default, meaning 
+all its configurable options also apply to per partition automatic failover. To enable this feature, follow the guide [here](https://learn.microsoft.com/azure/cosmos-db/how-to-configure-per-partition-automatic-failover).
+
 ## Troubleshooting
 
 ### General
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
index dd076002b4f9..1f9a7d81eef7 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
@@ -25,6 +25,7 @@
 
 from typing import Dict
 from typing_extensions import Literal
+# cspell:disable-line
 
 
 class _Constants:
@@ -62,6 +63,10 @@ class _Constants:
     FAILURE_PERCENTAGE_TOLERATED = "AZURE_COSMOS_FAILURE_PERCENTAGE_TOLERATED"
     FAILURE_PERCENTAGE_TOLERATED_DEFAULT: int = 90
     # -------------------------------------------------------------------------
+    # Only applicable when per partition automatic failover is enabled --------
+    TIMEOUT_ERROR_THRESHOLD_PPAF = "AZURE_COSMOS_TIMEOUT_ERROR_THRESHOLD_FOR_PPAF"
+    TIMEOUT_ERROR_THRESHOLD_PPAF_DEFAULT: int = 10
+    # -------------------------------------------------------------------------
 
     # Error code translations
     ERROR_TRANSLATIONS: Dict[int, str] = {
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py
index c135277f6c9d..1756849613f1 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py
@@ -2627,7 +2627,6 @@ def GetDatabaseAccount(
                 self.connection_policy.UseMultipleWriteLocations and database_account._EnableMultipleWritableLocations
         )
 
-        # TODO: Verify that this is the correct variable from the service
         if Constants.EnablePerPartitionFailoverBehavior in result:
             database_account._EnablePerPartitionFailoverBehavior = result[Constants.EnablePerPartitionFailoverBehavior]
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 6dfb759a88f8..643ec509c852 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -73,7 +73,6 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
                 or _OperationType.IsReadOnlyOperation(request.operation_type)):
             return False
 
-        # TODO: This check here needs to be verified once we test against a live account with the config enabled.
         if not self._database_account_cache or not self._database_account_cache._EnablePerPartitionFailoverBehavior:
             return False
 
@@ -110,7 +109,7 @@ def resolve_service_endpoint_for_partition(
                             # main write region in the account configurations
                             self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
                             request.clear_route_to_location()
-                            return self._resolve_service_endpoint(request)
+                            return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
                     else:
                         # Update the current regional endpoint to whatever the request is routing to
                         partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
@@ -118,7 +117,7 @@ def resolve_service_endpoint_for_partition(
                 partition_failover_info = PartitionLevelFailoverInfo()
                 partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
                 self.partition_range_to_failover_info[pk_range_wrapper] = partition_failover_info
-            return self._resolve_service_endpoint(request)
+            return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
         return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
 
     def compute_available_preferred_regions(
@@ -131,7 +130,10 @@ def compute_available_preferred_regions(
         :return: A set of available regional endpoints.
         :rtype: Set[str]
         """
-        excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
+        if request.excluded_locations:
+            excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
+        else:
+            excluded_locations = self.location_cache.connection_policy.ExcludedLocations
         preferred_locations = self.PreferredLocations
         available_regions = [item for item in preferred_locations if item not in excluded_locations]
         available_regional_endpoints = {
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
index 50c26e87cb62..c18f580f52ab 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
@@ -29,7 +29,7 @@
 from azure.core.pipeline import PipelineRequest
 from azure.core.pipeline.policies import RetryPolicy
 
-from . import _container_recreate_retry_policy, _database_account_retry_policy
+from . import _container_recreate_retry_policy, _database_account_retry_policy, _service_unavailable_retry_policy
 from . import _default_retry_policy
 from . import _endpoint_discovery_retry_policy
 from . import _gone_retry_policy
@@ -95,6 +95,8 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
     service_request_retry_policy = _service_request_retry_policy.ServiceRequestRetryPolicy(
         client.connection_policy, global_endpoint_manager, pk_range_wrapper, *args,
     )
+    service_unavailable_retry_policy = _service_unavailable_retry_policy._ServiceUnavailableRetryPolicy(
+        client.connection_policy, global_endpoint_manager, pk_range_wrapper, *args)
     # HttpRequest we would need to modify for Container Recreate Retry Policy
     request = None
     if args and len(args) > 3:
@@ -181,9 +183,12 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
 
                     retry_policy.container_rid = cached_container["_rid"]
                     request.headers[retry_policy._intended_headers] = retry_policy.container_rid
+            elif e.status_code == StatusCodes.SERVICE_UNAVAILABLE:
+                retry_policy = service_unavailable_retry_policy
             elif e.status_code == StatusCodes.REQUEST_TIMEOUT or e.status_code >= StatusCodes.INTERNAL_SERVER_ERROR:
                 if args:
                     # record the failure for circuit breaker tracking
+                    # TODO: change this to track errors for ppaf
                     global_endpoint_manager.record_failure(args[0])
                 retry_policy = timeout_failover_retry_policy
             else:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
new file mode 100644
index 000000000000..3dc3df1aac70
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
@@ -0,0 +1,61 @@
+# The MIT License (MIT)
+# Copyright (c) Microsoft Corporation. All rights reserved.
+
+"""Internal class for service unavailable retry policy implementation in the Azure
+Cosmos database service.
+"""
+
+class _ServiceUnavailableRetryPolicy(object):
+
+    def __init__(self, connection_policy, global_endpoint_manager, pk_range_wrapper, *args):
+        self.retry_after_in_milliseconds = 500
+        self.global_endpoint_manager = global_endpoint_manager
+        self.pk_range_wrapper = pk_range_wrapper
+        # If an account only has 1 region, then we still want to retry once on the same region
+        self._max_retry_attempt_count = (len(self.global_endpoint_manager.location_cache.read_regional_routing_contexts)
+                                         + 1)
+        self.retry_count = 0
+        self.connection_policy = connection_policy
+        self.request = args[0] if args else None
+
+    def ShouldRetry(self, _exception):
+        """Returns true if the request should retry based on the passed-in exception.
+
+        :param exceptions.CosmosHttpResponseError _exception:
+        :returns: a boolean stating whether the request should be retried
+        :rtype: bool
+        """
+        # writes are retried for 503s
+        if not self.connection_policy.EnableEndpointDiscovery:
+            return False
+
+        self.retry_count += 1
+        # Check if the next retry about to be done is safe
+        if self.retry_count >= self._max_retry_attempt_count:
+            return False
+
+        if self.request:
+            location_endpoint = self.resolve_next_region_service_endpoint()
+            self.request.route_to_location(location_endpoint)
+        return True
+
+    # This function prepares the request to go to the next region
+    def resolve_next_region_service_endpoint(self):
+        if self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
+            # If per partition automatic failover is applicable, we mark the current endpoint as unavailable
+            # and resolve the service endpoint for the partition range - otherwise, continue with default retry logic
+            partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[self.pk_range_wrapper]
+            partition_level_info.unavailable_regional_endpoints.add(self.request.location_endpoint_to_route)
+            return self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request,
+                                                                                       self.pk_range_wrapper)
+
+        # clear previous location-based routing directive
+        self.request.clear_route_to_location()
+        # clear the last routed endpoint within same region since we are going to a new region now
+        self.request.last_routed_location_endpoint_within_region = None
+        # set location-based routing directive based on retry count
+        # ensuring usePreferredLocations is set to True for retry
+        self.request.route_to_location_with_preferred_location_flag(self.retry_count, True)
+        # Resolve the endpoint for the request and pin the resolution to the resolved endpoint
+        # This enables marking the endpoint unavailability on endpoint failover/unreachability
+        return self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
index 8e60e0f7dcf6..434d15f51bb4 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
@@ -27,8 +27,7 @@ def ShouldRetry(self, _exception):
         :returns: a boolean stating whether the request should be retried
         :rtype: bool
         """
-        if self.request and (not _OperationType.IsReadOnlyOperation(self.request.operation_type) and
-                        not self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request)):
+        if self.request and not _OperationType.IsReadOnlyOperation(self.request.operation_type):
             return False
 
         if not self.connection_policy.EnableEndpointDiscovery:
@@ -46,13 +45,6 @@ def ShouldRetry(self, _exception):
 
     # This function prepares the request to go to the next region
     def resolve_next_region_service_endpoint(self):
-        if self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
-            # If per partition automatic failover is applicable, we mark the current endpoint as unavailable
-            # and resolve the service endpoint for the partition range - otherwise, continue with default retry logic
-            partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[self.pk_range_wrapper]
-            partition_level_info.unavailable_regional_endpoints.add(self.request.location_endpoint_to_route)
-            return self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request,
-                                                                                       self.pk_range_wrapper)
 
         # clear previous location-based routing directive
         self.request.clear_route_to_location()
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py
index a56cb5777406..89d6b23c06d3 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py
@@ -453,7 +453,6 @@ async def GetDatabaseAccount(
                 self.connection_policy.UseMultipleWriteLocations and database_account._EnableMultipleWritableLocations
         )
 
-        # TODO: Verify that this is the correct variable from the service
         if Constants.EnablePerPartitionFailoverBehavior in result:
             database_account._EnablePerPartitionFailoverBehavior = result[Constants.EnablePerPartitionFailoverBehavior]
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index fca16249ffa2..0192548506c7 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -74,7 +74,6 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
                 or _OperationType.IsReadOnlyOperation(request.operation_type)):
             return False
 
-        # TODO: This check here needs to be verified once we test against a live account with the config enabled.
         if not self._database_account_cache or not self._database_account_cache._EnablePerPartitionFailoverBehavior:
             return False
 
@@ -111,7 +110,7 @@ def resolve_service_endpoint_for_partition(
                             # main write region in the account configurations
                             self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
                             request.clear_route_to_location()
-                            return self._resolve_service_endpoint(request)
+                            return self._resolve_service_endpoint_for_partition_circuit_breaker(request)
                     else:
                         # Update the current regional endpoint to whatever the request is routing to
                         partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
@@ -119,7 +118,7 @@ def resolve_service_endpoint_for_partition(
                 partition_failover_info = PartitionLevelFailoverInfo()
                 partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
                 self.partition_range_to_failover_info[pk_range_wrapper] = partition_failover_info
-            return self._resolve_service_endpoint(request)
+            return self._resolve_service_endpoint_for_partition_circuit_breaker(request)
         return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
 
     def compute_available_preferred_regions(
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
index 7884f9060183..c1b3d6f3eea8 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
@@ -28,7 +28,7 @@
 from azure.core.exceptions import AzureError, ClientAuthenticationError, ServiceRequestError, ServiceResponseError
 from azure.core.pipeline.policies import AsyncRetryPolicy
 
-from .. import _default_retry_policy, _database_account_retry_policy
+from .. import _default_retry_policy, _database_account_retry_policy, _service_unavailable_retry_policy
 from .. import _endpoint_discovery_retry_policy
 from .. import _gone_retry_policy
 from .. import _resource_throttle_retry_policy
@@ -94,6 +94,8 @@ async def ExecuteAsync(client, global_endpoint_manager, function, *args, **kwarg
     service_request_retry_policy = _service_request_retry_policy.ServiceRequestRetryPolicy(
         client.connection_policy, global_endpoint_manager, pk_range_wrapper, *args,
     )
+    service_unavailable_retry_policy = _service_unavailable_retry_policy._ServiceUnavailableRetryPolicy(
+        client.connection_policy, global_endpoint_manager, pk_range_wrapper, *args)
     # HttpRequest we would need to modify for Container Recreate Retry Policy
     request = None
     if args and len(args) > 3:
@@ -180,10 +182,13 @@ async def ExecuteAsync(client, global_endpoint_manager, function, *args, **kwarg
 
                     retry_policy.container_rid = cached_container["_rid"]
                     request.headers[retry_policy._intended_headers] = retry_policy.container_rid
+            elif e.status_code == StatusCodes.SERVICE_UNAVAILABLE:
+                retry_policy = service_unavailable_retry_policy
             elif e.status_code == StatusCodes.REQUEST_TIMEOUT or e.status_code >= StatusCodes.INTERNAL_SERVER_ERROR:
                 # record the failure for circuit breaker tracking
                 if args:
                     await global_endpoint_manager.record_failure(args[0])
+                # TODO: change this to track errors for ppaf
                 retry_policy = timeout_failover_retry_policy
             else:
                 retry_policy = defaultRetry_policy
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/http_constants.py b/sdk/cosmos/azure-cosmos/azure/cosmos/http_constants.py
index 81804f71dfef..7324649a5d43 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/http_constants.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/http_constants.py
@@ -401,6 +401,7 @@ class StatusCodes:
     RETRY_WITH = 449
 
     INTERNAL_SERVER_ERROR = 500
+    SERVICE_UNAVAILABLE = 503
 
     # Operation pause and cancel. These are FAKE status codes for QOS logging purpose only.
     OPERATION_PAUSED = 1200
diff --git a/sdk/cosmos/azure-cosmos/tests/test_location_cache.py b/sdk/cosmos/azure-cosmos/tests/test_location_cache.py
index 52797696a5d2..4194fc5672b7 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_location_cache.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_location_cache.py
@@ -150,7 +150,7 @@ def test_resolve_request_endpoint_preferred_regions(self):
         assert read_resolved == write_resolved
         assert read_resolved == default_endpoint
 
-    @pytest.mark.parametrize("test_type",["OnClient"])
+    @pytest.mark.parametrize("test_type",["OnClient", "OnRequest", "OnBoth"])
     def test_get_applicable_regional_endpoints_excluded_regions(self, test_type):
         # Init test data
         if test_type == "OnClient":
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index fa5337514292..69d18595e123 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -1,6 +1,5 @@
 # The MIT License (MIT)
 # Copyright (c) Microsoft Corporation. All rights reserved.
-import os
 import unittest
 import uuid
 
@@ -17,7 +16,7 @@
 
 def create_errors():
     errors = []
-    error_codes = [403, 408, 500, 502, 503]
+    error_codes = [403, 503]
     for error_code in error_codes:
         if error_code == 403:
             errors.append(CosmosHttpResponseError(

From f1c69ed5f2f3ae395b7b3600147d06607262b615 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 31 Jul 2025 13:34:20 -0400
Subject: [PATCH 17/68] mypy, cspell, pylint

---
 sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py           | 2 +-
 ...tion_endpoint_manager_per_partition_automatic_failover.py | 3 ++-
 sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py       | 1 +
 ...ndpoint_manager_per_partition_automatic_failover_async.py | 5 +++--
 .../azure-cosmos/azure/cosmos/aio/_retry_utility_async.py    | 1 +
 5 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
index c3a75b734c08..304be31411be 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
@@ -25,7 +25,7 @@
 
 from typing import Dict
 from typing_extensions import Literal
-# cspell:disable-line
+# cspell:ignore PPAF
 
 
 class _Constants:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 643ec509c852..46d5cf207285 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -109,7 +109,8 @@ def resolve_service_endpoint_for_partition(
                             # main write region in the account configurations
                             self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
                             request.clear_route_to_location()
-                            return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
+                            return self._resolve_service_endpoint_for_partition_circuit_breaker(request,
+                                                                                                pk_range_wrapper)
                     else:
                         # Update the current regional endpoint to whatever the request is routing to
                         partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
index 272ad3339ef3..63b24a16b94f 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
@@ -44,6 +44,7 @@
 
 
 # pylint: disable=protected-access, disable=too-many-lines, disable=too-many-statements, disable=too-many-branches
+# cspell:ignore ppaf
 
 # args [0] is the request object
 # args [1] is the connection policy
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 0192548506c7..c356f386e9f3 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -110,7 +110,8 @@ def resolve_service_endpoint_for_partition(
                             # main write region in the account configurations
                             self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
                             request.clear_route_to_location()
-                            return self._resolve_service_endpoint_for_partition_circuit_breaker(request)
+                            return self._resolve_service_endpoint_for_partition_circuit_breaker(request,
+                                                                                                pk_range_wrapper)
                     else:
                         # Update the current regional endpoint to whatever the request is routing to
                         partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
@@ -118,7 +119,7 @@ def resolve_service_endpoint_for_partition(
                 partition_failover_info = PartitionLevelFailoverInfo()
                 partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
                 self.partition_range_to_failover_info[pk_range_wrapper] = partition_failover_info
-            return self._resolve_service_endpoint_for_partition_circuit_breaker(request)
+            return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
         return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
 
     def compute_available_preferred_regions(
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
index dfa181e8976a..1c4d44b647b5 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
@@ -45,6 +45,7 @@
 
 
 # pylint: disable=protected-access, disable=too-many-lines, disable=too-many-statements, disable=too-many-branches
+# cspell:ignore ppaf
 
 # args [0] is the request object
 # args [1] is the connection policy

From 9306d15f493dacf20256334f154375260556bdab Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 31 Jul 2025 19:04:16 -0400
Subject: [PATCH 18/68] remove tag from tests since config is service based

---
 .../tests/test_per_partition_circuit_breaker_sm_mrr.py           | 1 -
 .../tests/test_per_partition_circuit_breaker_sm_mrr_async.py     | 1 -
 2 files changed, 2 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py
index a43003ed22b4..7c63e8e82897 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py
@@ -38,7 +38,6 @@ def validate_unhealthy_partitions(global_endpoint_manager,
     assert unhealthy_partitions == expected_unhealthy_partitions
 
 @pytest.mark.cosmosCircuitBreakerMultiRegion
-@pytest.mark.cosmosPerPartitionAutomaticFailover
 class TestPerPartitionCircuitBreakerSmMrr:
     host = test_config.TestConfig.host
     master_key = test_config.TestConfig.masterKey
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py
index d1925988fca6..9779b9c68362 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py
@@ -23,7 +23,6 @@
 COLLECTION = "created_collection"
 
 @pytest.mark.cosmosCircuitBreakerMultiRegion
-@pytest.mark.cosmosPerPartitionAutomaticFailover
 @pytest.mark.asyncio
 class TestPerPartitionCircuitBreakerSmMrrAsync:
     host = test_config.TestConfig.host

From bd07d8351909f136acceab05ea79c906f71bdf9e Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 7 Aug 2025 16:45:49 -0400
Subject: [PATCH 19/68] add threshold-based retries for 408, 5xx errors

---
 ...tition_endpoint_manager_circuit_breaker.py | 20 +++++++-------
 ...anager_per_partition_automatic_failover.py | 26 +++++++++++++++++++
 .../azure/cosmos/_partition_health_tracker.py | 25 ++++++++++++++++++
 .../azure/cosmos/_retry_utility.py            |  9 +++----
 .../azure/cosmos/_synchronized_request.py     | 17 +++++++++---
 .../cosmos/_timeout_failover_retry_policy.py  | 16 ++++++++++++
 .../azure/cosmos/aio/_asynchronous_request.py | 13 +++++++++-
 ..._endpoint_manager_circuit_breaker_async.py | 20 +++++++-------
 ..._per_partition_automatic_failover_async.py | 26 +++++++++++++++++++
 .../azure/cosmos/aio/_retry_utility_async.py  |  8 +++---
 .../test_per_partition_automatic_failover.py  |  3 ++-
 11 files changed, 150 insertions(+), 33 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker.py
index d188d7713cb7..94fc4eafb98e 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker.py
@@ -89,12 +89,13 @@ def create_pk_range_wrapper(self, request: RequestObject) -> Optional[PartitionK
 
         return PartitionKeyRangeWrapper(partition_range, container_rid)
 
-    def record_failure(
+    def record_ppcb_failure(
             self,
-            request: RequestObject
-    ) -> None:
+            request: RequestObject,
+            pk_range_wrapper: Optional[PartitionKeyRangeWrapper] = None)-> None:
         if self.is_circuit_breaker_applicable(request):
-            pk_range_wrapper = self.create_pk_range_wrapper(request)
+            if pk_range_wrapper is None:
+                pk_range_wrapper = self.create_pk_range_wrapper(request)
             if pk_range_wrapper:
                 self.global_partition_endpoint_manager_core.record_failure(request, pk_range_wrapper)
 
@@ -109,11 +110,12 @@ def _resolve_service_endpoint_for_partition_circuit_breaker(
                                                                                                     pk_range_wrapper)
         return self._resolve_service_endpoint(request)
 
-    def record_success(
+    def record_ppcb_success(
             self,
-            request: RequestObject
-    ) -> None:
-        if self.global_partition_endpoint_manager_core.is_circuit_breaker_applicable(request):
-            pk_range_wrapper = self.create_pk_range_wrapper(request)
+            request: RequestObject,
+            pk_range_wrapper: Optional[PartitionKeyRangeWrapper] = None) -> None:
+        if self.is_circuit_breaker_applicable(request):
+            if pk_range_wrapper is None:
+                pk_range_wrapper = self.create_pk_range_wrapper(request)
             if pk_range_wrapper:
                 self.global_partition_endpoint_manager_core.record_success(request, pk_range_wrapper)
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 46d5cf207285..a1bcae1da3d8 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -12,6 +12,7 @@
 from azure.cosmos.http_constants import ResourceType
 from azure.cosmos._global_partition_endpoint_manager_circuit_breaker import \
     _GlobalPartitionEndpointManagerForCircuitBreaker
+from azure.cosmos._partition_health_tracker import _PPAFPartitionThresholdsTracker
 from azure.cosmos.documents import _OperationType
 
 from azure.cosmos._request_object import RequestObject
@@ -64,6 +65,7 @@ class _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover(_GlobalPar
     def __init__(self, client: "CosmosClientConnection"):
         super(_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover, self).__init__(client)
         self.partition_range_to_failover_info: Dict[PartitionKeyRangeWrapper, PartitionLevelFailoverInfo] = {}
+        self.ppaf_thresholds_tracker = _PPAFPartitionThresholdsTracker()
 
     def is_per_partition_automatic_failover_applicable(self, request: RequestObject) -> bool:
         if not request:
@@ -142,3 +144,27 @@ def compute_available_preferred_regions(
             for region in available_regions
         }
         return available_regional_endpoints
+
+    def record_failure(self,
+                       request: RequestObject,
+                       pk_range_wrapper: Optional[PartitionKeyRangeWrapper] = None) -> None:
+        """Records a failure for the given partition key range and request."""
+        if self.is_per_partition_automatic_failover_applicable(request):
+            if pk_range_wrapper is None:
+                pk_range_wrapper = self.create_pk_range_wrapper(request)
+            if pk_range_wrapper:
+                self.ppaf_thresholds_tracker.add_failure(pk_range_wrapper)
+        else:
+            self.record_ppcb_failure(request, pk_range_wrapper)
+
+    def record_success(self,
+                       request: RequestObject,
+                       pk_range_wrapper: Optional[PartitionKeyRangeWrapper] = None) -> None:
+        """Records a failure for the given partition key range and request."""
+        if self.is_per_partition_automatic_failover_applicable(request):
+            if pk_range_wrapper is None:
+                pk_range_wrapper = self.create_pk_range_wrapper(request)
+            if pk_range_wrapper:
+                self.ppaf_thresholds_tracker.clear_pk_failures(pk_range_wrapper)
+        else:
+            self.record_ppcb_success(request, pk_range_wrapper)
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
index 0fc10fcc2bce..e7c44d01120f 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
@@ -44,6 +44,8 @@
 LAST_UNAVAILABILITY_CHECK_TIME_STAMP = "lastUnavailabilityCheckTimeStamp"
 HEALTH_STATUS = "healthStatus"
 
+#cspell:ignore PPAF
+
 class _PartitionHealthInfo(object):
     """
     This internal class keeps the health and statistics for a partition.
@@ -290,3 +292,26 @@ def _reset_partition_health_tracker_stats(self) -> None:
         for locations in self.pk_range_wrapper_to_health_info.values():
             for health_info in locations.values():
                 health_info.reset_failure_rate_health_stats()
+
+class _PPAFPartitionThresholdsTracker(object):
+    """
+    This internal class implements the logic for tracking consecutive failure thresholds for a partition
+    in the context for per-partition automatic failover. This tracker is only used in the context of 408, 5xx and
+    ServiceResponseError errors as a defensive measure to avoid failing over too early without confirmation
+    from the service.
+    """
+
+    def __init__(self) -> None:
+        self.pk_range_wrapper_to_failure_count: Dict[PartitionKeyRangeWrapper, int] = {}
+
+    def add_failure(self, pk_range_wrapper: PartitionKeyRangeWrapper) -> None:
+        if pk_range_wrapper not in self.pk_range_wrapper_to_failure_count:
+            self.pk_range_wrapper_to_failure_count[pk_range_wrapper] = 0
+        self.pk_range_wrapper_to_failure_count[pk_range_wrapper] += 1
+
+    def clear_pk_failures(self, pk_range_wrapper: PartitionKeyRangeWrapper) -> None:
+        if pk_range_wrapper in self.pk_range_wrapper_to_failure_count:
+            del self.pk_range_wrapper_to_failure_count[pk_range_wrapper]
+
+    def get_pk_failures(self, pk_range_wrapper: PartitionKeyRangeWrapper) -> int:
+        return self.pk_range_wrapper_to_failure_count.get(pk_range_wrapper, 0)
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
index 63b24a16b94f..ee5ded634082 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
@@ -115,7 +115,7 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
         try:
             if args:
                 result = ExecuteFunction(function, global_endpoint_manager, *args, **kwargs)
-                global_endpoint_manager.record_success(args[0])
+                global_endpoint_manager.record_success(args[0], pk_range_wrapper)
             else:
                 result = ExecuteFunction(function, *args, **kwargs)
             if not client.last_response_headers:
@@ -195,8 +195,7 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
             elif e.status_code == StatusCodes.REQUEST_TIMEOUT or e.status_code >= StatusCodes.INTERNAL_SERVER_ERROR:
                 if args:
                     # record the failure for circuit breaker tracking
-                    # TODO: change this to track errors for ppaf
-                    global_endpoint_manager.record_failure(args[0])
+                    global_endpoint_manager.record_failure(args[0], pk_range_wrapper)
                 retry_policy = timeout_failover_retry_policy
             else:
                 retry_policy = defaultRetry_policy
@@ -230,7 +229,7 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
                     raise e
             else:
                 if args:
-                    global_endpoint_manager.record_failure(args[0])
+                    global_endpoint_manager.record_failure(args[0], pk_range_wrapper)
                 _handle_service_request_retries(client, service_request_retry_policy, e, *args)
 
         except ServiceResponseError as e:
@@ -239,7 +238,7 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
                     raise e
             else:
                 if args:
-                    global_endpoint_manager.record_failure(args[0])
+                    global_endpoint_manager.record_failure(args[0], pk_range_wrapper)
                 _handle_service_response_retries(request, client, service_response_retry_policy, e, *args)
 
 def ExecuteFunction(function, *args, **kwargs):
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
index bb338f443dca..8e70f443e052 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
@@ -27,10 +27,13 @@
 
 from urllib.parse import urlparse
 from azure.core.exceptions import DecodeError  # type: ignore
+from azure.core import PipelineClient
+from typing import Any
 
-from . import exceptions
-from . import http_constants
-from . import _retry_utility
+from . import exceptions, http_constants, _retry_utility
+from .documents import ConnectionPolicy
+from ._request_object import RequestObject
+from ._global_partition_endpoint_manager_per_partition_automatic_failover import _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover
 
 
 def _is_readable_stream(obj):
@@ -65,7 +68,13 @@ def _request_body_from_data(data):
     return None
 
 
-def _Request(global_endpoint_manager, request_params, connection_policy, pipeline_client, request, **kwargs): # pylint: disable=too-many-statements
+def _Request(
+        global_endpoint_manager: _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover,
+        request_params: RequestObject,
+        connection_policy: ConnectionPolicy,
+        pipeline_client: PipelineClient,
+        request: Any,
+        **kwargs): # pylint: disable=too-many-statements
     """Makes one http request using the requests module.
 
     :param _GlobalEndpointManager global_endpoint_manager:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
index 7506500b463a..33b2596ccb5a 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
@@ -4,8 +4,11 @@
 """Internal class for timeout failover retry policy implementation in the Azure
 Cosmos database service.
 """
+import os
 from azure.cosmos.documents import _OperationType
+from azure.cosmos._constants import _Constants as Constants
 
+# cspell:ignore PPAF, ppaf
 
 class _TimeoutFailoverRetryPolicy(object):
 
@@ -37,6 +40,19 @@ def ShouldRetry(self, _exception):
         :returns: a boolean stating whether the request should be retried
         :rtype: bool
         """
+        # PPAF will have its own retry logic based on consecutive failures before failing over to the next region
+        if self.request and self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
+            if (self.global_endpoint_manager.ppaf_thresholds_tracker.get_pk_failures(self.pk_range_wrapper)
+                >= int(os.environ.get(Constants.TIMEOUT_ERROR_THRESHOLD_PPAF,
+                                      Constants.TIMEOUT_ERROR_THRESHOLD_PPAF_DEFAULT))):
+                # If the PPAF threshold is reached, we reset the count and retry to the next region
+                self.global_endpoint_manager.ppaf_thresholds_tracker.clear_pk_failures(self.pk_range_wrapper)
+                partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[
+                    self.pk_range_wrapper]
+                partition_level_info.unavailable_regional_endpoints.add(self.request.location_endpoint_to_route)
+                self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
+                return True
+
         # we retry only if the request is a read operation or if it is a write operation with retry enabled
         if self.request and not self.is_operation_retryable():
             return False
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
index 1cd2a22039b4..310998728ed6 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
@@ -24,17 +24,28 @@
 import copy
 import json
 import time
+from typing import Any
 
 from urllib.parse import urlparse
+from azure.core import AsyncPipelineClient
 from azure.core.exceptions import DecodeError  # type: ignore
 
 from .. import exceptions
 from .. import http_constants
 from . import _retry_utility_async
+from ..documents import ConnectionPolicy
+from .._request_object import RequestObject
+from ._global_partition_endpoint_manager_per_partition_automatic_failover_async import _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync
 from .._synchronized_request import _request_body_from_data, _replace_url_prefix
 
 
-async def _Request(global_endpoint_manager, request_params, connection_policy, pipeline_client, request, **kwargs): # pylint: disable=too-many-statements
+async def _Request(
+        global_endpoint_manager: _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync,
+        request_params: RequestObject,
+        connection_policy: ConnectionPolicy,
+        pipeline_client: AsyncPipelineClient,
+        request: Any,
+        **kwargs): # pylint: disable=too-many-statements
     """Makes one http request using the requests module.
 
     :param _GlobalEndpointManager global_endpoint_manager:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_circuit_breaker_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_circuit_breaker_async.py
index c229d37082f0..3918a12622d0 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_circuit_breaker_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_circuit_breaker_async.py
@@ -35,7 +35,7 @@
 if TYPE_CHECKING:
     from azure.cosmos.aio._cosmos_client_connection_async import CosmosClientConnection
 
-
+# cspell:ignore ppcb
 # pylint: disable=protected-access
 class _GlobalPartitionEndpointManagerForCircuitBreakerAsync(_GlobalEndpointManager):
     """
@@ -90,12 +90,13 @@ async def create_pk_range_wrapper(self, request: RequestObject) -> Optional[Part
     def is_circuit_breaker_applicable(self, request: RequestObject) -> bool:
         return self.global_partition_endpoint_manager_core.is_circuit_breaker_applicable(request)
 
-    async def record_failure(
+    async def record_ppcb_failure(
             self,
-            request: RequestObject
-    ) -> None:
+            request: RequestObject,
+            pk_range_wrapper: Optional[PartitionKeyRangeWrapper] = None) -> None:
         if self.is_circuit_breaker_applicable(request):
-            pk_range_wrapper = await self.create_pk_range_wrapper(request)
+            if pk_range_wrapper is None:
+                pk_range_wrapper = await self.create_pk_range_wrapper(request)
             if pk_range_wrapper:
                 self.global_partition_endpoint_manager_core.record_failure(request, pk_range_wrapper)
 
@@ -110,11 +111,12 @@ def _resolve_service_endpoint_for_partition_circuit_breaker(
                                                                                                     pk_range_wrapper)
         return self._resolve_service_endpoint(request)
 
-    async def record_success(
+    async def record_ppcb_success(
             self,
-            request: RequestObject
-    ) -> None:
+            request: RequestObject,
+            pk_range_wrapper: Optional[PartitionKeyRangeWrapper] = None) -> None:
         if self.is_circuit_breaker_applicable(request):
-            pk_range_wrapper = await self.create_pk_range_wrapper(request)
+            if pk_range_wrapper is None:
+                pk_range_wrapper = await self.create_pk_range_wrapper(request)
             if pk_range_wrapper:
                 self.global_partition_endpoint_manager_core.record_success(request, pk_range_wrapper)
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index c356f386e9f3..175a3df025b6 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -14,6 +14,7 @@
     _GlobalPartitionEndpointManagerForCircuitBreakerAsync
 from azure.cosmos.documents import _OperationType
 
+from azure.cosmos._partition_health_tracker import _PPAFPartitionThresholdsTracker
 from azure.cosmos._request_object import RequestObject
 from azure.cosmos._routing.routing_range import PartitionKeyRangeWrapper
 
@@ -65,6 +66,7 @@ class _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync(
     def __init__(self, client: "CosmosClientConnection"):
         super(_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync, self).__init__(client)
         self.partition_range_to_failover_info: Dict[PartitionKeyRangeWrapper, PartitionLevelFailoverInfo] = {}
+        self.ppaf_thresholds_tracker = _PPAFPartitionThresholdsTracker()
 
     def is_per_partition_automatic_failover_applicable(self, request: RequestObject) -> bool:
         if not request:
@@ -140,3 +142,27 @@ def compute_available_preferred_regions(
             for region in available_regions
         }
         return available_regional_endpoints
+
+    async def record_failure(self,
+                             request: RequestObject,
+                             pk_range_wrapper: Optional[PartitionKeyRangeWrapper] = None) -> None:
+        """Records a failure for the given partition key range and request."""
+        if self.is_per_partition_automatic_failover_applicable(request):
+            if pk_range_wrapper is None:
+                pk_range_wrapper = await self.create_pk_range_wrapper(request)
+            if pk_range_wrapper:
+                self.ppaf_thresholds_tracker.add_failure(pk_range_wrapper)
+        else:
+            await self.record_ppcb_failure(request, pk_range_wrapper)
+
+    async def record_success(self,
+                             request: RequestObject,
+                             pk_range_wrapper: Optional[PartitionKeyRangeWrapper] = None) -> None:
+        """Records a failure for the given partition key range and request."""
+        if self.is_per_partition_automatic_failover_applicable(request):
+            if pk_range_wrapper is None:
+                pk_range_wrapper = await self.create_pk_range_wrapper(request)
+            if pk_range_wrapper:
+                self.ppaf_thresholds_tracker.clear_pk_failures(pk_range_wrapper)
+        else:
+            await self.record_ppcb_success(request, pk_range_wrapper)
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
index 1c4d44b647b5..5b8e041541d3 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
@@ -114,7 +114,7 @@ async def ExecuteAsync(client, global_endpoint_manager, function, *args, **kwarg
         try:
             if args:
                 result = await ExecuteFunctionAsync(function, global_endpoint_manager, *args, **kwargs)
-                await global_endpoint_manager.record_success(args[0])
+                await global_endpoint_manager.record_success(args[0], pk_range_wrapper)
             else:
                 result = await ExecuteFunctionAsync(function, *args, **kwargs)
             if not client.last_response_headers:
@@ -193,7 +193,7 @@ async def ExecuteAsync(client, global_endpoint_manager, function, *args, **kwarg
             elif e.status_code == StatusCodes.REQUEST_TIMEOUT or e.status_code >= StatusCodes.INTERNAL_SERVER_ERROR:
                 # record the failure for circuit breaker tracking
                 if args:
-                    await global_endpoint_manager.record_failure(args[0])
+                    await global_endpoint_manager.record_failure(args[0], pk_range_wrapper)
                 # TODO: change this to track errors for ppaf
                 retry_policy = timeout_failover_retry_policy
             else:
@@ -242,12 +242,12 @@ async def ExecuteAsync(client, global_endpoint_manager, function, *args, **kwarg
                         _handle_service_request_retries(client, service_request_retry_policy, e, *args)
                     else:
                         if args:
-                            await global_endpoint_manager.record_failure(args[0])
+                            await global_endpoint_manager.record_failure(args[0], pk_range_wrapper)
                         _handle_service_response_retries(request, client, service_response_retry_policy, e, *args)
                 # in case customer is not using aiohttp
                 except ImportError:
                     if args:
-                        await global_endpoint_manager.record_failure(args[0])
+                        await global_endpoint_manager.record_failure(args[0], pk_range_wrapper)
                     _handle_service_response_retries(request, client, service_response_retry_policy, e, *args)
 
 
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index 69d18595e123..efdb71b8d9ea 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -16,7 +16,7 @@
 
 def create_errors():
     errors = []
-    error_codes = [403, 503]
+    error_codes = [403, 408, 500, 502, 503, 504]
     for error_code in error_codes:
         if error_code == 403:
             errors.append(CosmosHttpResponseError(
@@ -91,6 +91,7 @@ def test_ppaf_partition_info_cache_and_routing(self, write_operation, error):
         initial_endpoint = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_regional_endpoint
 
         # Based on our configuration, we should have had one error followed by a success - marking only the previous endpoint as unavailable
+        # TODO: add logic here to deal with consecutive failures case
         perform_write_operation(
             write_operation,
             container,

From 2e5838cdb22bb1df9b6a9c982f5614e2af5dd63d Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Fri, 8 Aug 2025 11:50:42 -0400
Subject: [PATCH 20/68] update constant use, rollback session token PR change

---
 sdk/cosmos/azure-cosmos/azure/cosmos/documents.py           | 2 +-
 sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/documents.py b/sdk/cosmos/azure-cosmos/azure/cosmos/documents.py
index 4698c7378dee..8d314179ef38 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/documents.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/documents.py
@@ -361,7 +361,7 @@ def __init__(self) -> None:
         self.ProxyConfiguration: Optional[ProxyConfiguration] = None
         self.EnableEndpointDiscovery: bool = True
         self.PreferredLocations: List[str] = []
-        self.ExcludedLocations: Optional[List[str]] = None
+        self.ExcludedLocations: List[str] = []
         self.RetryOptions: RetryOptions = RetryOptions()
         self.DisableSSLVerification: bool = False
         self.UseMultipleWriteLocations: bool = False
diff --git a/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py b/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py
index 49981dba1db9..2adff2e79327 100644
--- a/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py
+++ b/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py
@@ -33,6 +33,7 @@
 from requests import Session
 
 from azure.cosmos import documents
+from azure.cosmos._constants import _Constants as Constants
 
 import test_config
 from azure.cosmos.exceptions import CosmosHttpResponseError
@@ -295,8 +296,7 @@ def transform_topology_ppaf_enabled( # cspell:disable-line
         if response.status_code == 200 and data:
             data = data.decode("utf-8")
             result = json.loads(data)
-            # TODO: need to verify below behavior against actual Cosmos DB service response
-            result["enablePerPartitionFailoverBehavior"] = True
+            result[Constants.EnablePerPartitionFailoverBehavior] = True
             FaultInjectionTransport.logger.info("Transformed Account Topology: {}".format(result))
             request: HttpRequest = response.request
             return FaultInjectionTransport.MockHttpResponse(request, 200, result)

From 8b7d1819eb7b780c1c3e61c859ebee4acbb98697 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Mon, 18 Aug 2025 19:50:31 -0400
Subject: [PATCH 21/68] threshold based retries

---
 sdk/cosmos/azure-cosmos/azure/cosmos/_base.py |  21 ++++
 .../_endpoint_discovery_retry_policy.py       |  22 ++--
 ...anager_per_partition_automatic_failover.py |   1 -
 .../azure/cosmos/_retry_utility.py            |   9 +-
 .../cosmos/_service_response_retry_policy.py  |   6 +-
 .../_service_unavailable_retry_policy.py      |  15 +--
 .../cosmos/_timeout_failover_retry_policy.py  |  16 +--
 ..._per_partition_automatic_failover_async.py |   5 +-
 .../azure/cosmos/aio/_retry_utility_async.py  |   6 +-
 .../test_per_partition_automatic_failover.py  | 100 +++++++++++++++---
 ..._per_partition_automatic_failover_async.py |  71 +++++++++++--
 .../test_per_partition_circuit_breaker_mm.py  |   8 +-
 ..._per_partition_circuit_breaker_mm_async.py |   8 +-
 13 files changed, 211 insertions(+), 77 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
index 7a5df84f8816..d040d03c18c9 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
@@ -28,6 +28,7 @@
 import uuid
 import re
 import binascii
+import os
 from typing import Dict, Any, List, Mapping, Optional, Sequence, Union, Tuple, TYPE_CHECKING
 
 from urllib.parse import quote as urllib_quote
@@ -45,7 +46,10 @@
 if TYPE_CHECKING:
     from ._cosmos_client_connection import CosmosClientConnection
     from .aio._cosmos_client_connection_async import CosmosClientConnection as AsyncClientConnection
+    from ._global_partition_endpoint_manager_per_partition_automatic_failover import (
+        _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover)
     from ._request_object import RequestObject
+    from ._routing.routing_range import PartitionKeyRangeWrapper
 
 # pylint: disable=protected-access
 
@@ -933,3 +937,20 @@ def _build_properties_cache(properties: Dict[str, Any], container_link: str) ->
         "_self": properties.get("_self", None), "_rid": properties.get("_rid", None),
         "partitionKey": properties.get("partitionKey", None), "container_link": container_link
     }
+
+def try_ppaf_failover_threshold(
+        global_endpoint_manager: "_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover",
+        pk_range_wrapper: "PartitionKeyRangeWrapper",
+        request: "RequestObject"):
+    """Check if the PPAF threshold is reached for the current partition range, and mark endpoint unavailable if so.
+    """
+    # If PPAF is enabled, we track consecutive failures for certain exceptions, and only fail over at a partition
+    # level after the threshold is reached
+    if request and global_endpoint_manager.is_per_partition_automatic_failover_applicable(request):
+        if (global_endpoint_manager.ppaf_thresholds_tracker.get_pk_failures(pk_range_wrapper)
+                >= int(os.environ.get(Constants.TIMEOUT_ERROR_THRESHOLD_PPAF,
+                                      Constants.TIMEOUT_ERROR_THRESHOLD_PPAF_DEFAULT))):
+            # If the PPAF threshold is reached, we reset the count and retry to the next region
+            global_endpoint_manager.ppaf_thresholds_tracker.clear_pk_failures(pk_range_wrapper)
+            partition_level_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
+            partition_level_info.unavailable_regional_endpoints.add(request.location_endpoint_to_route)
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
index aabf247936fc..f29daf770891 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
@@ -62,17 +62,6 @@ def ShouldRetry(self, exception):  # pylint: disable=unused-argument
 
         self.failover_retry_count += 1
 
-        if self.request.location_endpoint_to_route:
-            if _OperationType.IsReadOnlyOperation(self.request.operation_type):
-                # Mark current read endpoint as unavailable
-                self.global_endpoint_manager.mark_endpoint_unavailable_for_read(
-                    self.request.location_endpoint_to_route,
-                    True)
-            else:
-                self.global_endpoint_manager.mark_endpoint_unavailable_for_write(
-                    self.request.location_endpoint_to_route,
-                    True)
-
         # set the refresh_needed flag to ensure that endpoint list is
         # refreshed with new writable and readable locations
         self.global_endpoint_manager.refresh_needed = True
@@ -85,6 +74,17 @@ def ShouldRetry(self, exception):  # pylint: disable=unused-argument
             self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
             return True
 
+        if self.request.location_endpoint_to_route:
+            if _OperationType.IsReadOnlyOperation(self.request.operation_type):
+                # Mark current read endpoint as unavailable
+                self.global_endpoint_manager.mark_endpoint_unavailable_for_read(
+                    self.request.location_endpoint_to_route,
+                    True)
+            else:
+                self.global_endpoint_manager.mark_endpoint_unavailable_for_write(
+                    self.request.location_endpoint_to_route,
+                    True)
+
         # clear previous location-based routing directive
         self.request.clear_route_to_location()
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index a1bcae1da3d8..33a0bccae4d3 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -120,7 +120,6 @@ def resolve_service_endpoint_for_partition(
                 partition_failover_info = PartitionLevelFailoverInfo()
                 partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
                 self.partition_range_to_failover_info[pk_range_wrapper] = partition_failover_info
-            return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
         return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
 
     def compute_available_preferred_regions(
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
index ee5ded634082..ba9800ba223f 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
@@ -44,7 +44,7 @@
 
 
 # pylint: disable=protected-access, disable=too-many-lines, disable=too-many-statements, disable=too-many-branches
-# cspell:ignore ppaf
+# cspell:ignore PPAF,ppaf
 
 # args [0] is the request object
 # args [1] is the connection policy
@@ -191,10 +191,12 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
                     retry_policy.container_rid = cached_container["_rid"]
                     request.headers[retry_policy._intended_headers] = retry_policy.container_rid
             elif e.status_code == StatusCodes.SERVICE_UNAVAILABLE:
+                if args and global_endpoint_manager.is_per_partition_automatic_failover_applicable(args[0]):
+                    global_endpoint_manager.record_failure(args[0], pk_range_wrapper)
                 retry_policy = service_unavailable_retry_policy
             elif e.status_code == StatusCodes.REQUEST_TIMEOUT or e.status_code >= StatusCodes.INTERNAL_SERVER_ERROR:
                 if args:
-                    # record the failure for circuit breaker tracking
+                    # record the failure for ppaf/circuit breaker tracking
                     global_endpoint_manager.record_failure(args[0], pk_range_wrapper)
                 retry_policy = timeout_failover_retry_policy
             else:
@@ -275,7 +277,8 @@ def _handle_service_request_retries(
         raise exception
 
 def _handle_service_response_retries(request, client, response_retry_policy, exception, *args):
-    if request and (_has_read_retryable_headers(request.headers) or (args and is_write_retryable(args[0], client))):
+    if request and (_has_read_retryable_headers(request.headers) or (args and is_write_retryable(args[0], client)) or
+                    (args and client._global_endpoint_manager.is_per_partition_automatic_failover_applicable(args[0]))):
         # we resolve the request endpoint to the next preferred region
         # once we are out of preferred regions we stop retrying
         retry_policy = response_retry_policy
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_response_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_response_retry_policy.py
index b49b1dc35994..31f6e800d5e1 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_response_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_response_retry_policy.py
@@ -6,9 +6,11 @@
 from the service, and as such we do not know what the output of the operation was. As such, we
 only do cross regional retries for read operations.
 """
+#cspell:ignore PPAF, ppaf
 
 import logging
 from azure.cosmos.documents import _OperationType
+from azure.cosmos._base import try_ppaf_failover_threshold
 
 class ServiceResponseRetryPolicy(object):
 
@@ -47,7 +49,9 @@ def ShouldRetry(self):
             return False
 
         if self.request:
-
+            # We track consecutive failures for per partition automatic failover, and only fail over at a partition
+            # level after the threshold is reached
+            try_ppaf_failover_threshold(self.global_endpoint_manager, self.pk_range_wrapper, self.request)
             if not _OperationType.IsReadOnlyOperation(self.request.operation_type) and not self.request.retry_write:
                 return False
             if self.request.retry_write and self.failover_retry_count + 1 >= self.max_write_retry_count:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
index 3dc3df1aac70..b07a1b910034 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
@@ -4,6 +4,7 @@
 """Internal class for service unavailable retry policy implementation in the Azure
 Cosmos database service.
 """
+from azure.cosmos._base import try_ppaf_failover_threshold
 
 class _ServiceUnavailableRetryPolicy(object):
 
@@ -11,12 +12,11 @@ def __init__(self, connection_policy, global_endpoint_manager, pk_range_wrapper,
         self.retry_after_in_milliseconds = 500
         self.global_endpoint_manager = global_endpoint_manager
         self.pk_range_wrapper = pk_range_wrapper
-        # If an account only has 1 region, then we still want to retry once on the same region
-        self._max_retry_attempt_count = (len(self.global_endpoint_manager.location_cache.read_regional_routing_contexts)
-                                         + 1)
         self.retry_count = 0
         self.connection_policy = connection_policy
         self.request = args[0] if args else None
+        # If an account only has 1 region, then we still want to retry once on the same region
+        self._max_retry_attempt_count = max(2, (len(self.global_endpoint_manager.location_cache.read_regional_routing_contexts)))
 
     def ShouldRetry(self, _exception):
         """Returns true if the request should retry based on the passed-in exception.
@@ -35,20 +35,13 @@ def ShouldRetry(self, _exception):
             return False
 
         if self.request:
+            try_ppaf_failover_threshold(self.global_endpoint_manager, self.pk_range_wrapper, self.request)
             location_endpoint = self.resolve_next_region_service_endpoint()
             self.request.route_to_location(location_endpoint)
         return True
 
     # This function prepares the request to go to the next region
     def resolve_next_region_service_endpoint(self):
-        if self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
-            # If per partition automatic failover is applicable, we mark the current endpoint as unavailable
-            # and resolve the service endpoint for the partition range - otherwise, continue with default retry logic
-            partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[self.pk_range_wrapper]
-            partition_level_info.unavailable_regional_endpoints.add(self.request.location_endpoint_to_route)
-            return self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request,
-                                                                                       self.pk_range_wrapper)
-
         # clear previous location-based routing directive
         self.request.clear_route_to_location()
         # clear the last routed endpoint within same region since we are going to a new region now
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
index 33b2596ccb5a..c7ee31026bc3 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
@@ -4,9 +4,8 @@
 """Internal class for timeout failover retry policy implementation in the Azure
 Cosmos database service.
 """
-import os
 from azure.cosmos.documents import _OperationType
-from azure.cosmos._constants import _Constants as Constants
+from azure.cosmos._base import try_ppaf_failover_threshold
 
 # cspell:ignore PPAF, ppaf
 
@@ -40,18 +39,7 @@ def ShouldRetry(self, _exception):
         :returns: a boolean stating whether the request should be retried
         :rtype: bool
         """
-        # PPAF will have its own retry logic based on consecutive failures before failing over to the next region
-        if self.request and self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
-            if (self.global_endpoint_manager.ppaf_thresholds_tracker.get_pk_failures(self.pk_range_wrapper)
-                >= int(os.environ.get(Constants.TIMEOUT_ERROR_THRESHOLD_PPAF,
-                                      Constants.TIMEOUT_ERROR_THRESHOLD_PPAF_DEFAULT))):
-                # If the PPAF threshold is reached, we reset the count and retry to the next region
-                self.global_endpoint_manager.ppaf_thresholds_tracker.clear_pk_failures(self.pk_range_wrapper)
-                partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[
-                    self.pk_range_wrapper]
-                partition_level_info.unavailable_regional_endpoints.add(self.request.location_endpoint_to_route)
-                self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
-                return True
+        try_ppaf_failover_threshold(self.global_endpoint_manager, self.pk_range_wrapper, self.request)
 
         # we retry only if the request is a read operation or if it is a write operation with retry enabled
         if self.request and not self.is_operation_retryable():
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 175a3df025b6..4fac0d1159d4 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -134,7 +134,10 @@ def compute_available_preferred_regions(
         :return: A set of available regional endpoints.
         :rtype: Set[str]
         """
-        excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
+        if request.excluded_locations:
+            excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
+        else:
+            excluded_locations = self.location_cache.connection_policy.ExcludedLocations
         preferred_locations = self.PreferredLocations
         available_regions = [item for item in preferred_locations if item not in excluded_locations]
         available_regional_endpoints = {
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
index 5b8e041541d3..3a764676b817 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
@@ -189,12 +189,14 @@ async def ExecuteAsync(client, global_endpoint_manager, function, *args, **kwarg
                     retry_policy.container_rid = cached_container["_rid"]
                     request.headers[retry_policy._intended_headers] = retry_policy.container_rid
             elif e.status_code == StatusCodes.SERVICE_UNAVAILABLE:
+                # if ppaf is applicable, we record the failure
+                if args and global_endpoint_manager.is_per_partition_automatic_failover_applicable(args[0]):
+                    await global_endpoint_manager.record_failure(args[0], pk_range_wrapper)
                 retry_policy = service_unavailable_retry_policy
             elif e.status_code == StatusCodes.REQUEST_TIMEOUT or e.status_code >= StatusCodes.INTERNAL_SERVER_ERROR:
-                # record the failure for circuit breaker tracking
                 if args:
+                    # record the failure for ppaf/circuit breaker tracking
                     await global_endpoint_manager.record_failure(args[0], pk_range_wrapper)
-                # TODO: change this to track errors for ppaf
                 retry_policy = timeout_failover_retry_policy
             else:
                 retry_policy = defaultRetry_policy
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index efdb71b8d9ea..743659e92d5b 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -4,8 +4,8 @@
 import uuid
 
 import pytest
-
 import test_config
+from azure.core.exceptions import ServiceResponseError
 from azure.cosmos import CosmosClient
 from azure.cosmos.exceptions import CosmosHttpResponseError
 from _fault_injection_transport import FaultInjectionTransport
@@ -14,19 +14,24 @@
 
 # cspell:disable
 
-def create_errors():
+def create_failover_errors():
     errors = []
-    error_codes = [403, 408, 500, 502, 503, 504]
+    error_codes = [403]
     for error_code in error_codes:
-        if error_code == 403:
-            errors.append(CosmosHttpResponseError(
-                status_code=error_code,
-                message="Some injected error.",
-                sub_status=3))
-        else:
-            errors.append(CosmosHttpResponseError(
-                status_code=error_code,
-                message="Some injected error."))
+        errors.append(CosmosHttpResponseError(
+            status_code=error_code,
+            message="Some injected error.",
+            sub_status=3))
+    return errors
+
+def create_threshold_errors():
+    errors = []
+    error_codes = [408, 500, 502, 503, 504]
+    for error_code in error_codes:
+        errors.append(CosmosHttpResponseError(
+            status_code=error_code,
+            message="Some injected error."))
+    errors.append(ServiceResponseError(message="Injected Service Response Error."))
     return errors
 
 # These tests assume that the configured live account has one main write region and one secondary read region.
@@ -74,10 +79,10 @@ def setup_info(self, error, max_count=None, is_batch=False, **kwargs):
         custom_setup = self.setup_method_with_custom_transport(custom_transport, default_endpoint=self.host, **kwargs)
         return setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate
 
-    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_errors()))
+    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_failover_errors()))
     def test_ppaf_partition_info_cache_and_routing(self, write_operation, error):
         # This test validates that the partition info cache is updated correctly upon failures, and that the
-        # per-partition automatic failover logic routes requests to the next available regional endpoint
+        # per-partition automatic failover logic routes requests to the next available regional endpoint on 403.3 errors.
         error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, error)
         setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(error_lambda, 1, write_operation == BATCH)
         container = setup['col']
@@ -91,7 +96,6 @@ def test_ppaf_partition_info_cache_and_routing(self, write_operation, error):
         initial_endpoint = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_regional_endpoint
 
         # Based on our configuration, we should have had one error followed by a success - marking only the previous endpoint as unavailable
-        # TODO: add logic here to deal with consecutive failures case
         perform_write_operation(
             write_operation,
             container,
@@ -119,12 +123,74 @@ def test_ppaf_partition_info_cache_and_routing(self, write_operation, error):
         assert partition_info.current_regional_endpoint is None
 
 
-    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_errors()))
+    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_threshold_errors()))
+    def test_ppaf_partition_thresholds_and_routing(self, write_operation, error):
+        # This test validates the consecutive failures logic is properly handled for per-partition automatic failover,
+        # and that the per-partition automatic failover logic routes requests to the next available regional endpoint
+        # after enough consecutive failures have occurred.
+        error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, error)
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(error_lambda)
+        container = setup['col']
+        fault_injection_container = custom_setup['col']
+        global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
+
+        # Create a document to populate the per-partition GEM partition range info cache
+        fault_injection_container.create_item(body={'id': doc_success_id, 'pk': PK_VALUE,
+                                                    'name': 'sample document', 'key': 'value'})
+        pk_range_wrapper = list(global_endpoint_manager.partition_range_to_failover_info.keys())[0]
+        initial_endpoint = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_regional_endpoint
+
+
+        is_503 = hasattr(error, 'status_code') and error.status_code == 503
+        # Since 503 errors are retried by default, we each request counts as two failures
+        consecutive_failures = 3 if is_503 else 6
+
+        for i in range(consecutive_failures):
+            # We perform the write operation multiple times to check the consecutive failures logic
+            with pytest.raises((CosmosHttpResponseError, ServiceResponseError)) as exc_info:
+                perform_write_operation(write_operation,
+                                        container,
+                                        fault_injection_container,
+                                        doc_fail_id,
+                                        PK_VALUE)
+            assert exc_info.value == error
+        # Verify that the threshold for consecutive failures is updated
+        pk_range_wrappers = list(global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count.keys())
+        assert len(pk_range_wrappers) == 1
+        failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
+        assert failure_count == 6
+        # Run some more requests to the same partition to trigger the failover logic
+        for i in range(consecutive_failures):
+            with pytest.raises((CosmosHttpResponseError, ServiceResponseError)) as exc_info:
+                perform_write_operation(write_operation,
+                                        container,
+                                        fault_injection_container,
+                                        doc_fail_id,
+                                        PK_VALUE)
+            assert exc_info.value == error
+        # We should have marked the previous endpoint as unavailable after 10 successive failures
+        partition_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
+        # Verify that the partition is marked as unavailable, and that the current regional endpoint is not the same
+        assert len(partition_info.unavailable_regional_endpoints) == 1
+        assert initial_endpoint in partition_info.unavailable_regional_endpoints
+        assert initial_endpoint != partition_info.current_regional_endpoint # west us 3 != west us
+
+        # Since we are failing every request, even though we retried to the next region, that retry should have failed as well
+        # This means we should have one extra failure - verify that the value makes sense
+        failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
+        assert failure_count == 1 if is_503 else 3
+
+    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_failover_errors()))
     def test_ppaf_exclude_regions(self, write_operation, error):
         # TODO: finish this test
         return
 
+    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_failover_errors()))
+    def test_ppaf_invalid_configs(self, write_operation, error):
+        # TODO: finish this test
+        return
+
 
 
 if __name__ == '__main__':
-    unittest.main()
+    unittest.main()
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
index 0321a63fa799..99e433bc6af4 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -10,10 +10,12 @@
 
 import test_config
 from azure.core.pipeline.transport._aiohttp import AioHttpTransport
+from azure.core.exceptions import ServiceResponseError
+from azure.cosmos.exceptions import CosmosHttpResponseError
 from azure.cosmos.aio import CosmosClient
 from _fault_injection_transport import FaultInjectionTransport
 from _fault_injection_transport_async import FaultInjectionTransportAsync
-from test_per_partition_automatic_failover import create_errors
+from test_per_partition_automatic_failover import create_failover_errors, create_threshold_errors
 from test_per_partition_circuit_breaker_mm import REGION_1, REGION_2, PK_VALUE, BATCH, write_operations_and_errors
 from test_per_partition_circuit_breaker_mm_async import perform_write_operation
 
@@ -70,14 +72,11 @@ async def setup_info(self, error, max_count=None, is_batch=False, **kwargs):
         custom_setup = await self.setup_method_with_custom_transport(custom_transport, default_endpoint=self.host, **kwargs)
         return setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate
 
-    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_errors()))
+    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_failover_errors()))
     async def test_ppaf_partition_info_cache_and_routing_async(self, write_operation, error):
         # This test validates that the partition info cache is updated correctly upon failures, and that the
         # per-partition automatic failover logic routes requests to the next available regional endpoint
-        error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(
-            0,
-            error
-        ))
+        error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(0, error))
         setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda, 1, write_operation == BATCH)
         container = setup['col']
         fault_injection_container = custom_setup['col']
@@ -107,7 +106,7 @@ async def test_ppaf_partition_info_cache_and_routing_async(self, write_operation
             write_operation,
             container,
             fault_injection_container,
-            str(uuid.uuid4()),
+            doc_fail_id,
             PK_VALUE)
         partition_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
         # Verify that the cache is empty, since the request going to the second regional endpoint failed
@@ -116,8 +115,64 @@ async def test_ppaf_partition_info_cache_and_routing_async(self, write_operation
         assert initial_endpoint not in partition_info.unavailable_regional_endpoints
         assert partition_info.current_regional_endpoint is None
 
+    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_threshold_errors()))
+    async def test_ppaf_partition_thresholds_and_routing_async(self, write_operation, error):
+        # This test validates that the partition info cache is updated correctly upon failures, and that the
+        # per-partition automatic failover logic routes requests to the next available regional endpoint
+        error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(0, error))
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda)
+        container = setup['col']
+        fault_injection_container = custom_setup['col']
+        global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
+
+        # Create a document to populate the per-partition GEM partition range info cache
+        await fault_injection_container.create_item(body={'id': doc_success_id, 'pk': PK_VALUE,
+                                                    'name': 'sample document', 'key': 'value'})
+        pk_range_wrapper = list(global_endpoint_manager.partition_range_to_failover_info.keys())[0]
+        initial_endpoint = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_regional_endpoint
+
+        is_503 = hasattr(error, 'status_code') and error.status_code == 503
+        # Since 503 errors are retried by default, we each request counts as two failures
+        consecutive_failures = 3 if is_503 else 6
+
+        for i in range(consecutive_failures):
+            # We perform the write operation multiple times to check the consecutive failures logic
+            with pytest.raises((CosmosHttpResponseError, ServiceResponseError)) as exc_info:
+                await perform_write_operation(write_operation,
+                                              container,
+                                              fault_injection_container,
+                                              doc_fail_id,
+                                              PK_VALUE)
+            assert exc_info.value == error
+
+        # Verify that the threshold for consecutive failures is updated
+        pk_range_wrappers = list(global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count.keys())
+        assert len(pk_range_wrappers) == 1
+        failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
+        assert failure_count == 6
+        # Run some more requests to the same partition to trigger the failover logic
+        for i in range(consecutive_failures):
+            with pytest.raises((CosmosHttpResponseError, ServiceResponseError)) as exc_info:
+                await perform_write_operation(write_operation,
+                                              container,
+                                              fault_injection_container,
+                                              doc_fail_id,
+                                              PK_VALUE)
+            assert exc_info.value == error
+        # We should have marked the previous endpoint as unavailable after 10 successive failures
+        partition_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
+        # Verify that the partition is marked as unavailable, and that the current regional endpoint is not the same
+        assert len(partition_info.unavailable_regional_endpoints) == 1
+        assert initial_endpoint in partition_info.unavailable_regional_endpoints
+        assert initial_endpoint != partition_info.current_regional_endpoint # west us 3 != west us
+
+        # Since we are failing every request, even though we retried to the next region, that retry should have failed as well
+        # This means we should have one extra failure - verify that the value makes sense
+        failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
+        assert failure_count == 1 if is_503 else 3
+
 
-    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_errors()))
+    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_threshold_errors()))
     async def test_ppaf_exclude_regions_async(self, write_operation, error):
         # TODO: finish this test
         return
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
index 3ed516a0a59c..c741d0b48af2 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
@@ -108,7 +108,7 @@ def perform_write_operation(operation, container, fault_injection_container, doc
     elif operation == UPSERT:
         resp = fault_injection_container.upsert_item(body=doc)
     elif operation == REPLACE:
-        container.create_item(body=doc)
+        container.upsert_item(body=doc)
         sleep(1)
         new_doc = {'id': doc_id,
                    'pk': pk,
@@ -116,11 +116,11 @@ def perform_write_operation(operation, container, fault_injection_container, doc
                    'key': 'value'}
         resp = fault_injection_container.replace_item(item=doc['id'], body=new_doc)
     elif operation == DELETE:
-        container.create_item(body=doc)
+        container.upsert_item(body=doc)
         sleep(1)
         resp = fault_injection_container.delete_item(item=doc['id'], partition_key=doc['pk'])
     elif operation == PATCH:
-        container.create_item(body=doc)
+        container.upsert_item(body=doc)
         sleep(1)
         operations = [{"op": "incr", "path": "/company", "value": 3}]
         resp = fault_injection_container.patch_item(item=doc['id'], partition_key=doc['pk'], patch_operations=operations)
@@ -134,7 +134,7 @@ def perform_write_operation(operation, container, fault_injection_container, doc
         resp = fault_injection_container.execute_item_batch(batch_operations, partition_key=doc['pk'])
     # this will need to be emulator only
     elif operation == DELETE_ALL_ITEMS_BY_PARTITION_KEY:
-        container.create_item(body=doc)
+        container.upsert_item(body=doc)
         resp = fault_injection_container.delete_all_items_by_partition_key(pk)
     if resp and expected_uri:
         validate_response_uri(resp, expected_uri)
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py
index 60e2603c1842..40147314e4ff 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py
@@ -33,7 +33,7 @@ async def perform_write_operation(operation, container, fault_injection_containe
     elif operation == UPSERT:
         resp = await fault_injection_container.upsert_item(body=doc)
     elif operation == REPLACE:
-        await container.create_item(body=doc)
+        await container.upsert_item(body=doc)
         new_doc = {'id': doc_id,
                    'pk': pk,
                    'name': 'sample document' + str(uuid),
@@ -41,11 +41,11 @@ async def perform_write_operation(operation, container, fault_injection_containe
         await asyncio.sleep(1)
         resp = await fault_injection_container.replace_item(item=doc['id'], body=new_doc)
     elif operation == DELETE:
-        await container.create_item(body=doc)
+        await container.upsert_item(body=doc)
         await asyncio.sleep(1)
         resp = await fault_injection_container.delete_item(item=doc['id'], partition_key=doc['pk'])
     elif operation == PATCH:
-        await container.create_item(body=doc)
+        await container.upsert_item(body=doc)
         await asyncio.sleep(1)
         operations = [{"op": "incr", "path": "/company", "value": 3}]
         resp = await fault_injection_container.patch_item(item=doc['id'], partition_key=doc['pk'], patch_operations=operations)
@@ -59,7 +59,7 @@ async def perform_write_operation(operation, container, fault_injection_containe
         resp = await fault_injection_container.execute_item_batch(batch_operations, partition_key=doc['pk'])
     # this will need to be emulator only
     elif operation == DELETE_ALL_ITEMS_BY_PARTITION_KEY:
-        await container.create_item(body=doc)
+        await container.upsert_item(body=doc)
         resp = await fault_injection_container.delete_all_items_by_partition_key(pk)
     if resp and expected_uri:
         validate_response_uri(resp, expected_uri)

From d8ed980f860260eb3711537c2e48e2f6b00e8364 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Mon, 18 Aug 2025 21:58:06 -0400
Subject: [PATCH 22/68] Update _base.py

---
 sdk/cosmos/azure-cosmos/azure/cosmos/_base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
index d040d03c18c9..cd2050cf1dab 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
@@ -52,6 +52,7 @@
     from ._routing.routing_range import PartitionKeyRangeWrapper
 
 # pylint: disable=protected-access
+#cspell:ignore PPAF, ppaf
 
 _COMMON_OPTIONS = {
     'initial_headers': 'initialHeaders',

From fcd5c60c44e6763a29c54ccd12d13f635ac92ba7 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Tue, 19 Aug 2025 00:28:56 -0400
Subject: [PATCH 23/68] cspell, test fixes

---
 ...bal_partition_endpoint_manager_circuit_breaker.py |  2 ++
 ...point_manager_per_partition_automatic_failover.py |  1 +
 .../cosmos/_service_unavailable_retry_policy.py      |  9 ++++++++-
 .../azure/cosmos/_timeout_failover_retry_policy.py   |  9 ++++-----
 ...manager_per_partition_automatic_failover_async.py |  1 +
 .../test_timeout_and_failover_retry_policy_async.py  | 12 ++++++++----
 6 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker.py
index 94fc4eafb98e..ca58e6eb2e1b 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_circuit_breaker.py
@@ -35,6 +35,8 @@
 if TYPE_CHECKING:
     from azure.cosmos._cosmos_client_connection import CosmosClientConnection
 
+#cspell:ignore ppcb
+
 class _GlobalPartitionEndpointManagerForCircuitBreaker(_GlobalEndpointManager):
     """
     This internal class implements the logic for partition endpoint management for
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 33a0bccae4d3..5514615a92a2 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -24,6 +24,7 @@
 logger = logging.getLogger("azure.cosmos._GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover")
 
 # pylint: disable=name-too-long, protected-access
+#cspell:ignore PPAF, ppaf, ppcb
 
 class PartitionLevelFailoverInfo:
     """
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
index b07a1b910034..cb3246e6db7a 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
@@ -4,8 +4,11 @@
 """Internal class for service unavailable retry policy implementation in the Azure
 Cosmos database service.
 """
+from azure.cosmos.documents import _OperationType
 from azure.cosmos._base import try_ppaf_failover_threshold
 
+#cspell:ignore ppaf
+
 class _ServiceUnavailableRetryPolicy(object):
 
     def __init__(self, connection_policy, global_endpoint_manager, pk_range_wrapper, *args):
@@ -16,7 +19,11 @@ def __init__(self, connection_policy, global_endpoint_manager, pk_range_wrapper,
         self.connection_policy = connection_policy
         self.request = args[0] if args else None
         # If an account only has 1 region, then we still want to retry once on the same region
-        self._max_retry_attempt_count = max(2, (len(self.global_endpoint_manager.location_cache.read_regional_routing_contexts)))
+        self._max_retry_attempt_count = max(2, len(self.global_endpoint_manager.location_cache
+                                                   .read_regional_routing_contexts))
+        if _OperationType.IsWriteOperation(self.request.operation_type):
+            self._max_retry_attempt_count = max(2, len(
+                self.global_endpoint_manager.location_cache.write_regional_routing_contexts))
 
     def ShouldRetry(self, _exception):
         """Returns true if the request should retry based on the passed-in exception.
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
index c7ee31026bc3..952685ef5e06 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
@@ -21,13 +21,12 @@ def __init__(self, connection_policy, global_endpoint_manager, pk_range_wrapper,
         # If an account only has 1 region, then we still want to retry once on the same region
         # We want this to be the default retry attempts as paging through a query means there are requests without
         # a request object
-        self._max_retry_attempt_count = len(self.global_endpoint_manager.location_cache
-                                            .read_regional_routing_contexts) + 1
+        self._max_retry_attempt_count = max(2, len(self.global_endpoint_manager.location_cache
+                                            .read_regional_routing_contexts))
        # If the request is a write operation, we only want to retry once if retry write is enabled
         if self.request and _OperationType.IsWriteOperation(self.request.operation_type):
-            self._max_retry_attempt_count = len(
-                self.global_endpoint_manager.location_cache.write_regional_routing_contexts
-            ) + 1
+            self._max_retry_attempt_count = max(2, len(
+                self.global_endpoint_manager.location_cache.write_regional_routing_contexts))
         self.retry_count = 0
         self.connection_policy = connection_policy
         self.request = args[0] if args else None
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 4fac0d1159d4..a31caaac8238 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -24,6 +24,7 @@
 logger = logging.getLogger("azure.cosmos._GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover")
 
 # pylint: disable=name-too-long, protected-access
+#cspell:ignore PPAF, ppaf, ppcb
 
 class PartitionLevelFailoverInfo:
     """
diff --git a/sdk/cosmos/azure-cosmos/tests/test_timeout_and_failover_retry_policy_async.py b/sdk/cosmos/azure-cosmos/tests/test_timeout_and_failover_retry_policy_async.py
index 15c41cac9410..bf73d6c48a12 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_timeout_and_failover_retry_policy_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_timeout_and_failover_retry_policy_async.py
@@ -38,6 +38,7 @@ async def setup():
 
 
 def error_codes():
+    return [503]
     return [408, 500, 502, 503]
 
 
@@ -79,9 +80,10 @@ async def test_timeout_failover_retry_policy_for_read_failure_async(self, setup,
 
         created_document = await setup[COLLECTION].create_item(body=document_definition)
         self.original_execute_function = _retry_utility_async.ExecuteFunctionAsync
+        num_exceptions = max(2, len(setup[COLLECTION].client_connection._global_endpoint_manager.location_cache.read_regional_routing_contexts))
         try:
-            # should retry once and then succeed
-            mf = self.MockExecuteFunction(self.original_execute_function, 2, error_code)
+            # should retry and then succeed
+            mf = self.MockExecuteFunction(self.original_execute_function, num_exceptions, error_code)
             _retry_utility_async.ExecuteFunctionAsync = mf
             await setup[COLLECTION].read_item(item=created_document['id'],
                                               partition_key=created_document['pk'])
@@ -131,9 +133,11 @@ async def test_timeout_failover_retry_policy_for_write_failure_async(self, setup
                                'key': 'value'}
 
         self.original_execute_function = _retry_utility_async.ExecuteFunctionAsync
+        num_exceptions_503 = max(2, len(setup[COLLECTION].client_connection._global_endpoint_manager.location_cache.write_regional_routing_contexts))
         try:
-            # timeouts should fail immediately for writes
-            mf = self.MockExecuteFunction(self.original_execute_function,0, error_code)
+            # timeouts should fail immediately for writes - except for 503s, which should retry on every preferred location
+            num_exceptions = num_exceptions_503 if error_code == 503 else 0
+            mf = self.MockExecuteFunction(self.original_execute_function,num_exceptions, error_code)
             _retry_utility_async.ExecuteFunctionAsync = mf
             try:
                 await setup[COLLECTION].create_item(body=document_definition)

From 467a95d78e90956b01ac72d134529bd6a9fd2d09 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Tue, 19 Aug 2025 11:43:27 -0400
Subject: [PATCH 24/68] Update _service_unavailable_retry_policy.py

---
 .../azure/cosmos/_service_unavailable_retry_policy.py           | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
index cb3246e6db7a..e212e869f6ef 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
@@ -21,7 +21,7 @@ def __init__(self, connection_policy, global_endpoint_manager, pk_range_wrapper,
         # If an account only has 1 region, then we still want to retry once on the same region
         self._max_retry_attempt_count = max(2, len(self.global_endpoint_manager.location_cache
                                                    .read_regional_routing_contexts))
-        if _OperationType.IsWriteOperation(self.request.operation_type):
+        if self.request and _OperationType.IsWriteOperation(self.request.operation_type):
             self._max_retry_attempt_count = max(2, len(
                 self.global_endpoint_manager.location_cache.write_regional_routing_contexts))
 

From b9aa01ccbc7e01fee34c27784a38de0920757f17 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Tue, 19 Aug 2025 12:34:18 -0400
Subject: [PATCH 25/68] mypy, pylint

---
 sdk/cosmos/azure-cosmos/azure/cosmos/_base.py      |  2 --
 ...int_manager_per_partition_automatic_failover.py | 14 ++++++++++++--
 .../azure/cosmos/_partition_health_tracker.py      |  2 +-
 .../azure-cosmos/azure/cosmos/_retry_utility.py    |  4 ++--
 .../azure/cosmos/_synchronized_request.py          |  5 +++--
 .../azure/cosmos/aio/_asynchronous_request.py      |  3 ++-
 ...nager_per_partition_automatic_failover_async.py | 14 ++++++++++++--
 7 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
index cd2050cf1dab..a4483a7277a8 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
@@ -943,8 +943,6 @@ def try_ppaf_failover_threshold(
         global_endpoint_manager: "_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover",
         pk_range_wrapper: "PartitionKeyRangeWrapper",
         request: "RequestObject"):
-    """Check if the PPAF threshold is reached for the current partition range, and mark endpoint unavailable if so.
-    """
     # If PPAF is enabled, we track consecutive failures for certain exceptions, and only fail over at a partition
     # level after the threshold is reached
     if request and global_endpoint_manager.is_per_partition_automatic_failover_applicable(request):
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 5514615a92a2..f766805ac494 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -148,7 +148,12 @@ def compute_available_preferred_regions(
     def record_failure(self,
                        request: RequestObject,
                        pk_range_wrapper: Optional[PartitionKeyRangeWrapper] = None) -> None:
-        """Records a failure for the given partition key range and request."""
+        """Records a failure for the given partition key range and request.
+        :param RequestObject request: The request object containing the routing context.
+        :param PartitionKeyRangeWrapper pk_range_wrapper: The wrapper containing the partition key range information
+            for the request.
+        :return: None
+        """
         if self.is_per_partition_automatic_failover_applicable(request):
             if pk_range_wrapper is None:
                 pk_range_wrapper = self.create_pk_range_wrapper(request)
@@ -160,7 +165,12 @@ def record_failure(self,
     def record_success(self,
                        request: RequestObject,
                        pk_range_wrapper: Optional[PartitionKeyRangeWrapper] = None) -> None:
-        """Records a failure for the given partition key range and request."""
+        """Records a success for the given partition key range and request, effectively clearing the failure count.
+        :param RequestObject request: The request object containing the routing context.
+        :param PartitionKeyRangeWrapper pk_range_wrapper: The wrapper containing the partition key range information
+            for the request.
+        :return: None
+        """
         if self.is_per_partition_automatic_failover_applicable(request):
             if pk_range_wrapper is None:
                 pk_range_wrapper = self.create_pk_range_wrapper(request)
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
index e7c44d01120f..f8bf79f956d8 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
@@ -314,4 +314,4 @@ def clear_pk_failures(self, pk_range_wrapper: PartitionKeyRangeWrapper) -> None:
             del self.pk_range_wrapper_to_failure_count[pk_range_wrapper]
 
     def get_pk_failures(self, pk_range_wrapper: PartitionKeyRangeWrapper) -> int:
-        return self.pk_range_wrapper_to_failure_count.get(pk_range_wrapper, 0)
\ No newline at end of file
+        return self.pk_range_wrapper_to_failure_count.get(pk_range_wrapper, 0)
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
index 3033c8d337ed..0eadffda2cb4 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
@@ -294,8 +294,8 @@ def _handle_service_request_retries(
         raise exception
 
 def _handle_service_response_retries(request, client, response_retry_policy, exception, *args):
-    if request and (_has_read_retryable_headers(request.headers) or (args and is_write_retryable(args[0], client)) or
-                    (args and client._global_endpoint_manager.is_per_partition_automatic_failover_applicable(args[0]))):
+    if request and (_has_read_retryable_headers(request.headers) or (args and (is_write_retryable(args[0], client) or
+                            client._global_endpoint_manager.is_per_partition_automatic_failover_applicable(args[0])))):
         # we resolve the request endpoint to the next preferred region
         # once we are out of preferred regions we stop retrying
         retry_policy = response_retry_policy
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
index 8e70f443e052..38aea17474e1 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
@@ -25,15 +25,16 @@
 import json
 import time
 
+from typing import Any
 from urllib.parse import urlparse
 from azure.core.exceptions import DecodeError  # type: ignore
 from azure.core import PipelineClient
-from typing import Any
 
 from . import exceptions, http_constants, _retry_utility
 from .documents import ConnectionPolicy
 from ._request_object import RequestObject
-from ._global_partition_endpoint_manager_per_partition_automatic_failover import _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover
+from ._global_partition_endpoint_manager_per_partition_automatic_failover import (
+    _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover)
 
 
 def _is_readable_stream(obj):
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
index 310998728ed6..6d8c944404b8 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
@@ -35,7 +35,8 @@
 from . import _retry_utility_async
 from ..documents import ConnectionPolicy
 from .._request_object import RequestObject
-from ._global_partition_endpoint_manager_per_partition_automatic_failover_async import _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync
+from ._global_partition_endpoint_manager_per_partition_automatic_failover_async import (
+    _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync)
 from .._synchronized_request import _request_body_from_data, _replace_url_prefix
 
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index a31caaac8238..837665386de5 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -150,7 +150,12 @@ def compute_available_preferred_regions(
     async def record_failure(self,
                              request: RequestObject,
                              pk_range_wrapper: Optional[PartitionKeyRangeWrapper] = None) -> None:
-        """Records a failure for the given partition key range and request."""
+        """Records a failure for the given partition key range and request.
+        :param RequestObject request: The request object containing the routing context.
+        :param PartitionKeyRangeWrapper pk_range_wrapper: The wrapper containing the partition key range information
+            for the request.
+        :return: None
+        """
         if self.is_per_partition_automatic_failover_applicable(request):
             if pk_range_wrapper is None:
                 pk_range_wrapper = await self.create_pk_range_wrapper(request)
@@ -162,7 +167,12 @@ async def record_failure(self,
     async def record_success(self,
                              request: RequestObject,
                              pk_range_wrapper: Optional[PartitionKeyRangeWrapper] = None) -> None:
-        """Records a failure for the given partition key range and request."""
+        """Records a success for the given partition key range and request, effectively clearing the failure count.
+        :param RequestObject request: The request object containing the routing context.
+        :param PartitionKeyRangeWrapper pk_range_wrapper: The wrapper containing the partition key range information
+            for the request.
+        :return: None
+        """
         if self.is_per_partition_automatic_failover_applicable(request):
             if pk_range_wrapper is None:
                 pk_range_wrapper = await self.create_pk_range_wrapper(request)

From 64f95e34ccb0a4078a2c7a4ff3ddf55e30c3591c Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 21 Aug 2025 01:00:23 -0400
Subject: [PATCH 26/68] 503 behavior change, use regional contexts

---
 sdk/cosmos/azure-cosmos/azure/cosmos/_base.py |  6 +-
 .../_endpoint_discovery_retry_policy.py       |  8 ++-
 ...anager_per_partition_automatic_failover.py | 59 +++++++++++--------
 .../azure/cosmos/_partition_health_tracker.py |  2 +-
 .../azure/cosmos/_retry_utility.py            |  2 -
 .../_service_unavailable_retry_policy.py      | 14 ++++-
 ..._per_partition_automatic_failover_async.py | 58 ++++++++++--------
 .../azure/cosmos/aio/_retry_utility_async.py  |  2 -
 .../test_per_partition_automatic_failover.py  | 50 +++++++++++-----
 ..._per_partition_automatic_failover_async.py | 16 ++---
 10 files changed, 137 insertions(+), 80 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
index a4483a7277a8..3fe63759fc8a 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
@@ -952,4 +952,8 @@ def try_ppaf_failover_threshold(
             # If the PPAF threshold is reached, we reset the count and retry to the next region
             global_endpoint_manager.ppaf_thresholds_tracker.clear_pk_failures(pk_range_wrapper)
             partition_level_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
-            partition_level_info.unavailable_regional_endpoints.add(request.location_endpoint_to_route)
+            location = global_endpoint_manager.location_cache.get_location_from_endpoint(
+                str(request.location_endpoint_to_route))
+            regional_context = (global_endpoint_manager.location_cache.
+                                account_read_regional_routing_contexts_by_location.get(location).primary_endpoint)
+            partition_level_info.unavailable_regional_endpoints[location] = regional_context
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
index f29daf770891..df4def458554 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
@@ -67,10 +67,14 @@ def ShouldRetry(self, exception):  # pylint: disable=unused-argument
         self.global_endpoint_manager.refresh_needed = True
 
         # If per partition automatic failover is applicable, we mark the current endpoint as unavailable
-        # and resolve the service endpoint for the partition range - otherwise, continue with the default retry logic
+        # and resolve the service endpoint for the partition range - otherwise, continue the default retry logic
         if self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
             partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[self.pk_range_wrapper]
-            partition_level_info.unavailable_regional_endpoints.add(self.request.location_endpoint_to_route)
+            location = self.global_endpoint_manager.location_cache.get_location_from_endpoint(
+                str(self.request.location_endpoint_to_route))
+            regional_context = (self.global_endpoint_manager.location_cache.
+                                account_read_regional_routing_contexts_by_location.get(location).primary_endpoint)
+            partition_level_info.unavailable_regional_endpoints[location] = regional_context
             self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
             return True
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index f766805ac494..4def9cd308b6 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -7,14 +7,14 @@
 import logging
 import threading
 
-from typing import Dict, Set, TYPE_CHECKING, Optional
+from typing import Dict, TYPE_CHECKING, Optional
 
 from azure.cosmos.http_constants import ResourceType
 from azure.cosmos._global_partition_endpoint_manager_circuit_breaker import \
     _GlobalPartitionEndpointManagerForCircuitBreaker
 from azure.cosmos._partition_health_tracker import _PPAFPartitionThresholdsTracker
 from azure.cosmos.documents import _OperationType
-
+from azure.cosmos._location_cache import RegionalRoutingContext
 from azure.cosmos._request_object import RequestObject
 from azure.cosmos._routing.routing_range import PartitionKeyRangeWrapper
 
@@ -32,28 +32,34 @@ class PartitionLevelFailoverInfo:
     Used to track the partition key range and the regions where it is available.
     """
     def __init__(self):
-        self.unavailable_regional_endpoints = set()
-        self.current_regional_endpoint = None
+        self.unavailable_regional_endpoints: Dict[str, RegionalRoutingContext] = {}
+        self.current_region = None
         self._lock = threading.Lock()
 
-    def try_move_to_next_location(self, available_account_regional_endpoints: Set[str], request: RequestObject) -> bool:
+    def try_move_to_next_location(
+            self,
+            available_account_regional_endpoints: Dict[str, str],
+            endpoint_region: str,
+            request: RequestObject) -> bool:
         with self._lock:
-            failed_regional_endpoint = request.location_endpoint_to_route
-            if failed_regional_endpoint != self.current_regional_endpoint:
-                logger.info("Moving to next available regional endpoint: %s", self.current_regional_endpoint)
-                request.route_to_location(self.current_regional_endpoint)
+            if endpoint_region != self.current_region:
+                logger.info("PPAF - Moving to next available regional endpoint: %s", self.current_region)
+                # make the actual endpoint since the current_region is just West US
+                regional_endpoint = available_account_regional_endpoints[self.current_region]
+                request.route_to_location(regional_endpoint)
                 return True
 
             for regional_endpoint in available_account_regional_endpoints:
-                if regional_endpoint == self.current_regional_endpoint:
+                if regional_endpoint == self.current_region:
                     continue
 
                 if regional_endpoint in self.unavailable_regional_endpoints:
                     continue
 
-                self.current_regional_endpoint = regional_endpoint
-                logger.info("Moving to next available regional endpoint: %s", self.current_regional_endpoint)
-                request.route_to_location(self.current_regional_endpoint)
+                self.current_region = regional_endpoint
+                logger.info("PPAF - Moving to next available regional endpoint: %s", self.current_region)
+                regional_endpoint = available_account_regional_endpoints[self.current_region]
+                request.route_to_location(regional_endpoint)
                 return True
 
             return False
@@ -102,10 +108,12 @@ def resolve_service_endpoint_for_partition(
                 logger.info("Resolving service endpoint for partition with per partition automatic failover enabled.")
                 partition_failover_info = self.partition_range_to_failover_info[pk_range_wrapper]
                 if request.location_endpoint_to_route is not None:
-                    if request.location_endpoint_to_route in partition_failover_info.unavailable_regional_endpoints:
+                    endpoint_region = self.location_cache.get_location_from_endpoint(request.location_endpoint_to_route)
+                    if endpoint_region in partition_failover_info.unavailable_regional_endpoints:
                         # If the current region is unavailable, we try to move to the next available region
                         if not partition_failover_info.try_move_to_next_location(
                                 self.compute_available_preferred_regions(request),
+                                endpoint_region,
                                 request):
                             logger.info("All available regions for partition are unavailable. Refreshing cache.")
                             # If no other region is available, we invalidate the cache and start once again from our
@@ -116,22 +124,26 @@ def resolve_service_endpoint_for_partition(
                                                                                                 pk_range_wrapper)
                     else:
                         # Update the current regional endpoint to whatever the request is routing to
-                        partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
+                        endpoint_region = self.location_cache.get_location_from_endpoint(
+                            request.location_endpoint_to_route)
+                        partition_failover_info.current_region = endpoint_region
             else:
                 partition_failover_info = PartitionLevelFailoverInfo()
-                partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
+                endpoint_region = self.location_cache.get_location_from_endpoint(
+                    request.location_endpoint_to_route)
+                partition_failover_info.current_region = endpoint_region
                 self.partition_range_to_failover_info[pk_range_wrapper] = partition_failover_info
         return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
 
     def compute_available_preferred_regions(
             self,
             request: RequestObject
-    ) -> Set[str]:
+    ) -> Dict[str, str]:
         """
         Computes the available regional endpoints for the request based on customer-set preferred and excluded regions.
         :param RequestObject request: The request object containing the routing context.
         :return: A set of available regional endpoints.
-        :rtype: Set[str]
+        :rtype: Dict[str, str]
         """
         if request.excluded_locations:
             excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
@@ -139,10 +151,10 @@ def compute_available_preferred_regions(
             excluded_locations = self.location_cache.connection_policy.ExcludedLocations
         preferred_locations = self.PreferredLocations
         available_regions = [item for item in preferred_locations if item not in excluded_locations]
-        available_regional_endpoints = {
-            self.location_cache.account_read_regional_routing_contexts_by_location[region].primary_endpoint
-            for region in available_regions
-        }
+        available_regional_endpoints = {}
+        for region, context in self.location_cache.account_read_regional_routing_contexts_by_location.items():
+            if region in available_regions:
+                available_regional_endpoints[region] = context.primary_endpoint
         return available_regional_endpoints
 
     def record_failure(self,
@@ -155,10 +167,11 @@ def record_failure(self,
         :return: None
         """
         if self.is_per_partition_automatic_failover_applicable(request):
+            location = self.location_cache.get_location_from_endpoint(str(request.location_endpoint_to_route))
             if pk_range_wrapper is None:
                 pk_range_wrapper = self.create_pk_range_wrapper(request)
             if pk_range_wrapper:
-                self.ppaf_thresholds_tracker.add_failure(pk_range_wrapper)
+                self.ppaf_thresholds_tracker.add_failure(pk_range_wrapper, location)
         else:
             self.record_ppcb_failure(request, pk_range_wrapper)
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
index f8bf79f956d8..7358f605d67b 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
@@ -304,7 +304,7 @@ class _PPAFPartitionThresholdsTracker(object):
     def __init__(self) -> None:
         self.pk_range_wrapper_to_failure_count: Dict[PartitionKeyRangeWrapper, int] = {}
 
-    def add_failure(self, pk_range_wrapper: PartitionKeyRangeWrapper) -> None:
+    def add_failure(self, pk_range_wrapper: PartitionKeyRangeWrapper, location: Any) -> None:
         if pk_range_wrapper not in self.pk_range_wrapper_to_failure_count:
             self.pk_range_wrapper_to_failure_count[pk_range_wrapper] = 0
         self.pk_range_wrapper_to_failure_count[pk_range_wrapper] += 1
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
index 0eadffda2cb4..df8b8bb04bb0 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
@@ -208,8 +208,6 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
                     retry_policy.container_rid = cached_container["_rid"]
                     request.headers[retry_policy._intended_headers] = retry_policy.container_rid
             elif e.status_code == StatusCodes.SERVICE_UNAVAILABLE:
-                if args and global_endpoint_manager.is_per_partition_automatic_failover_applicable(args[0]):
-                    global_endpoint_manager.record_failure(args[0], pk_range_wrapper)
                 retry_policy = service_unavailable_retry_policy
             elif e.status_code == StatusCodes.REQUEST_TIMEOUT or e.status_code >= StatusCodes.INTERNAL_SERVER_ERROR:
                 if args:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
index e212e869f6ef..245f72bf5489 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
@@ -5,7 +5,6 @@
 Cosmos database service.
 """
 from azure.cosmos.documents import _OperationType
-from azure.cosmos._base import try_ppaf_failover_threshold
 
 #cspell:ignore ppaf
 
@@ -42,7 +41,18 @@ def ShouldRetry(self, _exception):
             return False
 
         if self.request:
-            try_ppaf_failover_threshold(self.global_endpoint_manager, self.pk_range_wrapper, self.request)
+            # If per partition automatic failover is applicable, we mark the current endpoint as unavailable
+            # and resolve the service endpoint for the partition range - otherwise, continue the default retry logic
+            if self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
+                partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[
+                    self.pk_range_wrapper]
+                location = self.global_endpoint_manager.location_cache.get_location_from_endpoint(
+                    str(self.request.location_endpoint_to_route))
+                regional_context = (self.global_endpoint_manager.location_cache.
+                                    account_read_regional_routing_contexts_by_location.get(location).primary_endpoint)
+                partition_level_info.unavailable_regional_endpoints[location] = regional_context
+                self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
+                return True
             location_endpoint = self.resolve_next_region_service_endpoint()
             self.request.route_to_location(location_endpoint)
         return True
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 837665386de5..fe28ab2c71b6 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -13,7 +13,7 @@
 from azure.cosmos.aio._global_partition_endpoint_manager_circuit_breaker_async import \
     _GlobalPartitionEndpointManagerForCircuitBreakerAsync
 from azure.cosmos.documents import _OperationType
-
+from azure.cosmos._location_cache import RegionalRoutingContext
 from azure.cosmos._partition_health_tracker import _PPAFPartitionThresholdsTracker
 from azure.cosmos._request_object import RequestObject
 from azure.cosmos._routing.routing_range import PartitionKeyRangeWrapper
@@ -32,28 +32,34 @@ class PartitionLevelFailoverInfo:
     Used to track the partition key range and the regions where it is available.
     """
     def __init__(self):
-        self.unavailable_regional_endpoints = set()
-        self.current_regional_endpoint = None
+        self.unavailable_regional_endpoints: Dict[str, RegionalRoutingContext] = {}
+        self.current_region = None
         self._lock = threading.Lock()
 
-    def try_move_to_next_location(self, available_account_regional_endpoints: Set[str], request: RequestObject) -> bool:
+    def try_move_to_next_location(
+            self,
+            available_account_regional_endpoints: Dict[str, str],
+            endpoint_region: str,
+            request: RequestObject) -> bool:
         with self._lock:
-            failed_regional_endpoint = request.location_endpoint_to_route
-            if failed_regional_endpoint != self.current_regional_endpoint:
-                logger.info("Moving to next available regional endpoint: %s", self.current_regional_endpoint)
-                request.route_to_location(self.current_regional_endpoint)
+            if endpoint_region != self.current_region:
+                logger.info("PPAF - Moving to next available regional endpoint: %s", self.current_region)
+                # make the actual endpoint since the current_region is just West US
+                regional_endpoint = available_account_regional_endpoints[self.current_region]
+                request.route_to_location(regional_endpoint)
                 return True
 
             for regional_endpoint in available_account_regional_endpoints:
-                if regional_endpoint == self.current_regional_endpoint:
+                if regional_endpoint == self.current_region:
                     continue
 
                 if regional_endpoint in self.unavailable_regional_endpoints:
                     continue
 
-                self.current_regional_endpoint = regional_endpoint
-                logger.info("Moving to next available regional endpoint: %s", self.current_regional_endpoint)
-                request.route_to_location(self.current_regional_endpoint)
+                self.current_region = regional_endpoint
+                logger.info("PPAF - Moving to next available regional endpoint: %s", self.current_region)
+                regional_endpoint = available_account_regional_endpoints[self.current_region]
+                request.route_to_location(regional_endpoint)
                 return True
 
             return False
@@ -103,10 +109,12 @@ def resolve_service_endpoint_for_partition(
                 logger.info("Resolving service endpoint for partition with per partition automatic failover enabled.")
                 partition_failover_info = self.partition_range_to_failover_info[pk_range_wrapper]
                 if request.location_endpoint_to_route is not None:
-                    if request.location_endpoint_to_route in partition_failover_info.unavailable_regional_endpoints:
+                    endpoint_region = self.location_cache.get_location_from_endpoint(request.location_endpoint_to_route)
+                    if endpoint_region in partition_failover_info.unavailable_regional_endpoints:
                         # If the current region is unavailable, we try to move to the next available region
                         if not partition_failover_info.try_move_to_next_location(
                                 self.compute_available_preferred_regions(request),
+                                endpoint_region,
                                 request):
                             logger.info("All available regions for partition are unavailable. Refreshing cache.")
                             # If no other region is available, we invalidate the cache and start once again from our
@@ -117,23 +125,26 @@ def resolve_service_endpoint_for_partition(
                                                                                                 pk_range_wrapper)
                     else:
                         # Update the current regional endpoint to whatever the request is routing to
-                        partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
+                        endpoint_region = self.location_cache.get_location_from_endpoint(
+                            request.location_endpoint_to_route)
+                        partition_failover_info.current_region = endpoint_region
             else:
                 partition_failover_info = PartitionLevelFailoverInfo()
-                partition_failover_info.current_regional_endpoint = request.location_endpoint_to_route
+                endpoint_region = self.location_cache.get_location_from_endpoint(
+                    request.location_endpoint_to_route)
+                partition_failover_info.current_region = endpoint_region
                 self.partition_range_to_failover_info[pk_range_wrapper] = partition_failover_info
-            return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
         return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
 
     def compute_available_preferred_regions(
             self,
             request: RequestObject
-    ) -> Set[str]:
+    ) -> Dict[str, str]:
         """
         Computes the available regional endpoints for the request based on customer-set preferred and excluded regions.
         :param RequestObject request: The request object containing the routing context.
         :return: A set of available regional endpoints.
-        :rtype: Set[str]
+        :rtype: Dict[str, str]
         """
         if request.excluded_locations:
             excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
@@ -141,10 +152,10 @@ def compute_available_preferred_regions(
             excluded_locations = self.location_cache.connection_policy.ExcludedLocations
         preferred_locations = self.PreferredLocations
         available_regions = [item for item in preferred_locations if item not in excluded_locations]
-        available_regional_endpoints = {
-            self.location_cache.account_read_regional_routing_contexts_by_location[region].primary_endpoint
-            for region in available_regions
-        }
+        available_regional_endpoints = {}
+        for region, context in self.location_cache.account_read_regional_routing_contexts_by_location.items():
+            if region in available_regions:
+                available_regional_endpoints[region] = context.primary_endpoint
         return available_regional_endpoints
 
     async def record_failure(self,
@@ -157,10 +168,11 @@ async def record_failure(self,
         :return: None
         """
         if self.is_per_partition_automatic_failover_applicable(request):
+            location = self.location_cache.get_location_from_endpoint(str(request.location_endpoint_to_route))
             if pk_range_wrapper is None:
                 pk_range_wrapper = await self.create_pk_range_wrapper(request)
             if pk_range_wrapper:
-                self.ppaf_thresholds_tracker.add_failure(pk_range_wrapper)
+                self.ppaf_thresholds_tracker.add_failure(pk_range_wrapper, location)
         else:
             await self.record_ppcb_failure(request, pk_range_wrapper)
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
index 09c04ae9009a..6afc01c53f02 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
@@ -207,8 +207,6 @@ async def ExecuteAsync(client, global_endpoint_manager, function, *args, **kwarg
                     request.headers[retry_policy._intended_headers] = retry_policy.container_rid
             elif e.status_code == StatusCodes.SERVICE_UNAVAILABLE:
                 # if ppaf is applicable, we record the failure
-                if args and global_endpoint_manager.is_per_partition_automatic_failover_applicable(args[0]):
-                    await global_endpoint_manager.record_failure(args[0], pk_range_wrapper)
                 retry_policy = service_unavailable_retry_policy
             elif e.status_code == StatusCodes.REQUEST_TIMEOUT or e.status_code >= StatusCodes.INTERNAL_SERVER_ERROR:
                 if args:
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index 743659e92d5b..8c363fb10f31 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -16,7 +16,7 @@
 
 def create_failover_errors():
     errors = []
-    error_codes = [403]
+    error_codes = [403, 503]
     for error_code in error_codes:
         errors.append(CosmosHttpResponseError(
             status_code=error_code,
@@ -26,7 +26,7 @@ def create_failover_errors():
 
 def create_threshold_errors():
     errors = []
-    error_codes = [408, 500, 502, 503, 504]
+    error_codes = [408, 500, 502, 504]
     for error_code in error_codes:
         errors.append(CosmosHttpResponseError(
             status_code=error_code,
@@ -43,12 +43,13 @@ class TestPerPartitionAutomaticFailover:
     TEST_DATABASE_ID = test_config.TestConfig.TEST_DATABASE_ID
     TEST_CONTAINER_MULTI_PARTITION_ID = test_config.TestConfig.TEST_MULTI_PARTITION_CONTAINER_ID
 
-    def setup_method_with_custom_transport(self, custom_transport, default_endpoint=host, **kwargs):
+    def setup_method_with_custom_transport(self, custom_transport, default_endpoint=host, read_first=False, **kwargs):
+        regions = [REGION_2, REGION_1] if read_first else [REGION_1, REGION_2]
         container_id = kwargs.pop("container_id", None)
         if not container_id:
             container_id = self.TEST_CONTAINER_MULTI_PARTITION_ID
         client = CosmosClient(default_endpoint, self.master_key, consistency_level="Session",
-                              preferred_locations=[REGION_1, REGION_2],
+                              preferred_locations=regions,
                               transport=custom_transport, **kwargs)
         db = client.get_database_client(self.TEST_DATABASE_ID)
         container = db.get_container_client(container_id)
@@ -93,7 +94,7 @@ def test_ppaf_partition_info_cache_and_routing(self, write_operation, error):
         fault_injection_container.create_item(body={'id': doc_success_id, 'pk': PK_VALUE,
                                                     'name': 'sample document', 'key': 'value'})
         pk_range_wrapper = list(global_endpoint_manager.partition_range_to_failover_info.keys())[0]
-        initial_endpoint = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_regional_endpoint
+        initial_region = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_region
 
         # Based on our configuration, we should have had one error followed by a success - marking only the previous endpoint as unavailable
         perform_write_operation(
@@ -105,8 +106,8 @@ def test_ppaf_partition_info_cache_and_routing(self, write_operation, error):
         partition_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
         # Verify that the partition is marked as unavailable, and that the current regional endpoint is not the same
         assert len(partition_info.unavailable_regional_endpoints) == 1
-        assert initial_endpoint in partition_info.unavailable_regional_endpoints
-        assert initial_endpoint != partition_info.current_regional_endpoint # west us 3 != west us
+        assert initial_region in partition_info.unavailable_regional_endpoints
+        assert initial_region != partition_info.current_region # west us 3 != west us
 
         # Now we run another request to see how the cache gets updated
         perform_write_operation(
@@ -119,8 +120,8 @@ def test_ppaf_partition_info_cache_and_routing(self, write_operation, error):
         # Verify that the cache is empty, since the request going to the second regional endpoint failed
         # Once we reach the point of all available regions being marked as unavailable, the cache is cleared
         assert len(partition_info.unavailable_regional_endpoints) == 0
-        assert initial_endpoint not in partition_info.unavailable_regional_endpoints
-        assert partition_info.current_regional_endpoint is None
+        assert initial_region not in partition_info.unavailable_regional_endpoints
+        assert partition_info.current_region is None
 
 
     @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_threshold_errors()))
@@ -138,8 +139,7 @@ def test_ppaf_partition_thresholds_and_routing(self, write_operation, error):
         fault_injection_container.create_item(body={'id': doc_success_id, 'pk': PK_VALUE,
                                                     'name': 'sample document', 'key': 'value'})
         pk_range_wrapper = list(global_endpoint_manager.partition_range_to_failover_info.keys())[0]
-        initial_endpoint = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_regional_endpoint
-
+        initial_region = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_region
 
         is_503 = hasattr(error, 'status_code') and error.status_code == 503
         # Since 503 errors are retried by default, we each request counts as two failures
@@ -172,8 +172,8 @@ def test_ppaf_partition_thresholds_and_routing(self, write_operation, error):
         partition_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
         # Verify that the partition is marked as unavailable, and that the current regional endpoint is not the same
         assert len(partition_info.unavailable_regional_endpoints) == 1
-        assert initial_endpoint in partition_info.unavailable_regional_endpoints
-        assert initial_endpoint != partition_info.current_regional_endpoint # west us 3 != west us
+        assert initial_region in partition_info.unavailable_regional_endpoints
+        assert initial_region != partition_info.current_region # west us 3 != west us
 
         # Since we are failing every request, even though we retried to the next region, that retry should have failed as well
         # This means we should have one extra failure - verify that the value makes sense
@@ -185,9 +185,27 @@ def test_ppaf_exclude_regions(self, write_operation, error):
         # TODO: finish this test
         return
 
-    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_failover_errors()))
-    def test_ppaf_invalid_configs(self, write_operation, error):
-        # TODO: finish this test
+    def test_ppaf_session_unavailable_retry(self):
+        # For this test, the main requirement is to have 3 regions total in the account: A, B and C.
+        # Writes go to region A, and reads go to region C. Preferred locations are set to C, B, A in order.
+        # This test depends on the fact that the chosen region for the failover is region B and not region C.
+        # We will inject a 403.3 error on region A, marking it as unavailable with PPAF. We verify the retry goes
+        # to region B next. Next we inject a 404.1002 to a read in the same partition, which should retry to region B
+        # as well since A was marked as unavailable in the context of PPAF.
+
+        # For this test, we have two regions in the account West US 3 (write) and West US (read).
+        # Writes go to West US 3, and reads go to region C - preferred locations are set to that order.
+        # This test depends on the fact that the chosen region for the failover is region B and not region C.
+        # We will inject a 403.3 error on region A, marking it as unavailable with PPAF. We verify the retry goes
+        # to region B next. Next we inject a 404.1002 to a read in the same partition, which should retry to region B
+        # as well since A was marked as unavailable in the context of PPAF.
+
+        # Account config has 2 regions: West US 3 (A) and West US (B).
+        error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, error)
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(error_lambda, read_first=True)
+        container = setup['col']
+        fault_injection_container = custom_setup['col']
+        global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
         return
 
 
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
index 99e433bc6af4..a7949038b625 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -86,7 +86,7 @@ async def test_ppaf_partition_info_cache_and_routing_async(self, write_operation
         await fault_injection_container.create_item(body={'id': doc_success_id, 'pk': PK_VALUE,
                                                     'name': 'sample document', 'key': 'value'})
         pk_range_wrapper = list(global_endpoint_manager.partition_range_to_failover_info.keys())[0]
-        initial_endpoint = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_regional_endpoint
+        initial_region = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_region
 
         # Based on our configuration, we should have had one error followed by a success - marking only the previous endpoint as unavailable
         await perform_write_operation(
@@ -98,8 +98,8 @@ async def test_ppaf_partition_info_cache_and_routing_async(self, write_operation
         partition_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
         # Verify that the partition is marked as unavailable, and that the current regional endpoint is not the same
         assert len(partition_info.unavailable_regional_endpoints) == 1
-        assert initial_endpoint in partition_info.unavailable_regional_endpoints
-        assert initial_endpoint != partition_info.current_regional_endpoint # west us 3 != west us
+        assert initial_region in partition_info.unavailable_regional_endpoints
+        assert initial_region != partition_info.current_region # west us 3 != west us
 
         # Now we run another request to see how the cache gets updated
         await perform_write_operation(
@@ -112,8 +112,8 @@ async def test_ppaf_partition_info_cache_and_routing_async(self, write_operation
         # Verify that the cache is empty, since the request going to the second regional endpoint failed
         # Once we reach the point of all available regions being marked as unavailable, the cache is cleared
         assert len(partition_info.unavailable_regional_endpoints) == 0
-        assert initial_endpoint not in partition_info.unavailable_regional_endpoints
-        assert partition_info.current_regional_endpoint is None
+        assert initial_region not in partition_info.unavailable_regional_endpoints
+        assert partition_info.current_region is None
 
     @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_threshold_errors()))
     async def test_ppaf_partition_thresholds_and_routing_async(self, write_operation, error):
@@ -129,7 +129,7 @@ async def test_ppaf_partition_thresholds_and_routing_async(self, write_operation
         await fault_injection_container.create_item(body={'id': doc_success_id, 'pk': PK_VALUE,
                                                     'name': 'sample document', 'key': 'value'})
         pk_range_wrapper = list(global_endpoint_manager.partition_range_to_failover_info.keys())[0]
-        initial_endpoint = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_regional_endpoint
+        initial_region = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_region
 
         is_503 = hasattr(error, 'status_code') and error.status_code == 503
         # Since 503 errors are retried by default, we each request counts as two failures
@@ -163,8 +163,8 @@ async def test_ppaf_partition_thresholds_and_routing_async(self, write_operation
         partition_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
         # Verify that the partition is marked as unavailable, and that the current regional endpoint is not the same
         assert len(partition_info.unavailable_regional_endpoints) == 1
-        assert initial_endpoint in partition_info.unavailable_regional_endpoints
-        assert initial_endpoint != partition_info.current_regional_endpoint # west us 3 != west us
+        assert initial_region in partition_info.unavailable_regional_endpoints
+        assert initial_region != partition_info.current_region # west us 3 != west us
 
         # Since we are failing every request, even though we retried to the next region, that retry should have failed as well
         # This means we should have one extra failure - verify that the value makes sense

From d05fc5e2c82ad4287ce4e61f874284b5afc0b7c7 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 21 Aug 2025 10:03:36 -0400
Subject: [PATCH 27/68] mypy, pylint, tests

---
 ...int_manager_per_partition_automatic_failover.py |  3 +--
 .../azure/cosmos/_partition_health_tracker.py      |  2 +-
 .../azure/cosmos/_synchronized_request.py          | 14 +-------------
 .../azure/cosmos/aio/_asynchronous_request.py      | 14 +-------------
 ...nager_per_partition_automatic_failover_async.py |  5 ++---
 .../test_timeout_and_failover_retry_policy.py      | 11 +++++++----
 ...test_timeout_and_failover_retry_policy_async.py |  3 +--
 7 files changed, 14 insertions(+), 38 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 4def9cd308b6..aecbe2209c9d 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -167,11 +167,10 @@ def record_failure(self,
         :return: None
         """
         if self.is_per_partition_automatic_failover_applicable(request):
-            location = self.location_cache.get_location_from_endpoint(str(request.location_endpoint_to_route))
             if pk_range_wrapper is None:
                 pk_range_wrapper = self.create_pk_range_wrapper(request)
             if pk_range_wrapper:
-                self.ppaf_thresholds_tracker.add_failure(pk_range_wrapper, location)
+                self.ppaf_thresholds_tracker.add_failure(pk_range_wrapper)
         else:
             self.record_ppcb_failure(request, pk_range_wrapper)
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
index 7358f605d67b..f8bf79f956d8 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
@@ -304,7 +304,7 @@ class _PPAFPartitionThresholdsTracker(object):
     def __init__(self) -> None:
         self.pk_range_wrapper_to_failure_count: Dict[PartitionKeyRangeWrapper, int] = {}
 
-    def add_failure(self, pk_range_wrapper: PartitionKeyRangeWrapper, location: Any) -> None:
+    def add_failure(self, pk_range_wrapper: PartitionKeyRangeWrapper) -> None:
         if pk_range_wrapper not in self.pk_range_wrapper_to_failure_count:
             self.pk_range_wrapper_to_failure_count[pk_range_wrapper] = 0
         self.pk_range_wrapper_to_failure_count[pk_range_wrapper] += 1
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
index 38aea17474e1..e6109b5bd621 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
@@ -25,16 +25,10 @@
 import json
 import time
 
-from typing import Any
 from urllib.parse import urlparse
 from azure.core.exceptions import DecodeError  # type: ignore
-from azure.core import PipelineClient
 
 from . import exceptions, http_constants, _retry_utility
-from .documents import ConnectionPolicy
-from ._request_object import RequestObject
-from ._global_partition_endpoint_manager_per_partition_automatic_failover import (
-    _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover)
 
 
 def _is_readable_stream(obj):
@@ -69,13 +63,7 @@ def _request_body_from_data(data):
     return None
 
 
-def _Request(
-        global_endpoint_manager: _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover,
-        request_params: RequestObject,
-        connection_policy: ConnectionPolicy,
-        pipeline_client: PipelineClient,
-        request: Any,
-        **kwargs): # pylint: disable=too-many-statements
+def _Request(global_endpoint_manager, request_params, connection_policy, pipeline_client, request, **kwargs): # pylint: disable=too-many-statements
     """Makes one http request using the requests module.
 
     :param _GlobalEndpointManager global_endpoint_manager:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
index 6d8c944404b8..1cd2a22039b4 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
@@ -24,29 +24,17 @@
 import copy
 import json
 import time
-from typing import Any
 
 from urllib.parse import urlparse
-from azure.core import AsyncPipelineClient
 from azure.core.exceptions import DecodeError  # type: ignore
 
 from .. import exceptions
 from .. import http_constants
 from . import _retry_utility_async
-from ..documents import ConnectionPolicy
-from .._request_object import RequestObject
-from ._global_partition_endpoint_manager_per_partition_automatic_failover_async import (
-    _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync)
 from .._synchronized_request import _request_body_from_data, _replace_url_prefix
 
 
-async def _Request(
-        global_endpoint_manager: _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync,
-        request_params: RequestObject,
-        connection_policy: ConnectionPolicy,
-        pipeline_client: AsyncPipelineClient,
-        request: Any,
-        **kwargs): # pylint: disable=too-many-statements
+async def _Request(global_endpoint_manager, request_params, connection_policy, pipeline_client, request, **kwargs): # pylint: disable=too-many-statements
     """Makes one http request using the requests module.
 
     :param _GlobalEndpointManager global_endpoint_manager:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index fe28ab2c71b6..5b805b2e145d 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -7,7 +7,7 @@
 import logging
 import threading
 
-from typing import Dict, Set, TYPE_CHECKING, Optional
+from typing import Dict, TYPE_CHECKING, Optional
 
 from azure.cosmos.http_constants import ResourceType
 from azure.cosmos.aio._global_partition_endpoint_manager_circuit_breaker_async import \
@@ -168,11 +168,10 @@ async def record_failure(self,
         :return: None
         """
         if self.is_per_partition_automatic_failover_applicable(request):
-            location = self.location_cache.get_location_from_endpoint(str(request.location_endpoint_to_route))
             if pk_range_wrapper is None:
                 pk_range_wrapper = await self.create_pk_range_wrapper(request)
             if pk_range_wrapper:
-                self.ppaf_thresholds_tracker.add_failure(pk_range_wrapper, location)
+                self.ppaf_thresholds_tracker.add_failure(pk_range_wrapper)
         else:
             await self.record_ppcb_failure(request, pk_range_wrapper)
 
diff --git a/sdk/cosmos/azure-cosmos/tests/test_timeout_and_failover_retry_policy.py b/sdk/cosmos/azure-cosmos/tests/test_timeout_and_failover_retry_policy.py
index 4c21ed121441..69cd625e5104 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_timeout_and_failover_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_timeout_and_failover_retry_policy.py
@@ -79,9 +79,10 @@ def test_timeout_failover_retry_policy_for_read_failure(self, setup, error_code)
 
         created_document = setup[COLLECTION].create_item(body=document_definition)
         self.original_execute_function = _retry_utility.ExecuteFunction
+        num_exceptions = max(2, len(setup[COLLECTION].client_connection._global_endpoint_manager.location_cache.read_regional_routing_contexts))
         try:
-            # should retry once and then fail
-            mf = self.MockExecuteFunction(self.original_execute_function, 2, error_code)
+            # should retry and then fail
+            mf = self.MockExecuteFunction(self.original_execute_function, num_exceptions, error_code)
             _retry_utility.ExecuteFunction = mf
             setup[COLLECTION].read_item(item=created_document['id'],
                                               partition_key=created_document['pk'])
@@ -99,9 +100,11 @@ def test_timeout_failover_retry_policy_for_write_failure(self, setup, error_code
                                'key': 'value'}
 
         self.original_execute_function = _retry_utility.ExecuteFunction
+        num_exceptions_503 = max(2, len(setup[COLLECTION].client_connection._global_endpoint_manager.location_cache.write_regional_routing_contexts))
         try:
-            # timeouts should fail immediately for writes
-            mf = self.MockExecuteFunction(self.original_execute_function,0, error_code)
+            # timeouts should fail immediately for writes - except for 503s, which should retry on every preferred location
+            num_exceptions = num_exceptions_503 if error_code == 503 else 0
+            mf = self.MockExecuteFunction(self.original_execute_function, num_exceptions, error_code)
             _retry_utility.ExecuteFunction = mf
             try:
                 setup[COLLECTION].create_item(body=document_definition)
diff --git a/sdk/cosmos/azure-cosmos/tests/test_timeout_and_failover_retry_policy_async.py b/sdk/cosmos/azure-cosmos/tests/test_timeout_and_failover_retry_policy_async.py
index bf73d6c48a12..506fa826c29d 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_timeout_and_failover_retry_policy_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_timeout_and_failover_retry_policy_async.py
@@ -38,7 +38,6 @@ async def setup():
 
 
 def error_codes():
-    return [503]
     return [408, 500, 502, 503]
 
 
@@ -137,7 +136,7 @@ async def test_timeout_failover_retry_policy_for_write_failure_async(self, setup
         try:
             # timeouts should fail immediately for writes - except for 503s, which should retry on every preferred location
             num_exceptions = num_exceptions_503 if error_code == 503 else 0
-            mf = self.MockExecuteFunction(self.original_execute_function,num_exceptions, error_code)
+            mf = self.MockExecuteFunction(self.original_execute_function, num_exceptions, error_code)
             _retry_utility_async.ExecuteFunctionAsync = mf
             try:
                 await setup[COLLECTION].create_item(body=document_definition)

From 85b2007d4f1877216e7a11eacff25df1cf249686 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 21 Aug 2025 12:01:10 -0400
Subject: [PATCH 28/68] special-casing 503s

---
 .../azure-cosmos/tests/test_circuit_breaker_emulator.py       | 4 ++++
 .../azure-cosmos/tests/test_circuit_breaker_emulator_async.py | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator.py b/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator.py
index 0eb863e6ceac..a32600d239c6 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator.py
@@ -121,6 +121,8 @@ def test_write_consecutive_failure_threshold_delete_all_items_by_pk_sm(self, set
 
     @pytest.mark.parametrize("error", create_errors())
     def test_write_consecutive_failure_threshold_delete_all_items_by_pk_mm(self, setup_teardown, error):
+        if error.status_code == 503:
+            pytest.skip("ServiceUnavailableError will do a cross-region retry, so it has to be special cased.")
         error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, error)
         setup, doc, expected_uri, uri_down, custom_setup, custom_transport, predicate = self.setup_info(error_lambda, mm=True)
         fault_injection_container = custom_setup['col']
@@ -176,6 +178,8 @@ def test_write_consecutive_failure_threshold_delete_all_items_by_pk_mm(self, set
 
     @pytest.mark.parametrize("error", create_errors())
     def test_write_failure_rate_threshold_delete_all_items_by_pk_mm(self, setup_teardown, error):
+        if error.status_code == 503:
+            pytest.skip("ServiceUnavailableError will do a cross-region retry, so it has to be special cased.")
         error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, error)
         setup, doc, expected_uri, uri_down, custom_setup, custom_transport, predicate = self.setup_info(error_lambda, mm=True)
         fault_injection_container = custom_setup['col']
diff --git a/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator_async.py b/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator_async.py
index 601aca23bfe5..504a496319aa 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator_async.py
@@ -125,6 +125,8 @@ async def test_write_consecutive_failure_threshold_delete_all_items_by_pk_sm_asy
 
     @pytest.mark.parametrize("error", create_errors())
     async def test_write_consecutive_failure_threshold_delete_all_items_by_pk_mm_async(self, setup_teardown, error):
+        if error.status_code == 503:
+            pytest.skip("ServiceUnavailableError will do a cross-region retry, so it has to be special cased.")
         error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(0, error))
         setup, doc, expected_uri, uri_down, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda, mm=True)
         fault_injection_container = custom_setup['col']
@@ -181,6 +183,8 @@ async def test_write_consecutive_failure_threshold_delete_all_items_by_pk_mm_asy
 
     @pytest.mark.parametrize("error", create_errors())
     async def test_write_failure_rate_threshold_delete_all_items_by_pk_mm_async(self, setup_teardown, error):
+        if error.status_code == 503:
+            pytest.skip("ServiceUnavailableError will do a cross-region retry, so it has to be special cased.")
         error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(0, error))
         setup, doc, expected_uri, uri_down, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda, mm=True)
         fault_injection_container = custom_setup['col']

From f8fa70a5139900306ecd9fdc0d8f814da0d27278 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 21 Aug 2025 12:57:59 -0400
Subject: [PATCH 29/68] small fix

---
 .../azure-cosmos/tests/test_circuit_breaker_emulator.py       | 4 ++--
 .../azure-cosmos/tests/test_circuit_breaker_emulator_async.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator.py b/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator.py
index a32600d239c6..42ab6bcd722d 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator.py
@@ -121,7 +121,7 @@ def test_write_consecutive_failure_threshold_delete_all_items_by_pk_sm(self, set
 
     @pytest.mark.parametrize("error", create_errors())
     def test_write_consecutive_failure_threshold_delete_all_items_by_pk_mm(self, setup_teardown, error):
-        if error.status_code == 503:
+        if hasattr(error, "status_code") and error.status_code == 503:
             pytest.skip("ServiceUnavailableError will do a cross-region retry, so it has to be special cased.")
         error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, error)
         setup, doc, expected_uri, uri_down, custom_setup, custom_transport, predicate = self.setup_info(error_lambda, mm=True)
@@ -178,7 +178,7 @@ def test_write_consecutive_failure_threshold_delete_all_items_by_pk_mm(self, set
 
     @pytest.mark.parametrize("error", create_errors())
     def test_write_failure_rate_threshold_delete_all_items_by_pk_mm(self, setup_teardown, error):
-        if error.status_code == 503:
+        if hasattr(error, "status_code") and error.status_code == 503:
             pytest.skip("ServiceUnavailableError will do a cross-region retry, so it has to be special cased.")
         error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, error)
         setup, doc, expected_uri, uri_down, custom_setup, custom_transport, predicate = self.setup_info(error_lambda, mm=True)
diff --git a/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator_async.py b/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator_async.py
index 504a496319aa..46a211f269bb 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_circuit_breaker_emulator_async.py
@@ -125,7 +125,7 @@ async def test_write_consecutive_failure_threshold_delete_all_items_by_pk_sm_asy
 
     @pytest.mark.parametrize("error", create_errors())
     async def test_write_consecutive_failure_threshold_delete_all_items_by_pk_mm_async(self, setup_teardown, error):
-        if error.status_code == 503:
+        if hasattr(error, "status_code") and error.status_code == 503:
             pytest.skip("ServiceUnavailableError will do a cross-region retry, so it has to be special cased.")
         error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(0, error))
         setup, doc, expected_uri, uri_down, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda, mm=True)
@@ -183,7 +183,7 @@ async def test_write_consecutive_failure_threshold_delete_all_items_by_pk_mm_asy
 
     @pytest.mark.parametrize("error", create_errors())
     async def test_write_failure_rate_threshold_delete_all_items_by_pk_mm_async(self, setup_teardown, error):
-        if error.status_code == 503:
+        if hasattr(error, "status_code") and error.status_code == 503:
             pytest.skip("ServiceUnavailableError will do a cross-region retry, so it has to be special cased.")
         error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(0, error))
         setup, doc, expected_uri, uri_down, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda, mm=True)

From e5c5ac53f55aeb3e9c23a7f4d87aedb37a4e43d9 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 21 Aug 2025 13:46:02 -0400
Subject: [PATCH 30/68] exclude region tests

---
 .../azure/cosmos/cosmos_client.py             |  4 +-
 ..._per_partition_automatic_failover_async.py | 47 ++++++++++++++-----
 .../test_per_partition_circuit_breaker_mm.py  |  9 ++++
 3 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/cosmos_client.py b/sdk/cosmos/azure-cosmos/azure/cosmos/cosmos_client.py
index af19d54cf671..09eaa082e91e 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/cosmos_client.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/cosmos_client.py
@@ -95,7 +95,9 @@ def _build_connection_policy(kwargs: Dict[str, Any]) -> ConnectionPolicy:
     policy.EnableEndpointDiscovery = kwargs.pop('enable_endpoint_discovery', policy.EnableEndpointDiscovery)
     policy.PreferredLocations = kwargs.pop('preferred_locations', policy.PreferredLocations)
     # TODO: Consider storing callback method instead, such as 'Supplier' in JAVA SDK
-    policy.ExcludedLocations = kwargs.pop('excluded_locations', policy.ExcludedLocations)
+    excluded_locations = kwargs.pop('excluded_locations', policy.ExcludedLocations)
+    if excluded_locations:
+        policy.ExcludedLocations = excluded_locations
     policy.UseMultipleWriteLocations = kwargs.pop('multiple_write_locations', policy.UseMultipleWriteLocations)
 
     # SSL config
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
index a7949038b625..c18e6ed9abb4 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -13,10 +13,11 @@
 from azure.core.exceptions import ServiceResponseError
 from azure.cosmos.exceptions import CosmosHttpResponseError
 from azure.cosmos.aio import CosmosClient
+from azure.cosmos._request_object import RequestObject
 from _fault_injection_transport import FaultInjectionTransport
 from _fault_injection_transport_async import FaultInjectionTransportAsync
 from test_per_partition_automatic_failover import create_failover_errors, create_threshold_errors
-from test_per_partition_circuit_breaker_mm import REGION_1, REGION_2, PK_VALUE, BATCH, write_operations_and_errors
+from test_per_partition_circuit_breaker_mm import REGION_1, REGION_2, PK_VALUE, BATCH, write_operations_and_errors, write_operations_and_boolean
 from test_per_partition_circuit_breaker_mm_async import perform_write_operation
 
 # cspell:disable
@@ -31,15 +32,23 @@ class TestPerPartitionAutomaticFailoverAsync:
     TEST_DATABASE_ID = test_config.TestConfig.TEST_DATABASE_ID
     TEST_CONTAINER_MULTI_PARTITION_ID = test_config.TestConfig.TEST_MULTI_PARTITION_CONTAINER_ID
 
-    async def setup_method_with_custom_transport(self, custom_transport: Optional[AioHttpTransport], default_endpoint=host, **kwargs):
+    async def setup_method_with_custom_transport(self, custom_transport: Optional[AioHttpTransport],
+                                                 default_endpoint=host, read_first=False, **kwargs):
+        regions = [REGION_2, REGION_1] if read_first else [REGION_1, REGION_2]
         container_id = kwargs.pop("container_id", None)
+        exclude_client_regions = kwargs.pop("exclude_client_regions", False)
+        excluded_regions = []
+        if exclude_client_regions:
+            excluded_regions = [REGION_2]
         if not container_id:
             container_id = self.TEST_CONTAINER_MULTI_PARTITION_ID
         client = CosmosClient(default_endpoint, self.master_key, consistency_level="Session",
-                              preferred_locations=[REGION_1, REGION_2],
+                              preferred_locations=regions,
+                              excluded_locations=excluded_regions,
                               transport=custom_transport, **kwargs)
         db = client.get_database_client(self.TEST_DATABASE_ID)
         container = db.get_container_client(container_id)
+        await client.__aenter__()
         return {"client": client, "db": db, "col": container}
     
     @staticmethod
@@ -47,7 +56,7 @@ async def cleanup_method(initialized_objects: Dict[str, Any]):
         method_client: CosmosClient = initialized_objects["client"]
         await method_client.close()
 
-    async def setup_info(self, error, max_count=None, is_batch=False, **kwargs):
+    async def setup_info(self, error=None, max_count=None, is_batch=False, exclude_client_regions=False, **kwargs):
         custom_transport = FaultInjectionTransportAsync()
         # two documents targeted to same partition, one will always fail and the other will succeed
         doc_fail_id = str(uuid.uuid4())
@@ -59,8 +68,9 @@ async def setup_info(self, error, max_count=None, is_batch=False, **kwargs):
             success_response = FaultInjectionTransportAsync.MockHttpResponse(mock_request, 200, [{"statusCode": 200}],)
         else:
             success_response = FaultInjectionTransportAsync.MockHttpResponse(mock_request, 200)
-        custom_transport.add_fault(predicate=predicate, fault_factory=error, max_inner_count=max_count,
-                                   after_max_count=success_response)
+        if error:
+            custom_transport.add_fault(predicate=predicate, fault_factory=error, max_inner_count=max_count,
+                                       after_max_count=success_response)
         is_get_account_predicate = lambda r: FaultInjectionTransportAsync.predicate_is_database_account_call(r)
         # Set the database account response to have PPAF enabled
         ppaf_enabled_database_account = \
@@ -68,8 +78,10 @@ async def setup_info(self, error, max_count=None, is_batch=False, **kwargs):
         custom_transport.add_response_transformation(
             is_get_account_predicate,
             ppaf_enabled_database_account)
-        setup = await self.setup_method_with_custom_transport(None, default_endpoint=self.host, **kwargs)
-        custom_setup = await self.setup_method_with_custom_transport(custom_transport, default_endpoint=self.host, **kwargs)
+        setup = await self.setup_method_with_custom_transport(None, default_endpoint=self.host,
+                                                        exclude_client_regions=exclude_client_regions, **kwargs)
+        custom_setup = await self.setup_method_with_custom_transport(custom_transport, default_endpoint=self.host,
+                                                        exclude_client_regions=exclude_client_regions, **kwargs)
         return setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate
 
     @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_failover_errors()))
@@ -171,11 +183,20 @@ async def test_ppaf_partition_thresholds_and_routing_async(self, write_operation
         failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
         assert failure_count == 1 if is_503 else 3
 
-
-    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_threshold_errors()))
-    async def test_ppaf_exclude_regions_async(self, write_operation, error):
-        # TODO: finish this test
-        return
+    @pytest.mark.parametrize("write_operation, exclude_client_regions", write_operations_and_boolean())
+    async def test_ppaf_exclude_regions_async(self, write_operation, exclude_client_regions):
+        # This test validates that the per-partition automatic failover logic does not apply to configs without enough regions.
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info(exclude_client_regions=exclude_client_regions)
+        fault_injection_container = custom_setup['col']
+        global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
+        # Check that computing valid regions for PPAF only returns a single region
+        request_object = RequestObject(resource_type="docs", operation_type=write_operation, headers={})
+        if exclude_client_regions is False:
+            request_object.excluded_locations = [REGION_2]
+        available_ppaf_regions = global_endpoint_manager.compute_available_preferred_regions(request_object)
+        assert len(available_ppaf_regions) == 1
+        # Check that all requests are marked as non-PPAF available due to the fact that we only have one region
+        assert global_endpoint_manager.is_per_partition_automatic_failover_applicable(request_object) is False
 
 
 
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
index c741d0b48af2..1b07fc3ece18 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
@@ -60,6 +60,15 @@ def write_operations_and_errors(error_list=None):
 
     return params
 
+def write_operations_and_boolean():
+    write_operations = [CREATE, UPSERT, REPLACE, DELETE, PATCH, BATCH]
+    params = []
+    for write_operation in write_operations:
+        for boolean in [True, False]:
+            params.append((write_operation, boolean))
+
+    return params
+
 def operations():
     write_operations = [CREATE, UPSERT, REPLACE, DELETE, PATCH, BATCH]
     read_operations = [READ, QUERY_PK, CHANGE_FEED_PK, CHANGE_FEED_EPK]

From ccd9def68d23d679de63d66599e47192a7a29d4a Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 21 Aug 2025 23:28:26 -0400
Subject: [PATCH 31/68] session retry tests

---
 .../_endpoint_discovery_retry_policy.py       |   4 +-
 ...anager_per_partition_automatic_failover.py |  11 +-
 .../azure/cosmos/_session_retry_policy.py     |  14 +++
 .../test_per_partition_automatic_failover.py  | 114 +++++++++++++-----
 ..._per_partition_automatic_failover_async.py |  58 ++++++++-
 5 files changed, 162 insertions(+), 39 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
index df4def458554..ff94355ae7ba 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
@@ -72,9 +72,9 @@ def ShouldRetry(self, exception):  # pylint: disable=unused-argument
             partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[self.pk_range_wrapper]
             location = self.global_endpoint_manager.location_cache.get_location_from_endpoint(
                 str(self.request.location_endpoint_to_route))
-            regional_context = (self.global_endpoint_manager.location_cache.
+            location_endpoint = (self.global_endpoint_manager.location_cache.
                                 account_read_regional_routing_contexts_by_location.get(location).primary_endpoint)
-            partition_level_info.unavailable_regional_endpoints[location] = regional_context
+            partition_level_info.unavailable_regional_endpoints[location] = location_endpoint
             self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
             return True
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index aecbe2209c9d..b1516bfe3c5d 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -74,7 +74,15 @@ def __init__(self, client: "CosmosClientConnection"):
         self.partition_range_to_failover_info: Dict[PartitionKeyRangeWrapper, PartitionLevelFailoverInfo] = {}
         self.ppaf_thresholds_tracker = _PPAFPartitionThresholdsTracker()
 
+    def is_per_partition_automatic_failover_enabled(self) -> bool:
+        if not self._database_account_cache or not self._database_account_cache._EnablePerPartitionFailoverBehavior:
+            return False
+        return True
+
     def is_per_partition_automatic_failover_applicable(self, request: RequestObject) -> bool:
+        if not self.is_per_partition_automatic_failover_enabled():
+            return False
+
         if not request:
             return False
 
@@ -82,9 +90,6 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
                 or _OperationType.IsReadOnlyOperation(request.operation_type)):
             return False
 
-        if not self._database_account_cache or not self._database_account_cache._EnablePerPartitionFailoverBehavior:
-            return False
-
         # if we have at most one region available in the account, we cannot do per partition automatic failover
         available_regions = self.compute_available_preferred_regions(request)
         if len(available_regions) <= 1:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py
index f10366ac4c7f..6561589f71ca 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py
@@ -105,6 +105,20 @@ def ShouldRetry(self, _exception):
         self.request.route_to_location_with_preferred_location_flag(self.session_token_retry_count - 1, False)
         self.request.should_clear_session_token_on_session_read_failure = True
 
+        # For PPAF, the retry should happen to whatever the relevant write region is for the affected partition.
+        if self.global_endpoint_manager.is_per_partition_automatic_failover_enabled():
+            pk_failover_info = self.global_endpoint_manager.partition_range_to_failover_info.get(self.pk_range_wrapper)
+            location = self.global_endpoint_manager.location_cache.get_location_from_endpoint(
+                str(self.request.location_endpoint_to_route))
+            if location in pk_failover_info.unavailable_regional_endpoints:
+                # If the request endpoint is unavailable, we need to resolve the endpoint for the request using the
+                # partition-level failover info
+                location_endpoint = (self.global_endpoint_manager.location_cache.
+                                     account_read_regional_routing_contexts_by_location.
+                                     get(pk_failover_info.current_region).primary_endpoint)
+                self.request.route_to_location(location_endpoint)
+                return True
+
         # Resolve the endpoint for the request and pin the resolution to the resolved endpoint
         # This enables marking the endpoint unavailability on endpoint failover/unreachability
         self.location_endpoint = (self.global_endpoint_manager
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index 8c363fb10f31..eeed945f388f 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -8,8 +8,9 @@
 from azure.core.exceptions import ServiceResponseError
 from azure.cosmos import CosmosClient
 from azure.cosmos.exceptions import CosmosHttpResponseError
+from azure.cosmos._request_object import RequestObject
 from _fault_injection_transport import FaultInjectionTransport
-from test_per_partition_circuit_breaker_mm import (REGION_1, REGION_2, PK_VALUE, BATCH,
+from test_per_partition_circuit_breaker_mm import (REGION_1, REGION_2, PK_VALUE, BATCH, write_operations_and_boolean,
                                                    write_operations_and_errors, perform_write_operation)
 
 # cspell:disable
@@ -43,32 +44,50 @@ class TestPerPartitionAutomaticFailover:
     TEST_DATABASE_ID = test_config.TestConfig.TEST_DATABASE_ID
     TEST_CONTAINER_MULTI_PARTITION_ID = test_config.TestConfig.TEST_MULTI_PARTITION_CONTAINER_ID
 
-    def setup_method_with_custom_transport(self, custom_transport, default_endpoint=host, read_first=False, **kwargs):
-        regions = [REGION_2, REGION_1] if read_first else [REGION_1, REGION_2]
+    def setup_method_with_custom_transport(self, custom_transport, default_endpoint=host, **kwargs):
+        regions = [REGION_1, REGION_2]
         container_id = kwargs.pop("container_id", None)
+        exclude_client_regions = kwargs.pop("exclude_client_regions", False)
+        excluded_regions = []
+        if exclude_client_regions:
+            excluded_regions = [REGION_2]
         if not container_id:
             container_id = self.TEST_CONTAINER_MULTI_PARTITION_ID
         client = CosmosClient(default_endpoint, self.master_key, consistency_level="Session",
                               preferred_locations=regions,
+                              excluded_locations=excluded_regions,
                               transport=custom_transport, **kwargs)
         db = client.get_database_client(self.TEST_DATABASE_ID)
         container = db.get_container_client(container_id)
         return {"client": client, "db": db, "col": container}
 
-    def setup_info(self, error, max_count=None, is_batch=False, **kwargs):
+    def setup_info(self, error=None, max_count=None, is_batch=False, exclude_client_regions=False, session_error=False, **kwargs):
         custom_transport = FaultInjectionTransport()
         # two documents targeted to same partition, one will always fail and the other will succeed
         doc_fail_id = str(uuid.uuid4())
         doc_success_id = str(uuid.uuid4())
-        predicate = lambda r: FaultInjectionTransport.predicate_req_for_document_with_id(r, doc_fail_id)
+        predicate = lambda r: (FaultInjectionTransport.predicate_req_for_document_with_id(r, doc_fail_id)
+                               and FaultInjectionTransport.predicate_is_write_operation(r, "west"))
         # The MockRequest only gets used to create the MockHttpResponse
         mock_request = FaultInjectionTransport.MockHttpRequest(url=self.host)
         if is_batch:
             success_response = FaultInjectionTransport.MockHttpResponse(mock_request, 200, [{"statusCode": 200}],)
         else:
             success_response = FaultInjectionTransport.MockHttpResponse(mock_request, 200)
-        custom_transport.add_fault(predicate=predicate, fault_factory=error, max_inner_count=max_count,
-                                   after_max_count=success_response)
+        if error:
+            custom_transport.add_fault(predicate=predicate, fault_factory=error, max_inner_count=max_count,
+                                       after_max_count=success_response)
+        if session_error:
+            read_predicate = lambda r: (FaultInjectionTransport.predicate_is_operation_type(r, "Read")
+                                        and FaultInjectionTransport.predicate_req_for_document_with_id(r, doc_fail_id))
+            read_error = CosmosHttpResponseError(
+                            status_code=404,
+                            message="Some injected error.",
+                            sub_status=1002)
+            error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, read_error)
+            success_response = FaultInjectionTransport.MockHttpResponse(mock_request, 200)
+            custom_transport.add_fault(predicate=read_predicate, fault_factory=error_lambda, max_inner_count=max_count,
+                                       after_max_count=success_response)
         is_get_account_predicate = lambda r: FaultInjectionTransport.predicate_is_database_account_call(r)
         # Set the database account response to have PPAF enabled
         ppaf_enabled_database_account = \
@@ -76,8 +95,10 @@ def setup_info(self, error, max_count=None, is_batch=False, **kwargs):
         custom_transport.add_response_transformation(
             is_get_account_predicate,
             ppaf_enabled_database_account)
-        setup = self.setup_method_with_custom_transport(None, default_endpoint=self.host, **kwargs)
-        custom_setup = self.setup_method_with_custom_transport(custom_transport, default_endpoint=self.host, **kwargs)
+        setup = self.setup_method_with_custom_transport(None, default_endpoint=self.host,
+                                                        exclude_client_regions=exclude_client_regions, **kwargs)
+        custom_setup = self.setup_method_with_custom_transport(custom_transport, default_endpoint=self.host,
+                                                        exclude_client_regions=exclude_client_regions, **kwargs)
         return setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate
 
     @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_failover_errors()))
@@ -180,35 +201,66 @@ def test_ppaf_partition_thresholds_and_routing(self, write_operation, error):
         failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
         assert failure_count == 1 if is_503 else 3
 
+    @pytest.mark.parametrize("write_operation, exclude_client_regions", write_operations_and_boolean())
+    def test_ppaf_exclude_regions(self, write_operation, exclude_client_regions):
+        # This test validates that the per-partition automatic failover logic does not apply to configs without enough regions.
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(exclude_client_regions=exclude_client_regions)
+        fault_injection_container = custom_setup['col']
+        global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
+        # Check that computing valid regions for PPAF only returns a single region
+        request_object = RequestObject(resource_type="docs", operation_type=write_operation, headers={})
+        if exclude_client_regions is False:
+            request_object.excluded_locations = [REGION_2]
+        available_ppaf_regions = global_endpoint_manager.compute_available_preferred_regions(request_object)
+        assert len(available_ppaf_regions) == 1
+        # Check that all requests are marked as non-PPAF available due to the fact that we only have one region
+        assert global_endpoint_manager.is_per_partition_automatic_failover_applicable(request_object) is False
+
     @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_failover_errors()))
-    def test_ppaf_exclude_regions(self, write_operation, error):
-        # TODO: finish this test
-        return
-
-    def test_ppaf_session_unavailable_retry(self):
-        # For this test, the main requirement is to have 3 regions total in the account: A, B and C.
-        # Writes go to region A, and reads go to region C. Preferred locations are set to C, B, A in order.
-        # This test depends on the fact that the chosen region for the failover is region B and not region C.
-        # We will inject a 403.3 error on region A, marking it as unavailable with PPAF. We verify the retry goes
-        # to region B next. Next we inject a 404.1002 to a read in the same partition, which should retry to region B
-        # as well since A was marked as unavailable in the context of PPAF.
-
-        # For this test, we have two regions in the account West US 3 (write) and West US (read).
-        # Writes go to West US 3, and reads go to region C - preferred locations are set to that order.
-        # This test depends on the fact that the chosen region for the failover is region B and not region C.
-        # We will inject a 403.3 error on region A, marking it as unavailable with PPAF. We verify the retry goes
-        # to region B next. Next we inject a 404.1002 to a read in the same partition, which should retry to region B
-        # as well since A was marked as unavailable in the context of PPAF.
-
-        # Account config has 2 regions: West US 3 (A) and West US (B).
+    def test_ppaf_session_unavailable_retry(self, write_operation, error):
+        # Account config has 2 regions: West US 3 (A) and West US (B). This test validates that after marking the write
+        # region (A) as unavailable, the next request is retried to the read region (B) and succeeds. The next read request
+        # should see that the write region (A) is unavailable for the partition, and should retry to the read region (B) as well.
         error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, error)
-        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(error_lambda, read_first=True)
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(error_lambda, max_count=1,
+                                                                                                        is_batch=write_operation==BATCH, session_error=True)
         container = setup['col']
         fault_injection_container = custom_setup['col']
         global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
-        return
 
+        # Create a document to populate the per-partition GEM partition range info cache
+        fault_injection_container.create_item(body={'id': doc_success_id, 'pk': PK_VALUE,
+                                                    'name': 'sample document', 'key': 'value'})
+        pk_range_wrapper = list(global_endpoint_manager.partition_range_to_failover_info.keys())[0]
+        initial_region = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_region
+
+        # Verify the region that is being used for the read requests
+        read_response = fault_injection_container.read_item(doc_success_id, PK_VALUE)
+        uri = read_response.get_response_headers().get('Content-Location')
+        region = fault_injection_container.client_connection._global_endpoint_manager.location_cache.get_location_from_endpoint(uri)
+        assert region == REGION_1 # first preferred region
+
+        # Based on our configuration, we should have had one error followed by a success - marking only the previous endpoint as unavailable
+        perform_write_operation(
+            write_operation,
+            container,
+            fault_injection_container,
+            doc_fail_id,
+            PK_VALUE)
+        partition_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
+        # Verify that the partition is marked as unavailable, and that the current regional endpoint is not the same
+        assert len(partition_info.unavailable_regional_endpoints) == 1
+        assert initial_region in partition_info.unavailable_regional_endpoints
+        assert initial_region != partition_info.current_region # west us 3 != west us
+
+        # Now we run a read request that runs into a 404.1002 error, which should retry to the read region
+        # We verify that the read request was going to the correct region by using the raw_response_hook
+        fault_injection_container.read_item(doc_fail_id, PK_VALUE, raw_response_hook=session_retry_hook)
 
+def session_retry_hook(raw_response):
+    # This hook is used to verify the request routing that happens after the session retry logic
+    region_string = "-" + REGION_2.replace(' ', '').lower() + "."
+    assert region_string in raw_response.http_request.url
 
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
index c18e6ed9abb4..3baadf9e9879 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -16,7 +16,7 @@
 from azure.cosmos._request_object import RequestObject
 from _fault_injection_transport import FaultInjectionTransport
 from _fault_injection_transport_async import FaultInjectionTransportAsync
-from test_per_partition_automatic_failover import create_failover_errors, create_threshold_errors
+from test_per_partition_automatic_failover import create_failover_errors, create_threshold_errors, session_retry_hook
 from test_per_partition_circuit_breaker_mm import REGION_1, REGION_2, PK_VALUE, BATCH, write_operations_and_errors, write_operations_and_boolean
 from test_per_partition_circuit_breaker_mm_async import perform_write_operation
 
@@ -56,12 +56,13 @@ async def cleanup_method(initialized_objects: Dict[str, Any]):
         method_client: CosmosClient = initialized_objects["client"]
         await method_client.close()
 
-    async def setup_info(self, error=None, max_count=None, is_batch=False, exclude_client_regions=False, **kwargs):
+    async def setup_info(self, error=None, max_count=None, is_batch=False, exclude_client_regions=False, session_error=False, **kwargs):
         custom_transport = FaultInjectionTransportAsync()
         # two documents targeted to same partition, one will always fail and the other will succeed
         doc_fail_id = str(uuid.uuid4())
         doc_success_id = str(uuid.uuid4())
-        predicate = lambda r: FaultInjectionTransportAsync.predicate_req_for_document_with_id(r, doc_fail_id)
+        predicate = lambda r: (FaultInjectionTransportAsync.predicate_req_for_document_with_id(r, doc_fail_id)
+                               and FaultInjectionTransportAsync.predicate_is_write_operation(r, "west"))
         # The MockRequest only gets used to create the MockHttpResponse
         mock_request = FaultInjectionTransport.MockHttpRequest(url=self.host)
         if is_batch:
@@ -71,6 +72,17 @@ async def setup_info(self, error=None, max_count=None, is_batch=False, exclude_c
         if error:
             custom_transport.add_fault(predicate=predicate, fault_factory=error, max_inner_count=max_count,
                                        after_max_count=success_response)
+        if session_error:
+            read_predicate = lambda r: (FaultInjectionTransportAsync.predicate_is_operation_type(r, "Read")
+                                        and FaultInjectionTransportAsync.predicate_req_for_document_with_id(r, doc_fail_id))
+            read_error = CosmosHttpResponseError(
+                            status_code=404,
+                            message="Some injected error.",
+                            sub_status=1002)
+            error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(0, read_error))
+            success_response = FaultInjectionTransportAsync.MockHttpResponse(mock_request, 200)
+            custom_transport.add_fault(predicate=read_predicate, fault_factory=error_lambda, max_inner_count=max_count,
+                                       after_max_count=success_response)
         is_get_account_predicate = lambda r: FaultInjectionTransportAsync.predicate_is_database_account_call(r)
         # Set the database account response to have PPAF enabled
         ppaf_enabled_database_account = \
@@ -198,6 +210,46 @@ async def test_ppaf_exclude_regions_async(self, write_operation, exclude_client_
         # Check that all requests are marked as non-PPAF available due to the fact that we only have one region
         assert global_endpoint_manager.is_per_partition_automatic_failover_applicable(request_object) is False
 
+    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_failover_errors()))
+    async def test_ppaf_session_unavailable_retry_async(self, write_operation, error):
+        # Account config has 2 regions: West US 3 (A) and West US (B). This test validates that after marking the write
+        # region (A) as unavailable, the next request is retried to the read region (B) and succeeds. The next read request
+        # should see that the write region (A) is unavailable for the partition, and should retry to the read region (B) as well.
+        error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(0, error))
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda, max_count=1,
+                                                                                                        is_batch=write_operation==BATCH, session_error=True)
+        container = setup['col']
+        fault_injection_container = custom_setup['col']
+        global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
+
+        # Create a document to populate the per-partition GEM partition range info cache
+        await fault_injection_container.create_item(body={'id': doc_success_id, 'pk': PK_VALUE,
+                                                    'name': 'sample document', 'key': 'value'})
+        pk_range_wrapper = list(global_endpoint_manager.partition_range_to_failover_info.keys())[0]
+        initial_region = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_region
+
+        # Verify the region that is being used for the read requests
+        read_response = await fault_injection_container.read_item(doc_success_id, PK_VALUE)
+        uri = read_response.get_response_headers().get('Content-Location')
+        region = fault_injection_container.client_connection._global_endpoint_manager.location_cache.get_location_from_endpoint(uri)
+        assert region == REGION_1 # first preferred region
+
+        # Based on our configuration, we should have had one error followed by a success - marking only the previous endpoint as unavailable
+        await perform_write_operation(
+                write_operation,
+                container,
+                fault_injection_container,
+                doc_fail_id,
+                PK_VALUE)
+        partition_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
+        # Verify that the partition is marked as unavailable, and that the current regional endpoint is not the same
+        assert len(partition_info.unavailable_regional_endpoints) == 1
+        assert initial_region in partition_info.unavailable_regional_endpoints
+        assert initial_region != partition_info.current_region # west us 3 != west us
+
+        # Now we run a read request that runs into a 404.1002 error, which should retry to the read region
+        # We verify that the read request was going to the correct region by using the raw_response_hook
+        fault_injection_container.read_item(doc_fail_id, PK_VALUE, raw_response_hook=session_retry_hook)
 
 
 if __name__ == '__main__':

From 1dccc5d9d8129a0774e620077e54a82145787a53 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Fri, 22 Aug 2025 08:45:14 -0400
Subject: [PATCH 32/68] pylint, cspell

---
 .../azure/cosmos/_session_retry_policy.py             |  8 ++------
 ..._manager_per_partition_automatic_failover_async.py | 11 ++++++++---
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py
index 6561589f71ca..5c40bf2881f9 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py
@@ -22,7 +22,7 @@
 """Internal class for session read/write unavailable retry policy implementation
 in the Azure Cosmos database service.
 """
-
+# cspell:disable
 from azure.cosmos.documents import _OperationType
 
 class _SessionRetryPolicy(object):
@@ -60,16 +60,12 @@ def ShouldRetry(self, _exception):
         :returns: a boolean stating whether the request should be retried
         :rtype: bool
         """
-        if not self.request:
+        if not self.request or not self.endpoint_discovery_enable:
             return False
         self.session_token_retry_count += 1
         # clear previous location-based routing directive
         self.request.clear_route_to_location()
 
-        if not self.endpoint_discovery_enable:
-            # if endpoint discovery is disabled, the request cannot be retried anywhere else
-            return False
-
         if self.can_use_multiple_write_locations:
             if _OperationType.IsReadOnlyOperation(self.request.operation_type):
                 locations = self.global_endpoint_manager.get_ordered_read_locations()
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 5b805b2e145d..4937a10d7c76 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -75,7 +75,15 @@ def __init__(self, client: "CosmosClientConnection"):
         self.partition_range_to_failover_info: Dict[PartitionKeyRangeWrapper, PartitionLevelFailoverInfo] = {}
         self.ppaf_thresholds_tracker = _PPAFPartitionThresholdsTracker()
 
+    def is_per_partition_automatic_failover_enabled(self) -> bool:
+        if not self._database_account_cache or not self._database_account_cache._EnablePerPartitionFailoverBehavior:
+            return False
+        return True
+
     def is_per_partition_automatic_failover_applicable(self, request: RequestObject) -> bool:
+        if not self.is_per_partition_automatic_failover_enabled():
+            return False
+
         if not request:
             return False
 
@@ -83,9 +91,6 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
                 or _OperationType.IsReadOnlyOperation(request.operation_type)):
             return False
 
-        if not self._database_account_cache or not self._database_account_cache._EnablePerPartitionFailoverBehavior:
-            return False
-
         # if we have at most one region available in the account, we cannot do per partition automatic failover
         available_regions = self.compute_available_preferred_regions(request)
         if len(available_regions) <= 1:

From c2bb93a2d1794905944a347b9c57fba6f3cc2454 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Mon, 25 Aug 2025 11:05:48 -0500
Subject: [PATCH 33/68] change errors since 503 is now retried directly

---
 .../azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
index 399eb4ed86bb..8fe56c06b11b 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
@@ -80,7 +80,7 @@ def operations():
 
 def create_errors(errors=None):
     errors = []
-    error_codes = [408, 500, 502, 503]
+    error_codes = [408, 500, 502, 504]
     for error_code in error_codes:
         errors.append(CosmosHttpResponseError(
             status_code=error_code,

From c3879d8905af50290bb5b058a202e4b30d56a9b7 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Tue, 26 Aug 2025 12:24:42 -0500
Subject: [PATCH 34/68] Update sdk/cosmos/azure-cosmos/README.md

Co-authored-by: Abhijeet Mohanty <mabhijeet1995@gmail.com>
---
 sdk/cosmos/azure-cosmos/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/cosmos/azure-cosmos/README.md b/sdk/cosmos/azure-cosmos/README.md
index 11b327b6c369..45a147aaaed0 100644
--- a/sdk/cosmos/azure-cosmos/README.md
+++ b/sdk/cosmos/azure-cosmos/README.md
@@ -942,7 +942,7 @@ requests to another region:
 
 ### Per Partition Automatic Failover (Public Preview)
 Per partition automatic failover enables the SDK to automatically redirect write requests at the partition level to another region based on service-side signals. This feature is available 
-only for single write region accounts that have at least one read-only region. When per partition automatic failover is enabled, per partition circuit breaker and hedging is enabled by default, meaning 
+only for single write region accounts that have at least one read-only region. When per partition automatic failover is enabled, per partition circuit breaker and cross-region hedging is enabled by default, meaning 
 all its configurable options also apply to per partition automatic failover. To enable this feature, follow the guide [here](https://learn.microsoft.com/azure/cosmos-db/how-to-configure-per-partition-automatic-failover).
 
 ## Troubleshooting

From 1d57bf276f5809942194fb930237b63d1179b826 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Tue, 26 Aug 2025 15:56:42 -0500
Subject: [PATCH 35/68] address comments

update changelog, update docs, add typehints and documentation
---
 sdk/cosmos/azure-cosmos/CHANGELOG.md          |  3 +-
 sdk/cosmos/azure-cosmos/azure/cosmos/_base.py | 20 -------
 .../_endpoint_discovery_retry_policy.py       |  4 +-
 ...anager_per_partition_automatic_failover.py | 55 +++++++++++++++---
 .../cosmos/_service_response_retry_policy.py  |  3 +-
 .../_service_unavailable_retry_policy.py      | 26 +++++++--
 .../azure/cosmos/_session_retry_policy.py     | 21 +++----
 .../cosmos/_timeout_failover_retry_policy.py  |  3 +-
 ..._per_partition_automatic_failover_async.py | 56 ++++++++++++++++---
 .../azure/cosmos/aio/_retry_utility_async.py  |  2 -
 .../azure-cosmos/docs/ErrorCodesAndRetries.md | 24 ++++----
 11 files changed, 147 insertions(+), 70 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md
index b7b00e071517..39f312c60cb3 100644
--- a/sdk/cosmos/azure-cosmos/CHANGELOG.md
+++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md
@@ -4,6 +4,7 @@
 
 #### Features Added
 * Added read_items API to provide an efficient method for retrieving multiple items in a single request. See [PR 42167](https://github.com/Azure/azure-sdk-for-python/pull/42167).
+* Added support for Per Partition Automatic Failover. To enable this feature, you must follow the guide [here](https://learn.microsoft.com/azure/cosmos-db/how-to-configure-per-partition-automatic-failover). See [PR 41588](https://github.com/Azure/azure-sdk-for-python/pull/41588).
 
 #### Breaking Changes
 
@@ -11,6 +12,7 @@
 * Improved the resilience of Database Account Read metadata operation against short-lived network issues by increasing number of retries. See [PR 42525](https://github.com/Azure/azure-sdk-for-python/pull/42525).
 * Fixed bug where during health checks read regions were marked as unavailable for write operations. See [PR 42525](https://github.com/Azure/azure-sdk-for-python/pull/42525).
 * Fixed bug where `excluded_locations` was not being honored for some metadata calls. See [PR 42266](https://github.com/Azure/azure-sdk-for-python/pull/42266).
+* Added cross-regional retries for 503 (Service Unavailable) errors. See [PR 41588](https://github.com/Azure/azure-sdk-for-python/pull/41588).
 
 #### Other Changes
 * Added session token false progress merge logic. See [42393](https://github.com/Azure/azure-sdk-for-python/pull/42393)
@@ -19,7 +21,6 @@
 
 #### Features Added
 * Added feed range support in `query_items`. See [PR 41722](https://github.com/Azure/azure-sdk-for-python/pull/41722).
-* Added support for Per Partition Automatic Failover. To enable this feature, you must follow the guide [here](https://learn.microsoft.com/azure/cosmos-db/how-to-configure-per-partition-automatic-failover). See [PR 41588](https://github.com/Azure/azure-sdk-for-python/pull/41588).
 
 #### Bugs Fixed
 * Fixed session container session token logic. The SDK will now only send the relevant partition-local session tokens for read document requests and write requests when multi-region writes are enabled, as opposed to the entire compound session token for the container for every document request. See [PR 41678](https://github.com/Azure/azure-sdk-for-python/pull/41678).
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
index da5a5befbf91..1fdc40a46d68 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
@@ -28,7 +28,6 @@
 import uuid
 import re
 import binascii
-import os
 from typing import Dict, Any, List, Mapping, Optional, Sequence, Union, Tuple, TYPE_CHECKING
 
 from urllib.parse import quote as urllib_quote
@@ -940,22 +939,3 @@ def _build_properties_cache(properties: Dict[str, Any], container_link: str) ->
         "_self": properties.get("_self", None), "_rid": properties.get("_rid", None),
         "partitionKey": properties.get("partitionKey", None), "container_link": container_link
     }
-
-def try_ppaf_failover_threshold(
-        global_endpoint_manager: "_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover",
-        pk_range_wrapper: "PartitionKeyRangeWrapper",
-        request: "RequestObject"):
-    # If PPAF is enabled, we track consecutive failures for certain exceptions, and only fail over at a partition
-    # level after the threshold is reached
-    if request and global_endpoint_manager.is_per_partition_automatic_failover_applicable(request):
-        if (global_endpoint_manager.ppaf_thresholds_tracker.get_pk_failures(pk_range_wrapper)
-                >= int(os.environ.get(Constants.TIMEOUT_ERROR_THRESHOLD_PPAF,
-                                      Constants.TIMEOUT_ERROR_THRESHOLD_PPAF_DEFAULT))):
-            # If the PPAF threshold is reached, we reset the count and retry to the next region
-            global_endpoint_manager.ppaf_thresholds_tracker.clear_pk_failures(pk_range_wrapper)
-            partition_level_info = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper]
-            location = global_endpoint_manager.location_cache.get_location_from_endpoint(
-                str(request.location_endpoint_to_route))
-            regional_context = (global_endpoint_manager.location_cache.
-                                account_read_regional_routing_contexts_by_location.get(location).primary_endpoint)
-            partition_level_info.unavailable_regional_endpoints[location] = regional_context
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
index ff94355ae7ba..14eed13d8dbc 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
@@ -72,9 +72,9 @@ def ShouldRetry(self, exception):  # pylint: disable=unused-argument
             partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[self.pk_range_wrapper]
             location = self.global_endpoint_manager.location_cache.get_location_from_endpoint(
                 str(self.request.location_endpoint_to_route))
-            location_endpoint = (self.global_endpoint_manager.location_cache.
+            regional_endpoint = (self.global_endpoint_manager.location_cache.
                                 account_read_regional_routing_contexts_by_location.get(location).primary_endpoint)
-            partition_level_info.unavailable_regional_endpoints[location] = location_endpoint
+            partition_level_info.unavailable_regional_endpoints[location] = regional_endpoint
             self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
             return True
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index b1516bfe3c5d..2dceacbdaabf 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -6,10 +6,12 @@
 """
 import logging
 import threading
+import os
 
 from typing import Dict, TYPE_CHECKING, Optional
 
 from azure.cosmos.http_constants import ResourceType
+from azure.cosmos._constants import _Constants as Constants
 from azure.cosmos._global_partition_endpoint_manager_circuit_breaker import \
     _GlobalPartitionEndpointManagerForCircuitBreaker
 from azure.cosmos._partition_health_tracker import _PPAFPartitionThresholdsTracker
@@ -43,8 +45,7 @@ def try_move_to_next_location(
             request: RequestObject) -> bool:
         with self._lock:
             if endpoint_region != self.current_region:
-                logger.info("PPAF - Moving to next available regional endpoint: %s", self.current_region)
-                # make the actual endpoint since the current_region is just West US
+                logger.warning("PPAF - Moving to next available regional endpoint %s", self.current_region)
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
                 request.route_to_location(regional_endpoint)
                 return True
@@ -95,22 +96,62 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
         if len(available_regions) <= 1:
             return False
 
-        # if the request is not for a document or if the request is not executing a stored procedure, return False
-        if (request.resource_type != ResourceType.Document and
-                request.operation_type != _OperationType.ExecuteJavaScript):
+        # if the request is not a non-query plan document request
+        # or if the request is not executing a stored procedure, return False
+        if ((request.resource_type != ResourceType.Document and
+                request.operation_type != _OperationType.ExecuteJavaScript) or
+                 request.operation_type == _OperationType.QueryPlan):
             return False
 
         return True
 
+    def try_ppaf_failover_threshold(
+            self,
+            pk_range_wrapper: "PartitionKeyRangeWrapper",
+            request: "RequestObject"):
+        """Verifies whether the per-partition failover threshold has been reached for consecutive errors. If so,
+        it marks the current region as unavailable for the given partition key range, and moves to the next available
+        region for the request.
+
+        :param PartitionKeyRangeWrapper pk_range_wrapper: The wrapper containing the partition key range information
+            for the request.
+        :param RequestObject request: The request object containing the routing context.
+        :returns: None
+        """
+        # If PPAF is enabled, we track consecutive failures for certain exceptions, and only fail over at a partition
+        # level after the threshold is reached
+        if request and self.is_per_partition_automatic_failover_applicable(request):
+            if (self.ppaf_thresholds_tracker.get_pk_failures(pk_range_wrapper)
+                    >= int(os.environ.get(Constants.TIMEOUT_ERROR_THRESHOLD_PPAF,
+                                          Constants.TIMEOUT_ERROR_THRESHOLD_PPAF_DEFAULT))):
+                # If the PPAF threshold is reached, we reset the count and retry to the next region
+                self.ppaf_thresholds_tracker.clear_pk_failures(pk_range_wrapper)
+                partition_level_info = self.partition_range_to_failover_info[pk_range_wrapper]
+                location = self.location_cache.get_location_from_endpoint(
+                    str(request.location_endpoint_to_route))
+                regional_context = (self.location_cache.
+                                    account_read_regional_routing_contexts_by_location.get(location).primary_endpoint)
+                partition_level_info.unavailable_regional_endpoints[location] = regional_context
+
     def resolve_service_endpoint_for_partition(
             self,
             request: RequestObject,
             pk_range_wrapper: Optional[PartitionKeyRangeWrapper]
     ) -> str:
+        """Resolves the endpoint to be used for the request. In a PPAF-enabled account, this method checks whether
+        the partition key range has any unavailable regions, and if so, it tries to move to the next available region.
+        If all regions are unavailable, it invalidates the cache and starts once again from the main write region in the
+        account configurations.
+
+        :param PartitionKeyRangeWrapper pk_range_wrapper: The wrapper containing the partition key range information
+            for the request.
+        :param RequestObject request: The request object containing the routing context.
+        :returns: The regional endpoint to be used for the request.
+        :rtype: str
+        """
         if self.is_per_partition_automatic_failover_applicable(request) and pk_range_wrapper:
             # If per partition automatic failover is applicable, we check partition unavailability
             if pk_range_wrapper in self.partition_range_to_failover_info:
-                logger.info("Resolving service endpoint for partition with per partition automatic failover enabled.")
                 partition_failover_info = self.partition_range_to_failover_info[pk_range_wrapper]
                 if request.location_endpoint_to_route is not None:
                     endpoint_region = self.location_cache.get_location_from_endpoint(request.location_endpoint_to_route)
@@ -120,7 +161,7 @@ def resolve_service_endpoint_for_partition(
                                 self.compute_available_preferred_regions(request),
                                 endpoint_region,
                                 request):
-                            logger.info("All available regions for partition are unavailable. Refreshing cache.")
+                            logger.warning("All available regions for partition are unavailable. Refreshing cache.")
                             # If no other region is available, we invalidate the cache and start once again from our
                             # main write region in the account configurations
                             self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_response_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_response_retry_policy.py
index 31f6e800d5e1..b1e176299278 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_response_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_response_retry_policy.py
@@ -10,7 +10,6 @@
 
 import logging
 from azure.cosmos.documents import _OperationType
-from azure.cosmos._base import try_ppaf_failover_threshold
 
 class ServiceResponseRetryPolicy(object):
 
@@ -51,7 +50,7 @@ def ShouldRetry(self):
         if self.request:
             # We track consecutive failures for per partition automatic failover, and only fail over at a partition
             # level after the threshold is reached
-            try_ppaf_failover_threshold(self.global_endpoint_manager, self.pk_range_wrapper, self.request)
+            self.global_endpoint_manager.try_ppaf_failover_threshold(self.pk_range_wrapper, self.request)
             if not _OperationType.IsReadOnlyOperation(self.request.operation_type) and not self.request.retry_write:
                 return False
             if self.request.retry_write and self.failover_retry_count + 1 >= self.max_write_retry_count:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
index 245f72bf5489..4aad1388f0a6 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
@@ -1,16 +1,32 @@
 # The MIT License (MIT)
 # Copyright (c) Microsoft Corporation. All rights reserved.
 
-"""Internal class for service unavailable retry policy implementation in the Azure
-Cosmos database service.
+"""Internal class for service unavailable errors implementation in the Azure Cosmos database service.
+
+Service unavailable errors can occur when a request does not make it to the service, or when there is an issue with
+the service. In either case, we know the request did not get processed successfully, so service unavailable errors are
+ retried in the next available preferred region.
 """
-from azure.cosmos.documents import _OperationType
+from typing import Union
+from azure.cosmos.documents import _OperationType, ConnectionPolicy
+from azure.cosmos.exceptions import CosmosHttpResponseError
+from azure.cosmos._routing.routing_range import PartitionKeyRangeWrapper
+from azure.cosmos._global_partition_endpoint_manager_per_partition_automatic_failover import _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover # pylint: disable=line-too-long
+from azure.cosmos.aio._global_partition_endpoint_manager_per_partition_automatic_failover_async import _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync # pylint: disable=line-too-long
+
+_GlobalEndpointManagerType = Union[_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync,
+                                    _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover]
 
 #cspell:ignore ppaf
 
 class _ServiceUnavailableRetryPolicy(object):
 
-    def __init__(self, connection_policy, global_endpoint_manager, pk_range_wrapper, *args):
+    def __init__(
+            self,
+            connection_policy: ConnectionPolicy,
+            global_endpoint_manager: _GlobalEndpointManagerType,
+            pk_range_wrapper: PartitionKeyRangeWrapper,
+            *args):
         self.retry_after_in_milliseconds = 500
         self.global_endpoint_manager = global_endpoint_manager
         self.pk_range_wrapper = pk_range_wrapper
@@ -24,7 +40,7 @@ def __init__(self, connection_policy, global_endpoint_manager, pk_range_wrapper,
             self._max_retry_attempt_count = max(2, len(
                 self.global_endpoint_manager.location_cache.write_regional_routing_contexts))
 
-    def ShouldRetry(self, _exception):
+    def ShouldRetry(self, _exception: CosmosHttpResponseError):
         """Returns true if the request should retry based on the passed-in exception.
 
         :param exceptions.CosmosHttpResponseError _exception:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py
index 5c40bf2881f9..1e530ad9e37e 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py
@@ -104,16 +104,17 @@ def ShouldRetry(self, _exception):
         # For PPAF, the retry should happen to whatever the relevant write region is for the affected partition.
         if self.global_endpoint_manager.is_per_partition_automatic_failover_enabled():
             pk_failover_info = self.global_endpoint_manager.partition_range_to_failover_info.get(self.pk_range_wrapper)
-            location = self.global_endpoint_manager.location_cache.get_location_from_endpoint(
-                str(self.request.location_endpoint_to_route))
-            if location in pk_failover_info.unavailable_regional_endpoints:
-                # If the request endpoint is unavailable, we need to resolve the endpoint for the request using the
-                # partition-level failover info
-                location_endpoint = (self.global_endpoint_manager.location_cache.
-                                     account_read_regional_routing_contexts_by_location.
-                                     get(pk_failover_info.current_region).primary_endpoint)
-                self.request.route_to_location(location_endpoint)
-                return True
+            if pk_failover_info is not None:
+                location = self.global_endpoint_manager.location_cache.get_location_from_endpoint(
+                    str(self.request.location_endpoint_to_route))
+                if location in pk_failover_info.unavailable_regional_endpoints:
+                    # If the request endpoint is unavailable, we need to resolve the endpoint for the request using the
+                    # partition-level failover info
+                    location_endpoint = (self.global_endpoint_manager.location_cache.
+                                         account_read_regional_routing_contexts_by_location.
+                                         get(pk_failover_info.current_region).primary_endpoint)
+                    self.request.route_to_location(location_endpoint)
+                    return True
 
         # Resolve the endpoint for the request and pin the resolution to the resolved endpoint
         # This enables marking the endpoint unavailability on endpoint failover/unreachability
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
index 952685ef5e06..ba5047df0ccc 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
@@ -5,7 +5,6 @@
 Cosmos database service.
 """
 from azure.cosmos.documents import _OperationType
-from azure.cosmos._base import try_ppaf_failover_threshold
 
 # cspell:ignore PPAF, ppaf
 
@@ -38,7 +37,7 @@ def ShouldRetry(self, _exception):
         :returns: a boolean stating whether the request should be retried
         :rtype: bool
         """
-        try_ppaf_failover_threshold(self.global_endpoint_manager, self.pk_range_wrapper, self.request)
+        self.global_endpoint_manager.try_ppaf_failover_threshold(self.pk_range_wrapper, self.request)
 
         # we retry only if the request is a read operation or if it is a write operation with retry enabled
         if self.request and not self.is_operation_retryable():
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 4937a10d7c76..e96d9e3152a7 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -6,10 +6,12 @@
 """
 import logging
 import threading
+import os
 
 from typing import Dict, TYPE_CHECKING, Optional
 
 from azure.cosmos.http_constants import ResourceType
+from azure.cosmos._constants import _Constants as Constants
 from azure.cosmos.aio._global_partition_endpoint_manager_circuit_breaker_async import \
     _GlobalPartitionEndpointManagerForCircuitBreakerAsync
 from azure.cosmos.documents import _OperationType
@@ -43,7 +45,7 @@ def try_move_to_next_location(
             request: RequestObject) -> bool:
         with self._lock:
             if endpoint_region != self.current_region:
-                logger.info("PPAF - Moving to next available regional endpoint: %s", self.current_region)
+                logger.warning("PPAF - Moving to next available regional endpoint: %s", self.current_region)
                 # make the actual endpoint since the current_region is just West US
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
                 request.route_to_location(regional_endpoint)
@@ -57,7 +59,7 @@ def try_move_to_next_location(
                     continue
 
                 self.current_region = regional_endpoint
-                logger.info("PPAF - Moving to next available regional endpoint: %s", self.current_region)
+                logger.warning("PPAF - Moving to next available regional endpoint: %s", self.current_region)
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
                 request.route_to_location(regional_endpoint)
                 return True
@@ -96,22 +98,62 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
         if len(available_regions) <= 1:
             return False
 
-        # if the request is not for a document or if the request is not executing a stored procedure, return False
-        if (request.resource_type != ResourceType.Document and
-                request.operation_type != _OperationType.ExecuteJavaScript):
+        # if the request is not a non-query plan document request
+        # or if the request is not executing a stored procedure, return False
+        if ((request.resource_type != ResourceType.Document and
+                request.operation_type != _OperationType.ExecuteJavaScript) or
+                 request.operation_type == _OperationType.QueryPlan):
             return False
 
         return True
 
+    def try_ppaf_failover_threshold(
+            self,
+            pk_range_wrapper: "PartitionKeyRangeWrapper",
+            request: "RequestObject"):
+        """Verifies whether the per-partition failover threshold has been reached for consecutive errors. If so,
+        it marks the current region as unavailable for the given partition key range, and moves to the next available
+        region for the request.
+
+        :param PartitionKeyRangeWrapper pk_range_wrapper: The wrapper containing the partition key range information
+            for the request.
+        :param RequestObject request: The request object containing the routing context.
+        :returns: None
+        """
+        # If PPAF is enabled, we track consecutive failures for certain exceptions, and only fail over at a partition
+        # level after the threshold is reached
+        if request and self.is_per_partition_automatic_failover_applicable(request):
+            if (self.ppaf_thresholds_tracker.get_pk_failures(pk_range_wrapper)
+                    >= int(os.environ.get(Constants.TIMEOUT_ERROR_THRESHOLD_PPAF,
+                                          Constants.TIMEOUT_ERROR_THRESHOLD_PPAF_DEFAULT))):
+                # If the PPAF threshold is reached, we reset the count and retry to the next region
+                self.ppaf_thresholds_tracker.clear_pk_failures(pk_range_wrapper)
+                partition_level_info = self.partition_range_to_failover_info[pk_range_wrapper]
+                location = self.location_cache.get_location_from_endpoint(
+                    str(request.location_endpoint_to_route))
+                regional_context = (self.location_cache.
+                                    account_read_regional_routing_contexts_by_location.get(location).primary_endpoint)
+                partition_level_info.unavailable_regional_endpoints[location] = regional_context
+
     def resolve_service_endpoint_for_partition(
             self,
             request: RequestObject,
             pk_range_wrapper: Optional[PartitionKeyRangeWrapper]
     ) -> str:
+        """Resolves the endpoint to be used for the request. In a PPAF-enabled account, this method checks whether
+        the partition key range has any unavailable regions, and if so, it tries to move to the next available region.
+        If all regions are unavailable, it invalidates the cache and starts once again from the main write region in the
+        account configurations.
+
+        :param PartitionKeyRangeWrapper pk_range_wrapper: The wrapper containing the partition key range information
+            for the request.
+        :param RequestObject request: The request object containing the routing context.
+        :returns: The regional endpoint to be used for the request.
+        :rtype: str
+        """
         if self.is_per_partition_automatic_failover_applicable(request) and pk_range_wrapper:
             # If per partition automatic failover is applicable, we check partition unavailability
             if pk_range_wrapper in self.partition_range_to_failover_info:
-                logger.info("Resolving service endpoint for partition with per partition automatic failover enabled.")
                 partition_failover_info = self.partition_range_to_failover_info[pk_range_wrapper]
                 if request.location_endpoint_to_route is not None:
                     endpoint_region = self.location_cache.get_location_from_endpoint(request.location_endpoint_to_route)
@@ -121,7 +163,7 @@ def resolve_service_endpoint_for_partition(
                                 self.compute_available_preferred_regions(request),
                                 endpoint_region,
                                 request):
-                            logger.info("All available regions for partition are unavailable. Refreshing cache.")
+                            logger.warning("All available regions for partition are unavailable. Refreshing cache.")
                             # If no other region is available, we invalidate the cache and start once again from our
                             # main write region in the account configurations
                             self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
index 6afc01c53f02..88caac506cb1 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
@@ -202,11 +202,9 @@ async def ExecuteAsync(client, global_endpoint_manager, function, *args, **kwarg
                     if retry_policy.should_update_throughput_link(request.body, cached_container):
                         new_body = retry_policy._update_throughput_link(request.body)
                         request.body = new_body
-
                     retry_policy.container_rid = cached_container["_rid"]
                     request.headers[retry_policy._intended_headers] = retry_policy.container_rid
             elif e.status_code == StatusCodes.SERVICE_UNAVAILABLE:
-                # if ppaf is applicable, we record the failure
                 retry_policy = service_unavailable_retry_policy
             elif e.status_code == StatusCodes.REQUEST_TIMEOUT or e.status_code >= StatusCodes.INTERNAL_SERVER_ERROR:
                 if args:
diff --git a/sdk/cosmos/azure-cosmos/docs/ErrorCodesAndRetries.md b/sdk/cosmos/azure-cosmos/docs/ErrorCodesAndRetries.md
index 5f9cfd3c03d6..bc1dd0bf79d4 100644
--- a/sdk/cosmos/azure-cosmos/docs/ErrorCodesAndRetries.md
+++ b/sdk/cosmos/azure-cosmos/docs/ErrorCodesAndRetries.md
@@ -2,17 +2,17 @@
 
 The Cosmos DB Python SDK has several default policies that will deal with retrying certain errors and exceptions. More information on these can be found below.
 
-| Status code  | Cause of exception and retry behavior                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
-| :--- |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| 400    | For all operations: </br><ul><li> This exception is encountered when the request is invalid, which could be for any of the following reasons: </br><ul><li>Syntax error in query text</li><li>Malformed JSON document for a write request</li><li>Incorrectly formatted REST API request body etc.</li></ul></li><li>The client does NOT retry the request when a Bad Request (400) exception is thrown by the server.</li></ul>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
-| 401   | For all operations: </br><ul><li> This is an unauthorized exception due to invalid auth tokens being used for the request. The client does NOT retry requests when this exception is encountered.</li></ul>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
-| 403    | <ul><li>For Substatus 3 (Write Forbidden) and Substatus 1008 (Database Account Not Found): </br><ul><li>This exception occurs when a geo-replicated database account runs into writable/readable location changes (say, after a failover).</li><li>This exception can occur regardless of the Consistency level set for the account. </li><li>The client refreshes it's location endpoints and retries requests when the user has enabled endpoint discovery in their client (default behavior).</li></ul></li><li>For all other cases: </br><ul><li> The client does NOT retry requests when this exception is encountered. </li>                                                                                                                                                                                                                                                                                                                                                                                             |
+| Status code | Cause of exception and retry behavior                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
+|:------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| 400         | For all operations: </br><ul><li> This exception is encountered when the request is invalid, which could be for any of the following reasons: </br><ul><li>Syntax error in query text</li><li>Malformed JSON document for a write request</li><li>Incorrectly formatted REST API request body etc.</li></ul></li><li>The client does NOT retry the request when a Bad Request (400) exception is thrown by the server.</li></ul>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
+| 401         | For all operations: </br><ul><li> This is an unauthorized exception due to invalid auth tokens being used for the request. The client does NOT retry requests when this exception is encountered.</li></ul>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
+| 403         | <ul><li>For Substatus 3 (Write Forbidden) and Substatus 1008 (Database Account Not Found): </br><ul><li>This exception occurs when a geo-replicated database account runs into writable/readable location changes (say, after a failover).</li><li>This exception can occur regardless of the Consistency level set for the account. </li><li>The client refreshes it's location endpoints and retries requests when the user has enabled endpoint discovery in their client (default behavior).</li></ul></li><li>For all other cases: </br><ul><li> The client does NOT retry requests when this exception is encountered. </li>                                                                                                                                                                                                                                                                                                                                                                                             |
 | 404/1002    | <ul><li>For write operations: </br><ul><li>If multiple write locations are enabled for the account, the SDK will fetch the write endpoints and retry once per each of these. </li><li>The client refreshes it's location endpoints and retries requests when the user has enabled endpoint discovery in their client (default behavior).</li><li>If the account does not have multiple write locations enabled, the SDK will retry only once in the account primary region. </li></ul></li><li>For read operations: </br><ul><li>If multiple write locations are enabled for the account, the SDK will fetch the read endpoints and retry once per each of these. </li><li>The client refreshes it's location endpoints and retries requests when the user has enabled endpoint discovery in their client (default behavior).</li><li>If the account does not have multiple write locations enabled, the SDK will retry only once in the account primary region. </li>                                                         |
-| 408    | <ul><li>For Write Operations: <br><ul><li>Timeout exceptions can be encountered by both the client as well as the server. Server-side timeout exceptions are not retried for write operations as it is not possible to determine if the write was in fact successfully committed on the server. For a client-generated timeout exception, either the request was sent over the wire to the server by the client and the network request timeout exceeded while waiting for a response, or the request was not sent over the wire to the server which resulted in a client-generated timeout. The client does NOT retry for either.</li></ul><li>For Query and Point Read Operations:</br><ul><li>The SDK will retry on the next preferred region, if any is available.</li></ul> </li></ul>                                                                                                                                                                                                                                    |
-| 409    | <ul><li>For Write Operations: </br><ul><li>This exception occurs when an attempt is made by the application to Create/Insert an Item that already exists.</li><li>This exception can occur regardless of the Consistency level set for the account. </li><li>This exception can occur for write operations when an attempt is made to create an existing item or when a unique key constraint violation occurs. </li><li>The client does NOT retry on Conflict exceptions </li></ul></li><li>For Query and Point Read Operations: </br><ul><li>N/A as this exception is only encountered for Create/Insert operations. </li></ul></li>                                                                                                                                                                                                                                                                                                                                                                                         |
+| 408         | <ul><li>For Write Operations: <br><ul><li>Timeout exceptions can be encountered by both the client as well as the server. Server-side timeout exceptions are not retried for write operations as it is not possible to determine if the write was in fact successfully committed on the server. For a client-generated timeout exception, either the request was sent over the wire to the server by the client and the network request timeout exceeded while waiting for a response, or the request was not sent over the wire to the server which resulted in a client-generated timeout. The client does NOT retry for either.</li></ul><li>For Query and Point Read Operations:</br><ul><li>The SDK will retry on the next preferred region, if any is available.</li></ul> </li></ul>                                                                                                                                                                                                                                    |
+| 409         | <ul><li>For Write Operations: </br><ul><li>This exception occurs when an attempt is made by the application to Create/Insert an Item that already exists.</li><li>This exception can occur regardless of the Consistency level set for the account. </li><li>This exception can occur for write operations when an attempt is made to create an existing item or when a unique key constraint violation occurs. </li><li>The client does NOT retry on Conflict exceptions </li></ul></li><li>For Query and Point Read Operations: </br><ul><li>N/A as this exception is only encountered for Create/Insert operations. </li></ul></li>                                                                                                                                                                                                                                                                                                                                                                                         |
 | 410/1002    | <ul><li>For all operations: </br><ul><li>This exception occurs when a partition is split (or merged in the future) and no longer exists, and can occur regardless of the Consistency level set for the account.</li><li>The SDK will refresh its partition key range cache and trigger a single retry, fetching the new ranges from the gateway once it finds an empty cache. </li>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
-| 412  | <ul><li>For Write Operations: </br><ul><li>This exception is encountered when the etag that is sent to the server for validation prior to updating an Item, does not match the etag of the Item on the server. </li><li>The client does NOT retry this operation locally or against any of the remote regions for the account as retries would not help alleviate the etag mismatch. </li><li>The application would need to trigger a retry by first reading the Item, fetching the latest etag and issuing the Upsert/Replace operation. </br><ul><li>This operation can continue to fail with the same exception when multiple updates are executed concurrently for the same Item. </li><li>An upper bound on the number of retries before handing off the Item to a dead letter queue should be implemented by the application. </li></ul></li></ul></li><li>For Query and point read Operations: </br><ul><li>N/A as this exception is only encountered for Create/Insert/Replace/Upsert operations. </li></ul></li></ul> |
-| 429  | For all Operations: </br><ul><li>By default, the client retries the request for a maximum of 9 times (or for a maximum of 30 seconds, whichever limit is reached first). </li><li>The client can also be initialized with a custom retry policy, which overrides the two limits mentioned above. </li><li>After all the retries are exhausted, the client bubbles up the exception to the application. </li><li>**For a multi-region account**, the client does NOT retry the request against a remote region for the account. </li><li>When the application receives a Request Rate too large exception (429), the application would need to instrument its own retry logic and dead letter queues. </li></ul>                                                                                                                                                                                                                                                                                                                |
-| 449  | <ul><li>For Write Operations: </br><ul><li>This exception is encountered when a resource is concurrently updated on the server, which can happen due to concurrent writes, user triggered while conflicts are concurrently being resolved etc. </li><li>Only one update can be executed at a time per item. The other concurrent requests will fail with a Concurrent Execution Exception (449). </li><li>The client does NOT retry requests that failed with a 449. </li></ul></li><li>For Query and point read Operations: </br><ul><li>N/A as this exception is only encountered for Create/Insert/Replace/Upsert operations. </li></ul></li></ul>                                                                                                                                                                                                                                                                                                                                                                          |
-| 500  | For all Operations: </br><ul><li>The occurrence of an Invalid Exception (500) is extremely rare, and the client will retry a request that encounters this exception on the next preferred regions. </li></ul>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
-| 503  | When a Service Unavailable exception is encountered:  </br><ul><li>The request will be retried by the SDK on the next preferred regions.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
+| 412         | <ul><li>For Write Operations: </br><ul><li>This exception is encountered when the etag that is sent to the server for validation prior to updating an Item, does not match the etag of the Item on the server. </li><li>The client does NOT retry this operation locally or against any of the remote regions for the account as retries would not help alleviate the etag mismatch. </li><li>The application would need to trigger a retry by first reading the Item, fetching the latest etag and issuing the Upsert/Replace operation. </br><ul><li>This operation can continue to fail with the same exception when multiple updates are executed concurrently for the same Item. </li><li>An upper bound on the number of retries before handing off the Item to a dead letter queue should be implemented by the application. </li></ul></li></ul></li><li>For Query and point read Operations: </br><ul><li>N/A as this exception is only encountered for Create/Insert/Replace/Upsert operations. </li></ul></li></ul> |
+| 429         | For all Operations: </br><ul><li>By default, the client retries the request for a maximum of 9 times (or for a maximum of 30 seconds, whichever limit is reached first). </li><li>The client can also be initialized with a custom retry policy, which overrides the two limits mentioned above. </li><li>After all the retries are exhausted, the client bubbles up the exception to the application. </li><li>**For a multi-region account**, the client does NOT retry the request against a remote region for the account. </li><li>When the application receives a Request Rate too large exception (429), the application would need to instrument its own retry logic and dead letter queues. </li></ul>                                                                                                                                                                                                                                                                                                                |
+| 449         | <ul><li>For Write Operations: </br><ul><li>This exception is encountered when a resource is concurrently updated on the server, which can happen due to concurrent writes, user triggered while conflicts are concurrently being resolved etc. </li><li>Only one update can be executed at a time per item. The other concurrent requests will fail with a Concurrent Execution Exception (449). </li><li>The client does NOT retry requests that failed with a 449. </li></ul></li><li>For Query and point read Operations: </br><ul><li>N/A as this exception is only encountered for Create/Insert/Replace/Upsert operations. </li></ul></li></ul>                                                                                                                                                                                                                                                                                                                                                                          |
+| 500         | <ul><li>For Write Operations: </br><ul><li>The client does NOT retry write requests. </li></ul></li><li>For Read Operations: </br><ul><li>The request will be retried by the SDK on the next preferred regions. </li></ul></li></ul>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
+| 503         | When a Service Unavailable exception is encountered, for all Operations:  </br><ul><li>The request will be retried by the SDK on the next preferred regions.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |

From eec77e76b71bb41135f53967952a25fb628b1d0d Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Tue, 26 Aug 2025 16:34:23 -0500
Subject: [PATCH 36/68] Update _service_unavailable_retry_policy.py

---
 .../cosmos/_service_unavailable_retry_policy.py  | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
index 4aad1388f0a6..54615ca597c6 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
@@ -7,25 +7,17 @@
 the service. In either case, we know the request did not get processed successfully, so service unavailable errors are
  retried in the next available preferred region.
 """
-from typing import Union
-from azure.cosmos.documents import _OperationType, ConnectionPolicy
+from azure.cosmos.documents import _OperationType
 from azure.cosmos.exceptions import CosmosHttpResponseError
-from azure.cosmos._routing.routing_range import PartitionKeyRangeWrapper
-from azure.cosmos._global_partition_endpoint_manager_per_partition_automatic_failover import _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover # pylint: disable=line-too-long
-from azure.cosmos.aio._global_partition_endpoint_manager_per_partition_automatic_failover_async import _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync # pylint: disable=line-too-long
-
-_GlobalEndpointManagerType = Union[_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync,
-                                    _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover]
 
 #cspell:ignore ppaf
 
 class _ServiceUnavailableRetryPolicy(object):
-
     def __init__(
             self,
-            connection_policy: ConnectionPolicy,
-            global_endpoint_manager: _GlobalEndpointManagerType,
-            pk_range_wrapper: PartitionKeyRangeWrapper,
+            connection_policy,
+            global_endpoint_manager,
+            pk_range_wrapper,
             *args):
         self.retry_after_in_milliseconds = 500
         self.global_endpoint_manager = global_endpoint_manager

From 4c2bf3223b7fa105672bf6a7ff74d4350ca83ca0 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Tue, 26 Aug 2025 16:49:49 -0500
Subject: [PATCH 37/68] small test updates for 503 behavior

---
 ..._endpoint_manager_per_partition_automatic_failover.py | 5 ++---
 ...int_manager_per_partition_automatic_failover_async.py | 5 ++---
 .../tests/test_per_partition_automatic_failover.py       | 9 +++------
 .../tests/test_per_partition_automatic_failover_async.py | 9 +++------
 4 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 2dceacbdaabf..cd6f9e85a329 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -98,9 +98,8 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
 
         # if the request is not a non-query plan document request
         # or if the request is not executing a stored procedure, return False
-        if ((request.resource_type != ResourceType.Document and
-                request.operation_type != _OperationType.ExecuteJavaScript) or
-                 request.operation_type == _OperationType.QueryPlan):
+        if (request.resource_type != ResourceType.Document and
+                request.operation_type != _OperationType.ExecuteJavaScript):
             return False
 
         return True
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index e96d9e3152a7..40d54c64a981 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -100,9 +100,8 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
 
         # if the request is not a non-query plan document request
         # or if the request is not executing a stored procedure, return False
-        if ((request.resource_type != ResourceType.Document and
-                request.operation_type != _OperationType.ExecuteJavaScript) or
-                 request.operation_type == _OperationType.QueryPlan):
+        if (request.resource_type != ResourceType.Document and
+                request.operation_type != _OperationType.ExecuteJavaScript):
             return False
 
         return True
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index eeed945f388f..e0f855140bf8 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -162,10 +162,7 @@ def test_ppaf_partition_thresholds_and_routing(self, write_operation, error):
         pk_range_wrapper = list(global_endpoint_manager.partition_range_to_failover_info.keys())[0]
         initial_region = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_region
 
-        is_503 = hasattr(error, 'status_code') and error.status_code == 503
-        # Since 503 errors are retried by default, we each request counts as two failures
-        consecutive_failures = 3 if is_503 else 6
-
+        consecutive_failures = 6
         for i in range(consecutive_failures):
             # We perform the write operation multiple times to check the consecutive failures logic
             with pytest.raises((CosmosHttpResponseError, ServiceResponseError)) as exc_info:
@@ -179,7 +176,7 @@ def test_ppaf_partition_thresholds_and_routing(self, write_operation, error):
         pk_range_wrappers = list(global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count.keys())
         assert len(pk_range_wrappers) == 1
         failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
-        assert failure_count == 6
+        assert failure_count == consecutive_failures
         # Run some more requests to the same partition to trigger the failover logic
         for i in range(consecutive_failures):
             with pytest.raises((CosmosHttpResponseError, ServiceResponseError)) as exc_info:
@@ -199,7 +196,7 @@ def test_ppaf_partition_thresholds_and_routing(self, write_operation, error):
         # Since we are failing every request, even though we retried to the next region, that retry should have failed as well
         # This means we should have one extra failure - verify that the value makes sense
         failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
-        assert failure_count == 1 if is_503 else 3
+        assert failure_count == 3
 
     @pytest.mark.parametrize("write_operation, exclude_client_regions", write_operations_and_boolean())
     def test_ppaf_exclude_regions(self, write_operation, exclude_client_regions):
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
index 3baadf9e9879..c5721a0af9e8 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -155,10 +155,7 @@ async def test_ppaf_partition_thresholds_and_routing_async(self, write_operation
         pk_range_wrapper = list(global_endpoint_manager.partition_range_to_failover_info.keys())[0]
         initial_region = global_endpoint_manager.partition_range_to_failover_info[pk_range_wrapper].current_region
 
-        is_503 = hasattr(error, 'status_code') and error.status_code == 503
-        # Since 503 errors are retried by default, we each request counts as two failures
-        consecutive_failures = 3 if is_503 else 6
-
+        consecutive_failures = 6
         for i in range(consecutive_failures):
             # We perform the write operation multiple times to check the consecutive failures logic
             with pytest.raises((CosmosHttpResponseError, ServiceResponseError)) as exc_info:
@@ -173,7 +170,7 @@ async def test_ppaf_partition_thresholds_and_routing_async(self, write_operation
         pk_range_wrappers = list(global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count.keys())
         assert len(pk_range_wrappers) == 1
         failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
-        assert failure_count == 6
+        assert failure_count == consecutive_failures
         # Run some more requests to the same partition to trigger the failover logic
         for i in range(consecutive_failures):
             with pytest.raises((CosmosHttpResponseError, ServiceResponseError)) as exc_info:
@@ -193,7 +190,7 @@ async def test_ppaf_partition_thresholds_and_routing_async(self, write_operation
         # Since we are failing every request, even though we retried to the next region, that retry should have failed as well
         # This means we should have one extra failure - verify that the value makes sense
         failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
-        assert failure_count == 1 if is_503 else 3
+        assert failure_count == 3
 
     @pytest.mark.parametrize("write_operation, exclude_client_regions", write_operations_and_boolean())
     async def test_ppaf_exclude_regions_async(self, write_operation, exclude_client_regions):

From 05654a9d38d738837c288502a1a415ae22252b4c Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Tue, 26 Aug 2025 21:19:15 -0500
Subject: [PATCH 38/68] further comments

---
 ...anager_per_partition_automatic_failover.py |  3 +-
 .../_service_unavailable_retry_policy.py      |  8 ++---
 .../cosmos/_timeout_failover_retry_policy.py  |  8 ++---
 .../test_per_partition_automatic_failover.py  | 31 +++++++++++++------
 ..._per_partition_automatic_failover_async.py | 24 +++++++++-----
 5 files changed, 47 insertions(+), 27 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index cd6f9e85a329..6e06e3012c18 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -131,6 +131,7 @@ def try_ppaf_failover_threshold(
                 regional_context = (self.location_cache.
                                     account_read_regional_routing_contexts_by_location.get(location).primary_endpoint)
                 partition_level_info.unavailable_regional_endpoints[location] = regional_context
+                print(3)
 
     def resolve_service_endpoint_for_partition(
             self,
@@ -169,8 +170,6 @@ def resolve_service_endpoint_for_partition(
                                                                                                 pk_range_wrapper)
                     else:
                         # Update the current regional endpoint to whatever the request is routing to
-                        endpoint_region = self.location_cache.get_location_from_endpoint(
-                            request.location_endpoint_to_route)
                         partition_failover_info.current_region = endpoint_region
             else:
                 partition_failover_info = PartitionLevelFailoverInfo()
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
index 54615ca597c6..8269eb86799d 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
@@ -26,11 +26,11 @@ def __init__(
         self.connection_policy = connection_policy
         self.request = args[0] if args else None
         # If an account only has 1 region, then we still want to retry once on the same region
-        self._max_retry_attempt_count = max(2, len(self.global_endpoint_manager.location_cache
-                                                   .read_regional_routing_contexts))
+        self._max_retry_attempt_count = len(self.global_endpoint_manager.
+                                            location_cache.read_regional_routing_contexts) + 1
         if self.request and _OperationType.IsWriteOperation(self.request.operation_type):
-            self._max_retry_attempt_count = max(2, len(
-                self.global_endpoint_manager.location_cache.write_regional_routing_contexts))
+            self._max_retry_attempt_count = len(self.global_endpoint_manager.location_cache.
+                                                write_regional_routing_contexts) + 1
 
     def ShouldRetry(self, _exception: CosmosHttpResponseError):
         """Returns true if the request should retry based on the passed-in exception.
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
index ba5047df0ccc..d019b125c2c3 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_timeout_failover_retry_policy.py
@@ -20,12 +20,12 @@ def __init__(self, connection_policy, global_endpoint_manager, pk_range_wrapper,
         # If an account only has 1 region, then we still want to retry once on the same region
         # We want this to be the default retry attempts as paging through a query means there are requests without
         # a request object
-        self._max_retry_attempt_count = max(2, len(self.global_endpoint_manager.location_cache
-                                            .read_regional_routing_contexts))
+        self._max_retry_attempt_count = len(self.global_endpoint_manager.
+                                            location_cache.read_regional_routing_contexts) + 1
        # If the request is a write operation, we only want to retry once if retry write is enabled
         if self.request and _OperationType.IsWriteOperation(self.request.operation_type):
-            self._max_retry_attempt_count = max(2, len(
-                self.global_endpoint_manager.location_cache.write_regional_routing_contexts))
+            self._max_retry_attempt_count = len(self.global_endpoint_manager.location_cache.
+                                                write_regional_routing_contexts) + 1
         self.retry_count = 0
         self.connection_policy = connection_policy
         self.request = args[0] if args else None
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index e0f855140bf8..b3fede1c9f65 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -66,8 +66,8 @@ def setup_info(self, error=None, max_count=None, is_batch=False, exclude_client_
         # two documents targeted to same partition, one will always fail and the other will succeed
         doc_fail_id = str(uuid.uuid4())
         doc_success_id = str(uuid.uuid4())
-        predicate = lambda r: (FaultInjectionTransport.predicate_req_for_document_with_id(r, doc_fail_id)
-                               and FaultInjectionTransport.predicate_is_write_operation(r, "west"))
+        predicate = lambda r: (FaultInjectionTransport.predicate_req_for_document_with_id(r, doc_fail_id) and
+                               FaultInjectionTransport.predicate_is_write_operation(r, "com"))
         # The MockRequest only gets used to create the MockHttpResponse
         mock_request = FaultInjectionTransport.MockHttpRequest(url=self.host)
         if is_batch:
@@ -177,8 +177,19 @@ def test_ppaf_partition_thresholds_and_routing(self, write_operation, error):
         assert len(pk_range_wrappers) == 1
         failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
         assert failure_count == consecutive_failures
-        # Run some more requests to the same partition to trigger the failover logic
-        for i in range(consecutive_failures):
+
+        # Verify that a single success to the same partition resets the consecutive failures count
+        perform_write_operation(write_operation,
+                                container,
+                                fault_injection_container,
+                                str(uuid.uuid4()),
+                                PK_VALUE)
+
+        failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count.get(pk_range_wrappers[0], 0)
+        assert failure_count == 0
+
+        # Run enough failed requests to the partition to trigger the failover logic
+        for i in range(12):
             with pytest.raises((CosmosHttpResponseError, ServiceResponseError)) as exc_info:
                 perform_write_operation(write_operation,
                                         container,
@@ -193,10 +204,9 @@ def test_ppaf_partition_thresholds_and_routing(self, write_operation, error):
         assert initial_region in partition_info.unavailable_regional_endpoints
         assert initial_region != partition_info.current_region # west us 3 != west us
 
-        # Since we are failing every request, even though we retried to the next region, that retry should have failed as well
-        # This means we should have one extra failure - verify that the value makes sense
+        # 12 failures - 10 to trigger failover, 2 more to start counting again
         failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
-        assert failure_count == 3
+        assert failure_count == 2
 
     @pytest.mark.parametrize("write_operation, exclude_client_regions", write_operations_and_boolean())
     def test_ppaf_exclude_regions(self, write_operation, exclude_client_regions):
@@ -255,9 +265,10 @@ def test_ppaf_session_unavailable_retry(self, write_operation, error):
         fault_injection_container.read_item(doc_fail_id, PK_VALUE, raw_response_hook=session_retry_hook)
 
 def session_retry_hook(raw_response):
-    # This hook is used to verify the request routing that happens after the session retry logic
-    region_string = "-" + REGION_2.replace(' ', '').lower() + "."
-    assert region_string in raw_response.http_request.url
+    if raw_response.http_request.headers.get('x-ms-thinclient-proxy-resource-type') != 'databaseaccount':
+        # This hook is used to verify the request routing that happens after the session retry logic
+        region_string = "-" + REGION_2.replace(' ', '').lower() + "."
+        assert region_string in raw_response.http_request.url
 
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
index c5721a0af9e8..ac0e97576ed2 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -61,8 +61,8 @@ async def setup_info(self, error=None, max_count=None, is_batch=False, exclude_c
         # two documents targeted to same partition, one will always fail and the other will succeed
         doc_fail_id = str(uuid.uuid4())
         doc_success_id = str(uuid.uuid4())
-        predicate = lambda r: (FaultInjectionTransportAsync.predicate_req_for_document_with_id(r, doc_fail_id)
-                               and FaultInjectionTransportAsync.predicate_is_write_operation(r, "west"))
+        predicate = lambda r: (FaultInjectionTransportAsync.predicate_req_for_document_with_id(r, doc_fail_id) and
+                               FaultInjectionTransportAsync.predicate_is_write_operation(r, "com"))
         # The MockRequest only gets used to create the MockHttpResponse
         mock_request = FaultInjectionTransport.MockHttpRequest(url=self.host)
         if is_batch:
@@ -171,8 +171,19 @@ async def test_ppaf_partition_thresholds_and_routing_async(self, write_operation
         assert len(pk_range_wrappers) == 1
         failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
         assert failure_count == consecutive_failures
-        # Run some more requests to the same partition to trigger the failover logic
-        for i in range(consecutive_failures):
+
+        # Verify that a single success to the same partition resets the consecutive failures count
+        await perform_write_operation(write_operation,
+                                container,
+                                fault_injection_container,
+                                str(uuid.uuid4()),
+                                PK_VALUE)
+
+        failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count.get(pk_range_wrappers[0], 0)
+        assert failure_count == 0
+
+        # Run enough failed requests to the partition to trigger the failover logic
+        for i in range(12):
             with pytest.raises((CosmosHttpResponseError, ServiceResponseError)) as exc_info:
                 await perform_write_operation(write_operation,
                                               container,
@@ -187,10 +198,9 @@ async def test_ppaf_partition_thresholds_and_routing_async(self, write_operation
         assert initial_region in partition_info.unavailable_regional_endpoints
         assert initial_region != partition_info.current_region # west us 3 != west us
 
-        # Since we are failing every request, even though we retried to the next region, that retry should have failed as well
-        # This means we should have one extra failure - verify that the value makes sense
+        # 12 failures - 10 to trigger failover, 2 more to start counting again
         failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
-        assert failure_count == 3
+        assert failure_count == 2
 
     @pytest.mark.parametrize("write_operation, exclude_client_regions", write_operations_and_boolean())
     async def test_ppaf_exclude_regions_async(self, write_operation, exclude_client_regions):

From f982d218c49692c54bde866abe3b5f9fd0a5c7bc Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Wed, 27 Aug 2025 14:26:38 -0500
Subject: [PATCH 39/68] Update test_per_partition_circuit_breaker_sm_mrr.py

---
 .../tests/test_per_partition_circuit_breaker_sm_mrr.py       | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py
index 7c63e8e82897..d2f93cf019c0 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py
@@ -71,10 +71,11 @@ def setup_info(self, error, **kwargs):
         return setup, doc, expected_uri, uri_down, custom_setup, custom_transport, predicate
 
     def test_stat_reset(self):
+        status_code = 500
         error_lambda = lambda r: FaultInjectionTransport.error_after_delay(
             0,
             CosmosHttpResponseError(
-                status_code=503,
+                status_code=status_code,
                 message="Some injected error.")
         )
         setup, doc, expected_uri, uri_down, custom_setup, custom_transport, predicate = \
@@ -103,7 +104,7 @@ def test_stat_reset(self):
                                             PK_VALUE,
                                             expected_uri)
                 except CosmosHttpResponseError as e:
-                    assert e.status_code == 503
+                    assert e.status_code == status_code
             validate_unhealthy_partitions(global_endpoint_manager, 0)
             validate_stats(global_endpoint_manager, 0,  2, 2, 0, 0, 0)
             sleep(25)

From d9ca7a43181b44177354402a26d637345c713804 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Wed, 27 Aug 2025 15:30:32 -0500
Subject: [PATCH 40/68] test fixes

---
 .../tests/test_per_partition_circuit_breaker_mm.py           | 5 +++--
 .../tests/test_per_partition_circuit_breaker_mm_async.py     | 5 +++--
 .../tests/test_per_partition_circuit_breaker_sm_mrr_async.py | 5 +++--
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
index 8fe56c06b11b..8ab0f5f2e9b2 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
@@ -405,10 +405,11 @@ def setup_info(self, error, **kwargs):
         return container, doc, expected_uri, uri_down, fault_injection_container, custom_transport, predicate
 
     def test_stat_reset(self):
+        status_code = 500
         error_lambda = lambda r: FaultInjectionTransport.error_after_delay(
             0,
             CosmosHttpResponseError(
-                status_code=503,
+                status_code=status_code,
                 message="Some injected error.")
         )
         container, doc, expected_uri, uri_down, fault_injection_container, custom_transport, predicate = \
@@ -435,7 +436,7 @@ def test_stat_reset(self):
                                             PK_VALUE,
                                             expected_uri)
                 except CosmosHttpResponseError as e:
-                    assert e.status_code == 503
+                    assert e.status_code == status_code
             validate_unhealthy_partitions(global_endpoint_manager, 0)
             validate_stats(global_endpoint_manager, 2,  2, 2, 2, 0, 0)
             sleep(25)
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py
index 40147314e4ff..095d3c6ff849 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py
@@ -339,10 +339,11 @@ async def test_read_failure_rate_threshold_async(self, read_operation, error):
         await cleanup_method([custom_setup, setup])
 
     async def test_stat_reset_async(self):
+        status_code = 500
         error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(
             0,
             CosmosHttpResponseError(
-                status_code=503,
+                status_code=status_code,
                 message="Some injected error.")
         ))
         setup, doc, expected_uri, uri_down, custom_setup, custom_transport, predicate = \
@@ -371,7 +372,7 @@ async def test_stat_reset_async(self):
                                                   PK_VALUE,
                                                   expected_uri)
                 except CosmosHttpResponseError as e:
-                    assert e.status_code == 503
+                    assert e.status_code == status_code
             validate_unhealthy_partitions(global_endpoint_manager, 0)
             validate_stats(global_endpoint_manager, 2, 2, 2, 2, 0, 0)
             await asyncio.sleep(25)
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py
index 9779b9c68362..6a81aea15b88 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py
@@ -135,10 +135,11 @@ async def test_write_failure_rate_threshold_async(self, write_operation, error):
         await cleanup_method([custom_setup, setup])
 
     async def test_stat_reset_async(self):
+        status_code = 500
         error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(
             0,
             CosmosHttpResponseError(
-                status_code=503,
+                status_code=status_code,
                 message="Some injected error.")
         ))
         setup, doc, expected_uri, uri_down, custom_setup, custom_transport, predicate = \
@@ -167,7 +168,7 @@ async def test_stat_reset_async(self):
                                                   PK_VALUE,
                                                   expected_uri)
                 except CosmosHttpResponseError as e:
-                    assert e.status_code == 503
+                    assert e.status_code == status_code
             validate_unhealthy_partitions(global_endpoint_manager, 0)
             validate_stats(global_endpoint_manager, 0,  2, 2, 0, 0, 0)
             await asyncio.sleep(25)

From f1dce5dc37d83dcae6654b92e74e2fd095171d9c Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Wed, 27 Aug 2025 18:08:31 -0500
Subject: [PATCH 41/68] Update test_excluded_locations.py

---
 sdk/cosmos/azure-cosmos/tests/test_excluded_locations.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdk/cosmos/azure-cosmos/tests/test_excluded_locations.py b/sdk/cosmos/azure-cosmos/tests/test_excluded_locations.py
index 7252593e41bf..e262cc8e8c4b 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_excluded_locations.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_excluded_locations.py
@@ -438,6 +438,7 @@ def test_delete_item(self, test_data):
             MOCK_HANDLER.reset()
 
             # API call: delete_item
+            container.upsert_item(body)
             if request_excluded_locations is None:
                 container.delete_item(item_id, PARTITION_KEY_VALUES)
             else:

From 1582cf3c3ba15aebc8ca5bf7233cb5a45b4350b7 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 28 Aug 2025 19:09:23 -0500
Subject: [PATCH 42/68] small improvement to region-finding

---
 ...anager_per_partition_automatic_failover.py | 36 ++++++++++++-------
 ..._per_partition_automatic_failover_async.py | 36 ++++++++++++-------
 2 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 6e06e3012c18..9eacc261b8d6 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -34,7 +34,7 @@ class PartitionLevelFailoverInfo:
     Used to track the partition key range and the regions where it is available.
     """
     def __init__(self):
-        self.unavailable_regional_endpoints: Dict[str, RegionalRoutingContext] = {}
+        self.unavailable_regional_endpoints: Dict[str, str] = {}
         self.current_region = None
         self._lock = threading.Lock()
 
@@ -43,9 +43,16 @@ def try_move_to_next_location(
             available_account_regional_endpoints: Dict[str, str],
             endpoint_region: str,
             request: RequestObject) -> bool:
+        """
+        Tries to move to the next available regional endpoint for the partition key range.
+        :param Dict[str, str] available_account_regional_endpoints: The available regional endpoints
+        :param str endpoint_region: The current regional endpoint
+        :param RequestObject request: The request object containing the routing context.
+        :return: True if the move was successful, False otherwise.
+        :rtype: bool
+        """
         with self._lock:
             if endpoint_region != self.current_region:
-                logger.warning("PPAF - Moving to next available regional endpoint %s", self.current_region)
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
                 request.route_to_location(regional_endpoint)
                 return True
@@ -58,7 +65,7 @@ def try_move_to_next_location(
                     continue
 
                 self.current_region = regional_endpoint
-                logger.info("PPAF - Moving to next available regional endpoint: %s", self.current_region)
+                logger.warning("PPAF - Moving to next available regional endpoint: %s", self.current_region)
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
                 request.route_to_location(regional_endpoint)
                 return True
@@ -156,18 +163,23 @@ def resolve_service_endpoint_for_partition(
                 if request.location_endpoint_to_route is not None:
                     endpoint_region = self.location_cache.get_location_from_endpoint(request.location_endpoint_to_route)
                     if endpoint_region in partition_failover_info.unavailable_regional_endpoints:
-                        # If the current region is unavailable, we try to move to the next available region
-                        if not partition_failover_info.try_move_to_next_location(
+                        available_account_regional_endpoints = self.compute_available_preferred_regions(request)
+                        if endpoint_region != partition_failover_info.current_region:
+                            # this request has not yet seen there's an available region being used for this partition
+                            regional_endpoint = available_account_regional_endpoints[
+                                partition_failover_info.current_region]
+                            request.route_to_location(regional_endpoint)
+                        else:
+                            # If the current region is unavailable, we try to move to the next available region
+                            if not partition_failover_info.try_move_to_next_location(
                                 self.compute_available_preferred_regions(request),
                                 endpoint_region,
                                 request):
-                            logger.warning("All available regions for partition are unavailable. Refreshing cache.")
-                            # If no other region is available, we invalidate the cache and start once again from our
-                            # main write region in the account configurations
-                            self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
-                            request.clear_route_to_location()
-                            return self._resolve_service_endpoint_for_partition_circuit_breaker(request,
-                                                                                                pk_range_wrapper)
+                                logger.warning("All available regions for partition are unavailable. Refreshing cache.")
+                                # If no other region is available, we invalidate the cache and start once again
+                                # from our main write region in the account configurations
+                                self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
+                                request.clear_route_to_location()
                     else:
                         # Update the current regional endpoint to whatever the request is routing to
                         partition_failover_info.current_region = endpoint_region
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 40d54c64a981..adb47bed6931 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -34,7 +34,7 @@ class PartitionLevelFailoverInfo:
     Used to track the partition key range and the regions where it is available.
     """
     def __init__(self):
-        self.unavailable_regional_endpoints: Dict[str, RegionalRoutingContext] = {}
+        self.unavailable_regional_endpoints: Dict[str, str] = {}
         self.current_region = None
         self._lock = threading.Lock()
 
@@ -43,10 +43,16 @@ def try_move_to_next_location(
             available_account_regional_endpoints: Dict[str, str],
             endpoint_region: str,
             request: RequestObject) -> bool:
+        """
+        Tries to move to the next available regional endpoint for the partition key range.
+        :param Dict[str, str] available_account_regional_endpoints: The available regional endpoints
+        :param str endpoint_region: The current regional endpoint
+        :param RequestObject request: The request object containing the routing context.
+        :return: True if the move was successful, False otherwise.
+        :rtype: bool
+        """
         with self._lock:
             if endpoint_region != self.current_region:
-                logger.warning("PPAF - Moving to next available regional endpoint: %s", self.current_region)
-                # make the actual endpoint since the current_region is just West US
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
                 request.route_to_location(regional_endpoint)
                 return True
@@ -76,6 +82,7 @@ def __init__(self, client: "CosmosClientConnection"):
         super(_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync, self).__init__(client)
         self.partition_range_to_failover_info: Dict[PartitionKeyRangeWrapper, PartitionLevelFailoverInfo] = {}
         self.ppaf_thresholds_tracker = _PPAFPartitionThresholdsTracker()
+        self._lock = threading.Lock()
 
     def is_per_partition_automatic_failover_enabled(self) -> bool:
         if not self._database_account_cache or not self._database_account_cache._EnablePerPartitionFailoverBehavior:
@@ -157,18 +164,23 @@ def resolve_service_endpoint_for_partition(
                 if request.location_endpoint_to_route is not None:
                     endpoint_region = self.location_cache.get_location_from_endpoint(request.location_endpoint_to_route)
                     if endpoint_region in partition_failover_info.unavailable_regional_endpoints:
-                        # If the current region is unavailable, we try to move to the next available region
-                        if not partition_failover_info.try_move_to_next_location(
+                        available_account_regional_endpoints = self.compute_available_preferred_regions(request)
+                        if endpoint_region != partition_failover_info.current_region:
+                            # this request has not yet seen there's an available region being used for this partition
+                            regional_endpoint = available_account_regional_endpoints[
+                                partition_failover_info.current_region]
+                            request.route_to_location(regional_endpoint)
+                        else:
+                            # If the current region is unavailable, we try to move to the next available region
+                            if not partition_failover_info.try_move_to_next_location(
                                 self.compute_available_preferred_regions(request),
                                 endpoint_region,
                                 request):
-                            logger.warning("All available regions for partition are unavailable. Refreshing cache.")
-                            # If no other region is available, we invalidate the cache and start once again from our
-                            # main write region in the account configurations
-                            self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
-                            request.clear_route_to_location()
-                            return self._resolve_service_endpoint_for_partition_circuit_breaker(request,
-                                                                                                pk_range_wrapper)
+                                logger.warning("All available regions for partition are unavailable. Refreshing cache.")
+                                # If no other region is available, we invalidate the cache and start once again
+                                # from our main write region in the account configurations
+                                self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
+                                request.clear_route_to_location()
                     else:
                         # Update the current regional endpoint to whatever the request is routing to
                         endpoint_region = self.location_cache.get_location_from_endpoint(

From 8f7ec0ce42166c64de9220503ad27e4b60daa3b7 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Fri, 29 Aug 2025 08:48:10 -0500
Subject: [PATCH 43/68] pylint

---
 ...tition_endpoint_manager_per_partition_automatic_failover.py | 3 +--
 ..._endpoint_manager_per_partition_automatic_failover_async.py | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 9eacc261b8d6..84035dd3316f 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -16,7 +16,6 @@
     _GlobalPartitionEndpointManagerForCircuitBreaker
 from azure.cosmos._partition_health_tracker import _PPAFPartitionThresholdsTracker
 from azure.cosmos.documents import _OperationType
-from azure.cosmos._location_cache import RegionalRoutingContext
 from azure.cosmos._request_object import RequestObject
 from azure.cosmos._routing.routing_range import PartitionKeyRangeWrapper
 
@@ -25,7 +24,7 @@
 
 logger = logging.getLogger("azure.cosmos._GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover")
 
-# pylint: disable=name-too-long, protected-access
+# pylint: disable=name-too-long, protected-access, too-many-nested-blocks
 #cspell:ignore PPAF, ppaf, ppcb
 
 class PartitionLevelFailoverInfo:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index adb47bed6931..8d0680632d0a 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -15,7 +15,6 @@
 from azure.cosmos.aio._global_partition_endpoint_manager_circuit_breaker_async import \
     _GlobalPartitionEndpointManagerForCircuitBreakerAsync
 from azure.cosmos.documents import _OperationType
-from azure.cosmos._location_cache import RegionalRoutingContext
 from azure.cosmos._partition_health_tracker import _PPAFPartitionThresholdsTracker
 from azure.cosmos._request_object import RequestObject
 from azure.cosmos._routing.routing_range import PartitionKeyRangeWrapper
@@ -25,7 +24,7 @@
 
 logger = logging.getLogger("azure.cosmos._GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover")
 
-# pylint: disable=name-too-long, protected-access
+# pylint: disable=name-too-long, protected-access, too-many-nested-blocks
 #cspell:ignore PPAF, ppaf, ppcb
 
 class PartitionLevelFailoverInfo:

From effb6d1f99b3d7ee81336e273c732fec8287599d Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Fri, 29 Aug 2025 15:33:14 -0500
Subject: [PATCH 44/68] Update
 _global_partition_endpoint_manager_per_partition_automatic_failover.py

---
 ...artition_endpoint_manager_per_partition_automatic_failover.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 84035dd3316f..8fab11b3a1e3 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -137,7 +137,6 @@ def try_ppaf_failover_threshold(
                 regional_context = (self.location_cache.
                                     account_read_regional_routing_contexts_by_location.get(location).primary_endpoint)
                 partition_level_info.unavailable_regional_endpoints[location] = regional_context
-                print(3)
 
     def resolve_service_endpoint_for_partition(
             self,

From 1e773f5abb0c4b996eaca45e17b4cbc21aee0933 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Fri, 29 Aug 2025 16:04:27 -0500
Subject: [PATCH 45/68] address comments, add threshold lock

---
 ...anager_per_partition_automatic_failover.py | 27 ++++++++++++------
 ..._per_partition_automatic_failover_async.py | 28 ++++++++++++-------
 2 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 8fab11b3a1e3..2a48ca9eb3b9 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -80,6 +80,7 @@ def __init__(self, client: "CosmosClientConnection"):
         super(_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover, self).__init__(client)
         self.partition_range_to_failover_info: Dict[PartitionKeyRangeWrapper, PartitionLevelFailoverInfo] = {}
         self.ppaf_thresholds_tracker = _PPAFPartitionThresholdsTracker()
+        self._threshold_lock = threading.Lock()
 
     def is_per_partition_automatic_failover_enabled(self) -> bool:
         if not self._database_account_cache or not self._database_account_cache._EnablePerPartitionFailoverBehavior:
@@ -129,14 +130,21 @@ def try_ppaf_failover_threshold(
             if (self.ppaf_thresholds_tracker.get_pk_failures(pk_range_wrapper)
                     >= int(os.environ.get(Constants.TIMEOUT_ERROR_THRESHOLD_PPAF,
                                           Constants.TIMEOUT_ERROR_THRESHOLD_PPAF_DEFAULT))):
-                # If the PPAF threshold is reached, we reset the count and retry to the next region
-                self.ppaf_thresholds_tracker.clear_pk_failures(pk_range_wrapper)
-                partition_level_info = self.partition_range_to_failover_info[pk_range_wrapper]
-                location = self.location_cache.get_location_from_endpoint(
-                    str(request.location_endpoint_to_route))
-                regional_context = (self.location_cache.
-                                    account_read_regional_routing_contexts_by_location.get(location).primary_endpoint)
-                partition_level_info.unavailable_regional_endpoints[location] = regional_context
+                # If the PPAF threshold is reached, we reset the count and mark the endpoint unavailable
+                with self._threshold_lock:
+                    logger.warning("PPAF - Failover threshold reached for partition key range: %s", pk_range_wrapper)
+                    # Check for count again, since a previous request may have now reset the count
+                    if (self.ppaf_thresholds_tracker.get_pk_failures(pk_range_wrapper)
+                            >= int(os.environ.get(Constants.TIMEOUT_ERROR_THRESHOLD_PPAF,
+                                                  Constants.TIMEOUT_ERROR_THRESHOLD_PPAF_DEFAULT))):
+                        self.ppaf_thresholds_tracker.clear_pk_failures(pk_range_wrapper)
+                        partition_level_info = self.partition_range_to_failover_info[pk_range_wrapper]
+                        location = self.location_cache.get_location_from_endpoint(
+                            str(request.location_endpoint_to_route))
+                        regional_context = (self.location_cache.
+                                            account_read_regional_routing_contexts_by_location.
+                                            get(location).primary_endpoint)
+                        partition_level_info.unavailable_regional_endpoints[location] = regional_context
 
     def resolve_service_endpoint_for_partition(
             self,
@@ -173,7 +181,8 @@ def resolve_service_endpoint_for_partition(
                                 self.compute_available_preferred_regions(request),
                                 endpoint_region,
                                 request):
-                                logger.warning("All available regions for partition are unavailable. Refreshing cache.")
+                                logger.warning("All available regions for partition %s are unavailable."
+                                               " Refreshing cache.", pk_range_wrapper)
                                 # If no other region is available, we invalidate the cache and start once again
                                 # from our main write region in the account configurations
                                 self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 8d0680632d0a..f110146f21a2 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -81,7 +81,7 @@ def __init__(self, client: "CosmosClientConnection"):
         super(_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync, self).__init__(client)
         self.partition_range_to_failover_info: Dict[PartitionKeyRangeWrapper, PartitionLevelFailoverInfo] = {}
         self.ppaf_thresholds_tracker = _PPAFPartitionThresholdsTracker()
-        self._lock = threading.Lock()
+        self._threshold_lock = threading.Lock()
 
     def is_per_partition_automatic_failover_enabled(self) -> bool:
         if not self._database_account_cache or not self._database_account_cache._EnablePerPartitionFailoverBehavior:
@@ -131,14 +131,21 @@ def try_ppaf_failover_threshold(
             if (self.ppaf_thresholds_tracker.get_pk_failures(pk_range_wrapper)
                     >= int(os.environ.get(Constants.TIMEOUT_ERROR_THRESHOLD_PPAF,
                                           Constants.TIMEOUT_ERROR_THRESHOLD_PPAF_DEFAULT))):
-                # If the PPAF threshold is reached, we reset the count and retry to the next region
-                self.ppaf_thresholds_tracker.clear_pk_failures(pk_range_wrapper)
-                partition_level_info = self.partition_range_to_failover_info[pk_range_wrapper]
-                location = self.location_cache.get_location_from_endpoint(
-                    str(request.location_endpoint_to_route))
-                regional_context = (self.location_cache.
-                                    account_read_regional_routing_contexts_by_location.get(location).primary_endpoint)
-                partition_level_info.unavailable_regional_endpoints[location] = regional_context
+                # If the PPAF threshold is reached, we reset the count and mark the endpoint unavailable
+                with self._threshold_lock:
+                    logger.warning("PPAF - Failover threshold reached for partition key range: %s", pk_range_wrapper)
+                    # Check for count again, since a previous request may have now reset the count
+                    if (self.ppaf_thresholds_tracker.get_pk_failures(pk_range_wrapper)
+                            >= int(os.environ.get(Constants.TIMEOUT_ERROR_THRESHOLD_PPAF,
+                                                  Constants.TIMEOUT_ERROR_THRESHOLD_PPAF_DEFAULT))):
+                        self.ppaf_thresholds_tracker.clear_pk_failures(pk_range_wrapper)
+                        partition_level_info = self.partition_range_to_failover_info[pk_range_wrapper]
+                        location = self.location_cache.get_location_from_endpoint(
+                            str(request.location_endpoint_to_route))
+                        regional_context = (self.location_cache.
+                                            account_read_regional_routing_contexts_by_location.
+                                            get(location).primary_endpoint)
+                        partition_level_info.unavailable_regional_endpoints[location] = regional_context
 
     def resolve_service_endpoint_for_partition(
             self,
@@ -175,7 +182,8 @@ def resolve_service_endpoint_for_partition(
                                 self.compute_available_preferred_regions(request),
                                 endpoint_region,
                                 request):
-                                logger.warning("All available regions for partition are unavailable. Refreshing cache.")
+                                logger.warning("All available regions for partition %s are unavailable."
+                                               " Refreshing cache.", pk_range_wrapper)
                                 # If no other region is available, we invalidate the cache and start once again
                                 # from our main write region in the account configurations
                                 self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()

From 24a44d9bff0ab5a2f2344c1489eb8ed84a1e37c7 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Fri, 29 Aug 2025 16:06:11 -0500
Subject: [PATCH 46/68] add more comments

---
 ...rtition_endpoint_manager_per_partition_automatic_failover.py | 2 ++
 ...n_endpoint_manager_per_partition_automatic_failover_async.py | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 2a48ca9eb3b9..9441068e6f57 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -131,6 +131,8 @@ def try_ppaf_failover_threshold(
                     >= int(os.environ.get(Constants.TIMEOUT_ERROR_THRESHOLD_PPAF,
                                           Constants.TIMEOUT_ERROR_THRESHOLD_PPAF_DEFAULT))):
                 # If the PPAF threshold is reached, we reset the count and mark the endpoint unavailable
+                # Once we mark the endpoint unavailable, the PPAF endpoint manager will try to move to the next
+                # available region for the partition key range
                 with self._threshold_lock:
                     logger.warning("PPAF - Failover threshold reached for partition key range: %s", pk_range_wrapper)
                     # Check for count again, since a previous request may have now reset the count
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index f110146f21a2..cdbc2153bc82 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -132,6 +132,8 @@ def try_ppaf_failover_threshold(
                     >= int(os.environ.get(Constants.TIMEOUT_ERROR_THRESHOLD_PPAF,
                                           Constants.TIMEOUT_ERROR_THRESHOLD_PPAF_DEFAULT))):
                 # If the PPAF threshold is reached, we reset the count and mark the endpoint unavailable
+                # Once we mark the endpoint unavailable, the PPAF endpoint manager will try to move to the next
+                # available region for the partition key range
                 with self._threshold_lock:
                     logger.warning("PPAF - Failover threshold reached for partition key range: %s", pk_range_wrapper)
                     # Check for count again, since a previous request may have now reset the count

From c77209247611776c3f3c8b43ac12b49cd290c25f Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Fri, 19 Sep 2025 16:25:58 -0400
Subject: [PATCH 47/68] edge cases

---
 .../azure/cosmos/_cosmos_http_logging_policy.py            | 2 +-
 ...on_endpoint_manager_per_partition_automatic_failover.py | 7 ++++---
 ...point_manager_per_partition_automatic_failover_async.py | 7 ++++---
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_http_logging_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_http_logging_policy.py
index d159128c9b13..6b56dfb73fc6 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_http_logging_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_http_logging_policy.py
@@ -180,7 +180,7 @@ def _get_client_settings(global_endpoint_manager: Optional[_GlobalEndpointManage
             gem_client = global_endpoint_manager.client
             if gem_client and gem_client.connection_policy:
                 connection_policy: ConnectionPolicy = gem_client.connection_policy
-                client_preferred_regions = connection_policy.PreferredLocations
+                client_preferred_regions = global_endpoint_manager.location_cache.effective_preferred_locations
                 client_excluded_regions = connection_policy.ExcludedLocations
 
         if global_endpoint_manager.location_cache:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 9441068e6f57..af3315d90d5f 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -51,7 +51,7 @@ def try_move_to_next_location(
         :rtype: bool
         """
         with self._lock:
-            if endpoint_region != self.current_region:
+            if endpoint_region != self.current_region and self.current_region is not None:
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
                 request.route_to_location(regional_endpoint)
                 return True
@@ -172,7 +172,8 @@ def resolve_service_endpoint_for_partition(
                     endpoint_region = self.location_cache.get_location_from_endpoint(request.location_endpoint_to_route)
                     if endpoint_region in partition_failover_info.unavailable_regional_endpoints:
                         available_account_regional_endpoints = self.compute_available_preferred_regions(request)
-                        if endpoint_region != partition_failover_info.current_region:
+                        if (partition_failover_info.current_region is not None and
+                                endpoint_region != partition_failover_info.current_region):
                             # this request has not yet seen there's an available region being used for this partition
                             regional_endpoint = available_account_regional_endpoints[
                                 partition_failover_info.current_region]
@@ -214,7 +215,7 @@ def compute_available_preferred_regions(
             excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
         else:
             excluded_locations = self.location_cache.connection_policy.ExcludedLocations
-        preferred_locations = self.PreferredLocations
+        preferred_locations = self.location_cache.effective_preferred_locations
         available_regions = [item for item in preferred_locations if item not in excluded_locations]
         available_regional_endpoints = {}
         for region, context in self.location_cache.account_read_regional_routing_contexts_by_location.items():
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index cdbc2153bc82..d3b0edbd467f 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -51,7 +51,7 @@ def try_move_to_next_location(
         :rtype: bool
         """
         with self._lock:
-            if endpoint_region != self.current_region:
+            if endpoint_region != self.current_region and self.current_region is not None:
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
                 request.route_to_location(regional_endpoint)
                 return True
@@ -173,7 +173,8 @@ def resolve_service_endpoint_for_partition(
                     endpoint_region = self.location_cache.get_location_from_endpoint(request.location_endpoint_to_route)
                     if endpoint_region in partition_failover_info.unavailable_regional_endpoints:
                         available_account_regional_endpoints = self.compute_available_preferred_regions(request)
-                        if endpoint_region != partition_failover_info.current_region:
+                        if (partition_failover_info.current_region is not None and
+                                endpoint_region != partition_failover_info.current_region):
                             # this request has not yet seen there's an available region being used for this partition
                             regional_endpoint = available_account_regional_endpoints[
                                 partition_failover_info.current_region]
@@ -217,7 +218,7 @@ def compute_available_preferred_regions(
             excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
         else:
             excluded_locations = self.location_cache.connection_policy.ExcludedLocations
-        preferred_locations = self.PreferredLocations
+        preferred_locations = self.location_cache.effective_preferred_locations
         available_regions = [item for item in preferred_locations if item not in excluded_locations]
         available_regional_endpoints = {}
         for region, context in self.location_cache.account_read_regional_routing_contexts_by_location.items():

From 3acda2406aeeb9643c08479e3b3c9c7359e77b9c Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Tue, 7 Oct 2025 14:40:39 -0400
Subject: [PATCH 48/68] changes from testing

---
 .../_endpoint_discovery_retry_policy.py       | 12 +++++++----
 ...anager_per_partition_automatic_failover.py | 16 ++++++++-------
 .../azure/cosmos/_retry_utility.py            |  3 +++
 .../azure/cosmos/_session_retry_policy.py     | 11 +++++-----
 ..._per_partition_automatic_failover_async.py | 20 ++++++++++++-------
 .../azure/cosmos/aio/_retry_utility_async.py  |  3 +++
 6 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
index 14eed13d8dbc..cf589d6fa370 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
@@ -58,17 +58,17 @@ def ShouldRetry(self, exception):  # pylint: disable=unused-argument
             return False
 
         if self.failover_retry_count >= self.Max_retry_attempt_count:
+            if self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
+                # only refresh the cache if PPAF is enabled once we're out of retries
+                self.global_endpoint_manager.refresh_needed = True
             return False
 
         self.failover_retry_count += 1
 
-        # set the refresh_needed flag to ensure that endpoint list is
-        # refreshed with new writable and readable locations
-        self.global_endpoint_manager.refresh_needed = True
-
         # If per partition automatic failover is applicable, we mark the current endpoint as unavailable
         # and resolve the service endpoint for the partition range - otherwise, continue the default retry logic
         if self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
+            #add log
             partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[self.pk_range_wrapper]
             location = self.global_endpoint_manager.location_cache.get_location_from_endpoint(
                 str(self.request.location_endpoint_to_route))
@@ -78,6 +78,10 @@ def ShouldRetry(self, exception):  # pylint: disable=unused-argument
             self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
             return True
 
+        # set the refresh_needed flag to ensure that endpoint list is
+        # refreshed with new writable and readable locations
+        self.global_endpoint_manager.refresh_needed = True
+
         if self.request.location_endpoint_to_route:
             if _OperationType.IsReadOnlyOperation(self.request.operation_type):
                 # Mark current read endpoint as unavailable
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index af3315d90d5f..b98ac44219d1 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -179,17 +179,19 @@ def resolve_service_endpoint_for_partition(
                                 partition_failover_info.current_region]
                             request.route_to_location(regional_endpoint)
                         else:
-                            # If the current region is unavailable, we try to move to the next available region
-                            if not partition_failover_info.try_move_to_next_location(
-                                self.compute_available_preferred_regions(request),
-                                endpoint_region,
-                                request):
-                                logger.warning("All available regions for partition %s are unavailable."
-                                               " Refreshing cache.", pk_range_wrapper)
+                            if len(self.compute_available_preferred_regions(request)) == len(partition_failover_info.unavailable_regional_endpoints):
                                 # If no other region is available, we invalidate the cache and start once again
                                 # from our main write region in the account configurations
+                                logger.warning("PPAF - All available regions for partition %s are unavailable."
+                                               " Refreshing cache.", pk_range_wrapper)
                                 self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
                                 request.clear_route_to_location()
+                            else:
+                                # If the current region is unavailable, we try to move to the next available region
+                                partition_failover_info.try_move_to_next_location(
+                                    self.compute_available_preferred_regions(request),
+                                    endpoint_region,
+                                    request)
                     else:
                         # Update the current regional endpoint to whatever the request is routing to
                         partition_failover_info.current_region = endpoint_region
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
index df8b8bb04bb0..8ecb0bd2bec7 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
@@ -208,6 +208,9 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
                     retry_policy.container_rid = cached_container["_rid"]
                     request.headers[retry_policy._intended_headers] = retry_policy.container_rid
             elif e.status_code == StatusCodes.SERVICE_UNAVAILABLE:
+                if args:
+                    # record the failure for ppaf/circuit breaker tracking
+                    global_endpoint_manager.record_failure(args[0], pk_range_wrapper)
                 retry_policy = service_unavailable_retry_policy
             elif e.status_code == StatusCodes.REQUEST_TIMEOUT or e.status_code >= StatusCodes.INTERNAL_SERVER_ERROR:
                 if args:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py
index 1e530ad9e37e..e11cb4838047 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_session_retry_policy.py
@@ -110,11 +110,12 @@ def ShouldRetry(self, _exception):
                 if location in pk_failover_info.unavailable_regional_endpoints:
                     # If the request endpoint is unavailable, we need to resolve the endpoint for the request using the
                     # partition-level failover info
-                    location_endpoint = (self.global_endpoint_manager.location_cache.
-                                         account_read_regional_routing_contexts_by_location.
-                                         get(pk_failover_info.current_region).primary_endpoint)
-                    self.request.route_to_location(location_endpoint)
-                    return True
+                    if pk_failover_info.current_region is not None:
+                        location_endpoint = (self.global_endpoint_manager.location_cache.
+                                             account_read_regional_routing_contexts_by_location.
+                                             get(pk_failover_info.current_region).primary_endpoint)
+                        self.request.route_to_location(location_endpoint)
+                        return True
 
         # Resolve the endpoint for the request and pin the resolution to the resolved endpoint
         # This enables marking the endpoint unavailability on endpoint failover/unreachability
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index d3b0edbd467f..821f55d96d06 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -51,6 +51,7 @@ def try_move_to_next_location(
         :rtype: bool
         """
         with self._lock:
+            print("got lock to move to next location")
             if endpoint_region != self.current_region and self.current_region is not None:
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
                 request.route_to_location(regional_endpoint)
@@ -67,6 +68,8 @@ def try_move_to_next_location(
                 logger.warning("PPAF - Moving to next available regional endpoint: %s", self.current_region)
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
                 request.route_to_location(regional_endpoint)
+                print(f"routing to {regional_endpoint} from {endpoint_region}")
+                print(f"current unavailable: {str(self.unavailable_regional_endpoints)}")
                 return True
 
             return False
@@ -175,22 +178,25 @@ def resolve_service_endpoint_for_partition(
                         available_account_regional_endpoints = self.compute_available_preferred_regions(request)
                         if (partition_failover_info.current_region is not None and
                                 endpoint_region != partition_failover_info.current_region):
+                            print("changed {} region to {} region (current)".format(endpoint_region, partition_failover_info.current_region))
                             # this request has not yet seen there's an available region being used for this partition
                             regional_endpoint = available_account_regional_endpoints[
                                 partition_failover_info.current_region]
                             request.route_to_location(regional_endpoint)
                         else:
-                            # If the current region is unavailable, we try to move to the next available region
-                            if not partition_failover_info.try_move_to_next_location(
-                                self.compute_available_preferred_regions(request),
-                                endpoint_region,
-                                request):
-                                logger.warning("All available regions for partition %s are unavailable."
-                                               " Refreshing cache.", pk_range_wrapper)
+                            if len(self.compute_available_preferred_regions(request)) == len(partition_failover_info.unavailable_regional_endpoints):
                                 # If no other region is available, we invalidate the cache and start once again
                                 # from our main write region in the account configurations
+                                logger.warning("All available regions for partition %s are unavailable."
+                                               " Refreshing cache.", pk_range_wrapper)
                                 self.partition_range_to_failover_info[pk_range_wrapper] = PartitionLevelFailoverInfo()
                                 request.clear_route_to_location()
+                            else:
+                                # If the current region is unavailable, we try to move to the next available region
+                                partition_failover_info.try_move_to_next_location(
+                                    self.compute_available_preferred_regions(request),
+                                    endpoint_region,
+                                    request)
                     else:
                         # Update the current regional endpoint to whatever the request is routing to
                         endpoint_region = self.location_cache.get_location_from_endpoint(
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
index 88caac506cb1..64d58e2bbb2a 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
@@ -205,6 +205,9 @@ async def ExecuteAsync(client, global_endpoint_manager, function, *args, **kwarg
                     retry_policy.container_rid = cached_container["_rid"]
                     request.headers[retry_policy._intended_headers] = retry_policy.container_rid
             elif e.status_code == StatusCodes.SERVICE_UNAVAILABLE:
+                if args:
+                    # record the failure for circuit breaker tracking
+                    await global_endpoint_manager.record_ppcb_failure(args[0], pk_range_wrapper)
                 retry_policy = service_unavailable_retry_policy
             elif e.status_code == StatusCodes.REQUEST_TIMEOUT or e.status_code >= StatusCodes.INTERNAL_SERVER_ERROR:
                 if args:

From 9a6b17b5e5367683078f0c73d0998965c3c951fa Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Tue, 7 Oct 2025 16:36:14 -0400
Subject: [PATCH 49/68] pylint

---
 .../azure/cosmos/_endpoint_discovery_retry_policy.py          | 2 ++
 ...ition_endpoint_manager_per_partition_automatic_failover.py | 3 ++-
 ...endpoint_manager_per_partition_automatic_failover_async.py | 4 ++--
 .../azure-cosmos/azure/cosmos/aio/_retry_utility_async.py     | 2 +-
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
index cf589d6fa370..c87373eb9560 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
@@ -23,6 +23,8 @@
 Azure Cosmos database service.
 """
 
+# cspell:ignore PPAF
+
 from azure.cosmos.documents import _OperationType
 
 class EndpointDiscoveryRetryPolicy(object):
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index b98ac44219d1..8502c9417fab 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -179,7 +179,8 @@ def resolve_service_endpoint_for_partition(
                                 partition_failover_info.current_region]
                             request.route_to_location(regional_endpoint)
                         else:
-                            if len(self.compute_available_preferred_regions(request)) == len(partition_failover_info.unavailable_regional_endpoints):
+                            if (len(self.compute_available_preferred_regions(request))
+                                    == len(partition_failover_info.unavailable_regional_endpoints)):
                                 # If no other region is available, we invalidate the cache and start once again
                                 # from our main write region in the account configurations
                                 logger.warning("PPAF - All available regions for partition %s are unavailable."
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 821f55d96d06..fe0fcf2ad7b2 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -178,13 +178,13 @@ def resolve_service_endpoint_for_partition(
                         available_account_regional_endpoints = self.compute_available_preferred_regions(request)
                         if (partition_failover_info.current_region is not None and
                                 endpoint_region != partition_failover_info.current_region):
-                            print("changed {} region to {} region (current)".format(endpoint_region, partition_failover_info.current_region))
                             # this request has not yet seen there's an available region being used for this partition
                             regional_endpoint = available_account_regional_endpoints[
                                 partition_failover_info.current_region]
                             request.route_to_location(regional_endpoint)
                         else:
-                            if len(self.compute_available_preferred_regions(request)) == len(partition_failover_info.unavailable_regional_endpoints):
+                            if (len(self.compute_available_preferred_regions(request)) ==
+                                    len(partition_failover_info.unavailable_regional_endpoints)):
                                 # If no other region is available, we invalidate the cache and start once again
                                 # from our main write region in the account configurations
                                 logger.warning("All available regions for partition %s are unavailable."
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
index 64d58e2bbb2a..bdc971d6bd61 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
@@ -47,7 +47,7 @@
 
 
 # pylint: disable=protected-access, disable=too-many-lines, disable=too-many-statements, disable=too-many-branches
-# cspell:ignore ppaf
+# cspell:ignore ppaf, ppcb
 
 # args [0] is the request object
 # args [1] is the connection policy

From 8f75444a7f9b18841f1a89b5eb162f174cb86e91 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Tue, 7 Oct 2025 21:09:10 -0400
Subject: [PATCH 50/68] fixes pylint/mypy

---
 ...int_manager_per_partition_automatic_failover.py | 14 +++++++-------
 .../azure/cosmos/_partition_health_tracker.py      |  2 +-
 ...nager_per_partition_automatic_failover_async.py | 14 +++++++-------
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 8502c9417fab..328f05bcd23e 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -8,7 +8,7 @@
 import threading
 import os
 
-from typing import Dict, TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, Optional
 
 from azure.cosmos.http_constants import ResourceType
 from azure.cosmos._constants import _Constants as Constants
@@ -32,14 +32,14 @@ class PartitionLevelFailoverInfo:
     Holds information about the partition level regional failover.
     Used to track the partition key range and the regions where it is available.
     """
-    def __init__(self):
-        self.unavailable_regional_endpoints: Dict[str, str] = {}
+    def __init__(self) -> None:
+        self.unavailable_regional_endpoints: dict[str, str] = {}
         self.current_region = None
         self._lock = threading.Lock()
 
     def try_move_to_next_location(
             self,
-            available_account_regional_endpoints: Dict[str, str],
+            available_account_regional_endpoints: dict[str, str],
             endpoint_region: str,
             request: RequestObject) -> bool:
         """
@@ -76,9 +76,9 @@ class _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover(_GlobalPar
     This internal class implements the logic for partition endpoint management for
     geo-replicated database accounts.
     """
-    def __init__(self, client: "CosmosClientConnection"):
+    def __init__(self, client: "CosmosClientConnection") -> None:
         super(_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover, self).__init__(client)
-        self.partition_range_to_failover_info: Dict[PartitionKeyRangeWrapper, PartitionLevelFailoverInfo] = {}
+        self.partition_range_to_failover_info: dict[PartitionKeyRangeWrapper, PartitionLevelFailoverInfo] = {}
         self.ppaf_thresholds_tracker = _PPAFPartitionThresholdsTracker()
         self._threshold_lock = threading.Lock()
 
@@ -207,7 +207,7 @@ def resolve_service_endpoint_for_partition(
     def compute_available_preferred_regions(
             self,
             request: RequestObject
-    ) -> Dict[str, str]:
+    ) -> dict[str, str]:
         """
         Computes the available regional endpoints for the request based on customer-set preferred and excluded regions.
         :param RequestObject request: The request object containing the routing context.
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
index 0759aa79ca48..8218950a8dff 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
@@ -311,7 +311,7 @@ class _PPAFPartitionThresholdsTracker(object):
     """
 
     def __init__(self) -> None:
-        self.pk_range_wrapper_to_failure_count: Dict[PartitionKeyRangeWrapper, int] = {}
+        self.pk_range_wrapper_to_failure_count: dict[PartitionKeyRangeWrapper, int] = {}
 
     def add_failure(self, pk_range_wrapper: PartitionKeyRangeWrapper) -> None:
         if pk_range_wrapper not in self.pk_range_wrapper_to_failure_count:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index fe0fcf2ad7b2..4c76dda04748 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -8,7 +8,7 @@
 import threading
 import os
 
-from typing import Dict, TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, Optional
 
 from azure.cosmos.http_constants import ResourceType
 from azure.cosmos._constants import _Constants as Constants
@@ -32,14 +32,14 @@ class PartitionLevelFailoverInfo:
     Holds information about the partition level regional failover.
     Used to track the partition key range and the regions where it is available.
     """
-    def __init__(self):
-        self.unavailable_regional_endpoints: Dict[str, str] = {}
+    def __init__(self) -> None:
+        self.unavailable_regional_endpoints: dict[str, str] = {}
         self.current_region = None
         self._lock = threading.Lock()
 
     def try_move_to_next_location(
             self,
-            available_account_regional_endpoints: Dict[str, str],
+            available_account_regional_endpoints: dict[str, str],
             endpoint_region: str,
             request: RequestObject) -> bool:
         """
@@ -80,9 +80,9 @@ class _GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync(
     This internal class implements the logic for partition endpoint management for
     geo-replicated database accounts.
     """
-    def __init__(self, client: "CosmosClientConnection"):
+    def __init__(self, client: "CosmosClientConnection") -> None:
         super(_GlobalPartitionEndpointManagerForPerPartitionAutomaticFailoverAsync, self).__init__(client)
-        self.partition_range_to_failover_info: Dict[PartitionKeyRangeWrapper, PartitionLevelFailoverInfo] = {}
+        self.partition_range_to_failover_info: dict[PartitionKeyRangeWrapper, PartitionLevelFailoverInfo] = {}
         self.ppaf_thresholds_tracker = _PPAFPartitionThresholdsTracker()
         self._threshold_lock = threading.Lock()
 
@@ -213,7 +213,7 @@ def resolve_service_endpoint_for_partition(
     def compute_available_preferred_regions(
             self,
             request: RequestObject
-    ) -> Dict[str, str]:
+    ) -> dict[str, str]:
         """
         Computes the available regional endpoints for the request based on customer-set preferred and excluded regions.
         :param RequestObject request: The request object containing the routing context.

From 0ccd9bfa703654f54c4ba24a8e05f01653777047 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Wed, 8 Oct 2025 09:12:02 -0400
Subject: [PATCH 51/68] mypy complaining about assigning str to none

---
 ...rtition_endpoint_manager_per_partition_automatic_failover.py | 2 +-
 ...n_endpoint_manager_per_partition_automatic_failover_async.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 328f05bcd23e..991e5709eda7 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -34,8 +34,8 @@ class PartitionLevelFailoverInfo:
     """
     def __init__(self) -> None:
         self.unavailable_regional_endpoints: dict[str, str] = {}
-        self.current_region = None
         self._lock = threading.Lock()
+        self.current_region: Optional[str] = None
 
     def try_move_to_next_location(
             self,
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 4c76dda04748..035cb5193548 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -34,8 +34,8 @@ class PartitionLevelFailoverInfo:
     """
     def __init__(self) -> None:
         self.unavailable_regional_endpoints: dict[str, str] = {}
-        self.current_region = None
         self._lock = threading.Lock()
+        self.current_region: Optional[str] = None
 
     def try_move_to_next_location(
             self,

From f4e4d655b3acdf9872f36d50c933d93bdeb33242 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Wed, 8 Oct 2025 10:03:33 -0400
Subject: [PATCH 52/68] testing changes - will roll back later

---
 .../azure/cosmos/aio/_asynchronous_request.py | 28 +++++++++++
 .../tests/workloads/r_w_q_workload.py         | 48 +++++++++++++++++--
 .../tests/workloads/workload_utils.py         | 17 ++++++-
 3 files changed, 87 insertions(+), 6 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
index 1cd2a22039b4..8981104688b1 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
@@ -24,6 +24,8 @@
 import copy
 import json
 import time
+from datetime import datetime, timezone
+import logging
 
 from urllib.parse import urlparse
 from azure.core.exceptions import DecodeError  # type: ignore
@@ -72,6 +74,8 @@ async def _Request(global_endpoint_manager, request_params, connection_policy, p
         if kwargs['timeout'] <= 0:
             raise exceptions.CosmosClientTimeoutError()
 
+    route_start = time.perf_counter()
+
     if request_params.endpoint_override:
         base_url = request_params.endpoint_override
     else:
@@ -98,6 +102,12 @@ async def _Request(global_endpoint_manager, request_params, connection_policy, p
         and not connection_policy.DisableSSLVerification
     )
 
+    route_end = time.perf_counter()    
+
+    route_duration = (route_end - route_start) * 1000
+
+    start = time.perf_counter()
+
     if connection_policy.SSLConfiguration or "connection_cert" in kwargs:
         ca_certs = connection_policy.SSLConfiguration.SSLCaCerts
         cert_files = (connection_policy.SSLConfiguration.SSLCertFile, connection_policy.SSLConfiguration.SSLKeyFile)
@@ -125,6 +135,24 @@ async def _Request(global_endpoint_manager, request_params, connection_policy, p
             **kwargs
         )
 
+    end = time.perf_counter()
+    duration = (end - start) * 1000
+
+    logger = logging.getLogger("internal_requests")
+    response_time = datetime.now(timezone.utc)
+    print_string = f"Response time: {response_time.isoformat()} | "
+    print_string += f"Request URL: {request.url} | "
+    print_string += f"Resource type: {request.headers['x-ms-thinclient-proxy-resource-type']} | "
+    print_string += f"Operation type: {request.headers['x-ms-thinclient-proxy-operation-type']} | "
+    print_string += f"Status code: {response.http_response.status_code} | "
+    print_string += f"Sub-status code: {response.http_response.headers.get('x-ms-substatus', 'N/A')} | "
+    print_string += f"Routing duration: {route_duration} ms | "
+    print_string += f"Request/response duration: {duration} ms | "
+    print_string += f"Activity Id: {request.headers.get('x-ms-activity-id', 'N/A')} |"
+    print_string += f"Partition Id: {request.headers.get('x-ms-cosmos-internal-partition-id', 'N/A')} |"
+    print_string += f"Physical Id: {request.headers.get('x-ms-cosmos-physical-partition-id', 'N/A')} |"
+    logger.info(print_string)
+
     response = response.http_response
     headers = copy.copy(response.headers)
 
diff --git a/sdk/cosmos/azure-cosmos/tests/workloads/r_w_q_workload.py b/sdk/cosmos/azure-cosmos/tests/workloads/r_w_q_workload.py
index 5e1db6425142..0d7730c6f86e 100644
--- a/sdk/cosmos/azure-cosmos/tests/workloads/r_w_q_workload.py
+++ b/sdk/cosmos/azure-cosmos/tests/workloads/r_w_q_workload.py
@@ -3,6 +3,9 @@
 import sys
 
 from azure.cosmos import documents
+from datetime import datetime, timezone
+import time
+from workload_utils import _get_upsert_item
 from workload_utils import *
 from workload_configs import *
 sys.path.append(r"/")
@@ -10,7 +13,27 @@
 from azure.cosmos.aio import CosmosClient as AsyncClient
 import asyncio
 
+async def log_request_counts(counter):
+    while True:
+        await asyncio.sleep(300)  # 5 minutes
+        count = counter["count"]
+        duration = counter["upsert_time"] + counter["read_time"]
+        print("Current UTC time:", datetime.now(timezone.utc))
+        print(f"Executed {count} requests in the last 5 minutes")
+        print(f"Errors in the last 5 minutes: {counter['error_count']}")
+        print(f"Per-request latency: {duration / count if count > 0 else 0} ms")
+        print(f"Upsert latency: {counter['upsert_time'] / (count / 2) if count > 0 else 0} ms")
+        print(f"Read latency: {counter['read_time'] / (count / 2) if count > 0 else 0} ms")
+        print("-------------------------------")
+        counter["count"] = 0  # reset for next interval
+        counter["upsert_time"] = 0
+        counter["read_time"] = 0
+        counter["error_count"] = 0
+
 async def run_workload(client_id, client_logger):
+    counter = {"count": 0, "upsert_time": 0, "read_time": 0, "error_count": 0}
+    # Start background task
+    asyncio.create_task(log_request_counts(counter))
     connectionPolicy = documents.ConnectionPolicy()
     connectionPolicy.UseMultipleWriteLocations = USE_MULTIPLE_WRITABLE_LOCATIONS
     async with AsyncClient(COSMOS_URI, COSMOS_CREDENTIAL, connection_policy=connectionPolicy,
@@ -23,15 +46,32 @@ async def run_workload(client_id, client_logger):
 
         while True:
             try:
-                await upsert_item_concurrently(cont, REQUEST_EXCLUDED_LOCATIONS, CONCURRENT_REQUESTS)
-                await read_item_concurrently(cont, REQUEST_EXCLUDED_LOCATIONS, CONCURRENT_REQUESTS)
-                await query_items_concurrently(cont, REQUEST_EXCLUDED_LOCATIONS, CONCURRENT_QUERIES)
+                upsert_start = time.perf_counter()
+                up_item = _get_upsert_item()
+                await cont.upsert_item(up_item)
+                elapsed = time.perf_counter() - upsert_start
+                counter["count"] += 1
+                counter["upsert_time"] += elapsed
+
+                read_start = time.perf_counter()
+                item = get_existing_random_item()
+                await cont.read_item(item["id"], item[PARTITION_KEY])
+                elapsed = time.perf_counter() - read_start
+                counter["count"] += 1
+                counter["read_time"] += elapsed
+
+                # await upsert_item_concurrently(cont, REQUEST_EXCLUDED_LOCATIONS, CONCURRENT_REQUESTS)
+                # await read_item_concurrently(cont, REQUEST_EXCLUDED_LOCATIONS, CONCURRENT_REQUESTS)
+                # await query_items_concurrently(cont, REQUEST_EXCLUDED_LOCATIONS, CONCURRENT_QUERIES)
             except Exception as e:
+                counter["error_count"] += 1
                 client_logger.info("Exception in application layer")
-                client_logger.error(e)
 
 
 if __name__ == "__main__":
     file_name = os.path.basename(__file__)
     prefix, logger = create_logger(file_name)
+    create_inner_logger()
+    utc_now = datetime.now(timezone.utc)
+    print("Current UTC time:", utc_now)
     asyncio.run(run_workload(prefix, logger))
diff --git a/sdk/cosmos/azure-cosmos/tests/workloads/workload_utils.py b/sdk/cosmos/azure-cosmos/tests/workloads/workload_utils.py
index 6a0f95128e5d..fe3d69b3bfbe 100644
--- a/sdk/cosmos/azure-cosmos/tests/workloads/workload_utils.py
+++ b/sdk/cosmos/azure-cosmos/tests/workloads/workload_utils.py
@@ -3,6 +3,7 @@
 import asyncio
 import os
 import random
+import sys
 import uuid
 from datetime import datetime
 from logging.handlers import RotatingFileHandler
@@ -160,15 +161,27 @@ def create_logger(file_name):
     handler = RotatingFileHandler(
         "log-" + get_user_agent(prefix) + '.log',
         maxBytes=1024 * 1024 * 10,  # 10 mb
-        backupCount=2
+        backupCount=5
     )
     logger.setLevel(LOG_LEVEL)
     # create filters for the logger handler to reduce the noise
     workload_logger_filter = WorkloadLoggerFilter()
-    handler.addFilter(workload_logger_filter)
+    # handler.addFilter(workload_logger_filter)
     logger.addHandler(handler)
     return prefix, logger
 
+def create_inner_logger(file_name="internal_logger_tues"):
+    logger = logging.getLogger("internal_requests")
+    prefix = os.path.splitext(file_name)[0] + "-" + str(os.getpid())
+    # Create a rotating file handler
+    handler = RotatingFileHandler(
+        "log-" + file_name + '.log',
+        maxBytes=1024 * 1024 * 10,  # 10 mb
+        backupCount=5
+    )
+    logger.setLevel(LOG_LEVEL)
+    logger.addHandler(handler)
+
 
 class WorkloadLoggerFilter(logging.Filter):
     def filter(self, record):

From 8f87b13f76facb15f4d033f1f037f1ef1c2c8a04 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 9 Oct 2025 19:30:56 -0400
Subject: [PATCH 53/68] Update _endpoint_discovery_retry_policy.py

---
 .../cosmos/_endpoint_discovery_retry_policy.py    | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
index c87373eb9560..fd209603a620 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
@@ -60,17 +60,17 @@ def ShouldRetry(self, exception):  # pylint: disable=unused-argument
             return False
 
         if self.failover_retry_count >= self.Max_retry_attempt_count:
-            if self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
-                # only refresh the cache if PPAF is enabled once we're out of retries
-                self.global_endpoint_manager.refresh_needed = True
             return False
 
         self.failover_retry_count += 1
 
+        # set the refresh_needed flag to ensure that endpoint list is
+        # refreshed with new writable and readable locations
+        self.global_endpoint_manager.refresh_needed = True
+
         # If per partition automatic failover is applicable, we mark the current endpoint as unavailable
         # and resolve the service endpoint for the partition range - otherwise, continue the default retry logic
         if self.global_endpoint_manager.is_per_partition_automatic_failover_applicable(self.request):
-            #add log
             partition_level_info = self.global_endpoint_manager.partition_range_to_failover_info[self.pk_range_wrapper]
             location = self.global_endpoint_manager.location_cache.get_location_from_endpoint(
                 str(self.request.location_endpoint_to_route))
@@ -80,10 +80,6 @@ def ShouldRetry(self, exception):  # pylint: disable=unused-argument
             self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
             return True
 
-        # set the refresh_needed flag to ensure that endpoint list is
-        # refreshed with new writable and readable locations
-        self.global_endpoint_manager.refresh_needed = True
-
         if self.request.location_endpoint_to_route:
             if _OperationType.IsReadOnlyOperation(self.request.operation_type):
                 # Mark current read endpoint as unavailable
@@ -99,8 +95,7 @@ def ShouldRetry(self, exception):  # pylint: disable=unused-argument
         self.request.clear_route_to_location()
 
         # set location-based routing directive based on retry count
-        # simulating single master writes by ensuring usePreferredLocations
-        # is set to false
+        # simulating single master writes by ensuring usePreferredLocations is set to false
         # reasoning being that 403.3 is only expected for write region failover in single writer account
         # and we must rely on account locations as they are the source of truth
         self.request.route_to_location_with_preferred_location_flag(self.failover_retry_count, False)

From 3e1f6bec8641995b094d1913ed0462b94842549a Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 16 Oct 2025 17:53:54 -0700
Subject: [PATCH 54/68] Update _asynchronous_request.py

---
 .../azure-cosmos/azure/cosmos/aio/_asynchronous_request.py   | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
index 8981104688b1..945c4615b49e 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
@@ -149,9 +149,10 @@ async def _Request(global_endpoint_manager, request_params, connection_policy, p
     print_string += f"Routing duration: {route_duration} ms | "
     print_string += f"Request/response duration: {duration} ms | "
     print_string += f"Activity Id: {request.headers.get('x-ms-activity-id', 'N/A')} |"
-    print_string += f"Partition Id: {request.headers.get('x-ms-cosmos-internal-partition-id', 'N/A')} |"
-    print_string += f"Physical Id: {request.headers.get('x-ms-cosmos-physical-partition-id', 'N/A')} |"
+    print_string += f"Partition Id: {response.http_response.headers.get('x-ms-cosmos-internal-partition-id', 'N/A')} |"
+    print_string += f"Physical Id: {response.http_response.headers.get('x-ms-cosmos-physical-partition-id', 'N/A')} |"
     logger.info(print_string)
+    print(print_string)
 
     response = response.http_response
     headers = copy.copy(response.headers)

From 42817fcf4aa6c3b1631a4157397e2dd83bfb4766 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Fri, 17 Oct 2025 14:07:07 -0700
Subject: [PATCH 55/68] add user agent feature flags

---
 .../azure-cosmos/azure/cosmos/_constants.py   | 18 ++++++++++++++-
 ...anager_per_partition_automatic_failover.py |  2 ++
 .../azure/cosmos/_retry_utility.py            |  8 +++++++
 .../azure-cosmos/azure/cosmos/_utils.py       | 23 ++++++++++++++++++-
 ..._per_partition_automatic_failover_async.py |  2 ++
 .../azure/cosmos/aio/_retry_utility_async.py  |  8 +++++++
 .../tests/_fault_injection_transport.py       |  4 ++--
 .../test_per_partition_automatic_failover.py  | 13 +++++++++++
 ..._per_partition_automatic_failover_async.py |  9 +++++++-
 .../test_per_partition_circuit_breaker_mm.py  | 13 +++++++++++
 ..._per_partition_circuit_breaker_mm_async.py | 14 ++++++++---
 ...st_per_partition_circuit_breaker_sm_mrr.py | 10 +++++++-
 ..._partition_circuit_breaker_sm_mrr_async.py | 16 +++++++++----
 13 files changed, 127 insertions(+), 13 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
index 149c2d4daf99..a82994414996 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
@@ -22,7 +22,7 @@
 """Class for defining internal constants in the Azure Cosmos database service.
 """
 
-
+from enum import IntEnum
 from typing_extensions import Literal
 # cspell:ignore PPAF
 
@@ -106,3 +106,19 @@ class Kwargs:
         """Whether to retry write operations if they fail. Used either at client level or request level."""
 
         EXCLUDED_LOCATIONS: Literal["excludedLocations"] = "excludedLocations"
+
+    class UserAgentFeatureFlags(IntEnum):
+        """
+        User agent feature flags.
+        Each flag represents a bit in a number to encode what features are enabled. Therefore, the first feature flag
+        will be 1, the second 2, the third 4, etc. When constructing the user agent suffix, the feature flags will be
+        used to encode a unique number representing the features enabled. This number will be converted into a hex
+        string following the prefix "F" to save space in the user agent as it is limited and appended to the user agent
+        suffix. This number will then be used to determine what features are enabled by decoding the hex string back
+        to a number and checking what bits are set.
+
+        Example:
+            If the user agent suffix has "F3", this means that flags 1 and 2.
+        """
+        PER_PARTITION_AUTOMATIC_FAILOVER = 1
+        PER_PARTITION_CIRCUIT_BREAKER = 2
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 991e5709eda7..0e912fbace67 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -218,6 +218,8 @@ def compute_available_preferred_regions(
             excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
         else:
             excluded_locations = self.location_cache.connection_policy.ExcludedLocations
+        if excluded_locations is None:
+            excluded_locations = []
         preferred_locations = self.location_cache.effective_preferred_locations
         available_regions = [item for item in preferred_locations if item not in excluded_locations]
         available_regional_endpoints = {}
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
index c61a59410671..d460fb0364d9 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
@@ -43,6 +43,7 @@
 from .exceptions import CosmosHttpResponseError
 from .http_constants import HttpHeaders, StatusCodes, SubStatusCodes, ResourceType
 from ._cosmos_http_logging_policy import _log_diagnostics_error
+from ._utils import get_user_agent_features
 
 
 # pylint: disable=protected-access, disable=too-many-lines, disable=too-many-statements, disable=too-many-branches
@@ -114,6 +115,13 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
         container_recreate_retry_policy = _container_recreate_retry_policy.ContainerRecreateRetryPolicy(
             client, client._container_properties_cache, None, *args)
 
+    user_agent_features = get_user_agent_features(global_endpoint_manager)
+    if len(user_agent_features) > 0:
+        user_agent = kwargs.pop("user_agent", client._user_agent)
+        user_agent = "{} {}".format(user_agent, user_agent_features)
+        kwargs.update({"user_agent": user_agent})
+        kwargs.update({"user_agent_overwrite": True})
+
     while True:
         client_timeout = kwargs.get('timeout')
         start_time = time.time()
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py
index 9144afca613d..8bb57ccd6562 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py
@@ -27,10 +27,12 @@
 import base64
 import json
 import time
+import os
 from typing import Any, Optional, Tuple
-
+from ._constants import _Constants
 from ._version import VERSION
 
+# cspell:ignore ppcb
 
 def get_user_agent(suffix: Optional[str] = None) -> str:
     os_name = safe_user_agent_header(platform.platform())
@@ -146,3 +148,22 @@ def valid_key_value_exist(
     :rtype: bool
     """
     return key in kwargs and kwargs[key] is not invalid_value
+
+
+def get_user_agent_features(global_endpoint_manager: Any) -> str:
+    """Check the account and client configurations in order to add feature flags to the user agent.
+
+    :param Any global_endpoint_manager: The global endpoint manager instance used to check against.
+    :return: The string representing the user agent features to include.
+    :rtype: str
+    """
+    feature_flag = 0
+    if global_endpoint_manager._database_account_cache is not None:
+        if global_endpoint_manager._database_account_cache._EnablePerPartitionFailoverBehavior is True:
+            feature_flag += _Constants.UserAgentFeatureFlags.PER_PARTITION_AUTOMATIC_FAILOVER
+    ppcb_check = os.environ.get(
+        _Constants.CIRCUIT_BREAKER_ENABLED_CONFIG,
+        _Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT).lower()
+    if ppcb_check == "true" or feature_flag > 0:
+        feature_flag += _Constants.UserAgentFeatureFlags.PER_PARTITION_CIRCUIT_BREAKER
+    return f"| F{feature_flag}" if feature_flag > 0 else ""
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 035cb5193548..0e91a5e42069 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -224,6 +224,8 @@ def compute_available_preferred_regions(
             excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
         else:
             excluded_locations = self.location_cache.connection_policy.ExcludedLocations
+        if excluded_locations is None:
+            excluded_locations = []
         preferred_locations = self.location_cache.effective_preferred_locations
         available_regions = [item for item in preferred_locations if item not in excluded_locations]
         available_regional_endpoints = {}
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
index 80dffa6aee72..f54cd61bfd0b 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
@@ -44,6 +44,7 @@
 from ..exceptions import CosmosHttpResponseError
 from ..http_constants import HttpHeaders, StatusCodes, SubStatusCodes
 from .._cosmos_http_logging_policy import _log_diagnostics_error
+from .._utils import get_user_agent_features
 
 
 # pylint: disable=protected-access, disable=too-many-lines, disable=too-many-statements, disable=too-many-branches
@@ -113,6 +114,13 @@ async def ExecuteAsync(client, global_endpoint_manager, function, *args, **kwarg
         container_recreate_retry_policy = ContainerRecreateRetryPolicy(
             client, client._container_properties_cache, None, *args)
 
+    user_agent_features = get_user_agent_features(global_endpoint_manager)
+    if len(user_agent_features) > 0:
+        user_agent = kwargs.pop("user_agent", client._user_agent)
+        user_agent = "{} {}".format(user_agent, user_agent_features)
+        kwargs.update({"user_agent": user_agent})
+        kwargs.update({"user_agent_overwrite": True})
+
     while True:
         client_timeout = kwargs.get('timeout')
         start_time = time.time()
diff --git a/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py b/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py
index e17cba91ee0a..f71a21003c98 100644
--- a/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py
+++ b/sdk/cosmos/azure-cosmos/tests/_fault_injection_transport.py
@@ -26,7 +26,7 @@
 import logging
 import sys
 from time import sleep
-from typing import Callable, Optional, Any, MutableMapping
+from typing import Callable, Optional, Any, MutableMapping, Mapping, Tuple, Sequence
 
 from azure.core.pipeline.transport import HttpRequest, HttpResponse
 from azure.core.pipeline.transport._requests_basic import RequestsTransport, RequestsTransportResponse
@@ -355,7 +355,7 @@ def __init__(
             self.files: Optional[Any] = files
             self.data: Optional[Any] = data
             self.multipart_mixed_info: Optional[
-                Tuple[Sequence[Any], Sequence[Any], Optional[str], Dict[str, Any]]] = None
+                Tuple[Sequence[Any], Sequence[Any], Optional[str], dict[str, Any]]] = None
 
     class MockHttpResponse(RequestsTransportResponse):
         def __init__(self, request: HttpRequest, status_code: int, content: Optional[Any] = None):
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index b3fede1c9f65..8f34f14a3913 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -264,11 +264,24 @@ def test_ppaf_session_unavailable_retry(self, write_operation, error):
         # We verify that the read request was going to the correct region by using the raw_response_hook
         fault_injection_container.read_item(doc_fail_id, PK_VALUE, raw_response_hook=session_retry_hook)
 
+    def test_ppaf_user_agent_feature_flag(self):
+        # Simple test to verify the user agent suffix is being updated with the relevant feature flags
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info()
+        fault_injection_container = custom_setup['col']
+        # Create a document to check the response headers
+        fault_injection_container.upsert_item(body={'id': doc_success_id, 'pk': PK_VALUE, 'name': 'sample document', 'key': 'value'},
+                                              raw_response_hook=ppaf_user_agent_hook)
+
 def session_retry_hook(raw_response):
     if raw_response.http_request.headers.get('x-ms-thinclient-proxy-resource-type') != 'databaseaccount':
         # This hook is used to verify the request routing that happens after the session retry logic
         region_string = "-" + REGION_2.replace(' ', '').lower() + "."
         assert region_string in raw_response.http_request.url
 
+def ppaf_user_agent_hook(raw_response):
+    # Used to verify the user agent feature flags
+    user_agent = raw_response.http_request.headers.get('user-agent')
+    assert user_agent.endswith('| F3')
+
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
index ac0e97576ed2..214662e20603 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -16,7 +16,7 @@
 from azure.cosmos._request_object import RequestObject
 from _fault_injection_transport import FaultInjectionTransport
 from _fault_injection_transport_async import FaultInjectionTransportAsync
-from test_per_partition_automatic_failover import create_failover_errors, create_threshold_errors, session_retry_hook
+from test_per_partition_automatic_failover import create_failover_errors, create_threshold_errors, session_retry_hook, ppaf_user_agent_hook
 from test_per_partition_circuit_breaker_mm import REGION_1, REGION_2, PK_VALUE, BATCH, write_operations_and_errors, write_operations_and_boolean
 from test_per_partition_circuit_breaker_mm_async import perform_write_operation
 
@@ -258,6 +258,13 @@ async def test_ppaf_session_unavailable_retry_async(self, write_operation, error
         # We verify that the read request was going to the correct region by using the raw_response_hook
         fault_injection_container.read_item(doc_fail_id, PK_VALUE, raw_response_hook=session_retry_hook)
 
+    async def test_ppaf_user_agent_feature_flag_async(self):
+        # Simple test to verify the user agent suffix is being updated with the relevant feature flags
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info()
+        fault_injection_container = custom_setup['col']
+        # Create a document to check the response headers
+        await fault_injection_container.upsert_item(body={'id': doc_success_id, 'pk': PK_VALUE, 'name': 'sample document', 'key': 'value'},
+                                                    raw_response_hook=ppaf_user_agent_hook)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
index 8ab0f5f2e9b2..8e4ab4386695 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
@@ -498,6 +498,14 @@ def test_service_request_error(self, read_operation, write_operation):
         # there shouldn't be region marked as unavailable
         assert len(global_endpoint_manager.location_cache.location_unavailability_info_by_endpoint) == 1
 
+    def test_circuit_breaker_user_agent_feature_flag_mm(self):
+        # Simple test to verify the user agent suffix is being updated with the relevant feature flags
+        custom_setup = self.setup_method_with_custom_transport(None)
+        container = custom_setup['col']
+        # Create a document to check the response headers
+        container.upsert_item(body={'id': str(uuid.uuid4()), 'pk': PK_VALUE, 'name': 'sample document', 'key': 'value'},
+                                              raw_response_hook=user_agent_hook)
+
     # test cosmos client timeout
 
 if __name__ == '__main__':
@@ -520,3 +528,8 @@ def validate_stats(global_endpoint_manager,
         assert health_info.write_failure_count == expected_write_failure_count
         assert health_info.read_success_count == expected_read_success_count
         assert health_info.write_success_count == expected_write_success_count
+
+def user_agent_hook(raw_response):
+    # Used to verify the user agent feature flags
+    user_agent = raw_response.http_request.headers.get('user-agent')
+    assert user_agent.endswith('| F2')
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py
index f8d06687483a..90131646c17a 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm_async.py
@@ -4,7 +4,7 @@
 import os
 import unittest
 import uuid
-from typing import Any
+from typing import Any, Union
 
 import pytest
 from azure.core.pipeline.transport._aiohttp import AioHttpTransport
@@ -18,7 +18,7 @@
 from test_per_partition_circuit_breaker_mm import create_doc, read_operations_and_errors, \
     write_operations_and_errors, operations, REGION_1, REGION_2, CHANGE_FEED, CHANGE_FEED_PK, CHANGE_FEED_EPK, READ, \
     CREATE, READ_ALL_ITEMS, DELETE_ALL_ITEMS_BY_PARTITION_KEY, QUERY, QUERY_PK, BATCH, UPSERT, REPLACE, PATCH, DELETE, \
-    PK_VALUE, validate_unhealthy_partitions, validate_response_uri
+    PK_VALUE, validate_unhealthy_partitions, validate_response_uri, user_agent_hook
 from test_per_partition_circuit_breaker_mm import validate_stats
 
 COLLECTION = "created_collection"
@@ -111,7 +111,7 @@ class TestPerPartitionCircuitBreakerMMAsync:
     TEST_DATABASE_ID = test_config.TestConfig.TEST_DATABASE_ID
     TEST_CONTAINER_MULTI_PARTITION_ID = test_config.TestConfig.TEST_MULTI_PARTITION_CONTAINER_ID
 
-    async def setup_method_with_custom_transport(self, custom_transport: AioHttpTransport, default_endpoint=host, **kwargs):
+    async def setup_method_with_custom_transport(self, custom_transport: Union[AioHttpTransport, Any], default_endpoint=host, **kwargs):
         container_id = kwargs.pop("container_id", None)
         if not container_id:
             container_id = self.TEST_CONTAINER_MULTI_PARTITION_ID
@@ -481,5 +481,13 @@ async def concurrent_upsert():
             _partition_health_tracker.INITIAL_UNAVAILABLE_TIME_MS = original_unavailable_time
             await cleanup_method([custom_setup, setup])
 
+    async def test_circuit_breaker_user_agent_feature_flag_mm_async(self):
+        # Simple test to verify the user agent suffix is being updated with the relevant feature flags
+        custom_setup = await self.setup_method_with_custom_transport(None)
+        container = custom_setup['col']
+        # Create a document to check the response headers
+        await container.upsert_item(body={'id': str(uuid.uuid4()), 'pk': PK_VALUE, 'name': 'sample document', 'key': 'value'},
+                                              raw_response_hook=user_agent_hook)
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py
index d92c46c0f622..0ec3df11d270 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr.py
@@ -15,7 +15,7 @@
 from azure.cosmos.exceptions import CosmosHttpResponseError
 from _fault_injection_transport import FaultInjectionTransport
 from test_per_partition_circuit_breaker_mm import create_doc, write_operations_and_errors, operations, REGION_1, \
-    REGION_2, PK_VALUE, perform_write_operation, perform_read_operation, CREATE, READ, validate_stats
+    REGION_2, PK_VALUE, perform_write_operation, perform_read_operation, CREATE, READ, validate_stats, user_agent_hook
 
 COLLECTION = "created_collection"
 
@@ -235,6 +235,14 @@ def test_service_request_error(self, read_operation, write_operation):
         # there shouldn't be region marked as unavailable
         assert len(global_endpoint_manager.location_cache.location_unavailability_info_by_endpoint) == 1
 
+    def test_circuit_breaker_user_agent_feature_flag_sm(self):
+        # Simple test to verify the user agent suffix is being updated with the relevant feature flags
+        custom_setup = self.setup_method_with_custom_transport(None)
+        container = custom_setup['col']
+        # Create a document to check the response headers
+        container.upsert_item(body={'id': str(uuid.uuid4()), 'pk': PK_VALUE, 'name': 'sample document', 'key': 'value'},
+                                              raw_response_hook=user_agent_hook)
+
     # test cosmos client timeout
 
 if __name__ == '__main__':
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py
index ae5bc8198043..2d43fb492b8c 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_sm_mrr_async.py
@@ -4,7 +4,7 @@
 import os
 import unittest
 import uuid
-from typing import Dict, Any
+from typing import Any, Union
 
 import pytest
 from azure.core.pipeline.transport._aiohttp import AioHttpTransport
@@ -17,7 +17,7 @@
 from _fault_injection_transport_async import FaultInjectionTransportAsync
 from test_per_partition_circuit_breaker_mm_async import perform_write_operation, cleanup_method, perform_read_operation
 from test_per_partition_circuit_breaker_mm import create_doc, write_operations_and_errors, operations, REGION_1, \
-    REGION_2, PK_VALUE, READ, validate_stats, CREATE
+    REGION_2, PK_VALUE, READ, validate_stats, CREATE, user_agent_hook
 from test_per_partition_circuit_breaker_sm_mrr import validate_unhealthy_partitions
 
 COLLECTION = "created_collection"
@@ -31,7 +31,7 @@ class TestPerPartitionCircuitBreakerSmMrrAsync:
     TEST_DATABASE_ID = test_config.TestConfig.TEST_DATABASE_ID
     TEST_CONTAINER_MULTI_PARTITION_ID = test_config.TestConfig.TEST_MULTI_PARTITION_CONTAINER_ID
 
-    async def setup_method_with_custom_transport(self, custom_transport: AioHttpTransport, default_endpoint=host, **kwargs):
+    async def setup_method_with_custom_transport(self, custom_transport: Union[AioHttpTransport, Any], default_endpoint=host, **kwargs):
         container_id = kwargs.pop("container_id", None)
         if not container_id:
             container_id = self.TEST_CONTAINER_MULTI_PARTITION_ID
@@ -43,7 +43,7 @@ async def setup_method_with_custom_transport(self, custom_transport: AioHttpTran
         return {"client": client, "db": db, "col": container}
 
     @staticmethod
-    async def cleanup_method(initialized_objects: Dict[str, Any]):
+    async def cleanup_method(initialized_objects: dict[str, Any]):
         method_client: CosmosClient = initialized_objects["client"]
         await method_client.close()
 
@@ -234,6 +234,14 @@ async def test_service_request_error_async(self, read_operation, write_operation
         assert len(global_endpoint_manager.location_cache.location_unavailability_info_by_endpoint) == 1
         await cleanup_method([custom_setup, setup])
 
+    async def test_circuit_breaker_user_agent_feature_flag_sm_async(self):
+        # Simple test to verify the user agent suffix is being updated with the relevant feature flags
+        custom_setup = await self.setup_method_with_custom_transport(None)
+        container = custom_setup['col']
+        # Create a document to check the response headers
+        await container.upsert_item(body={'id': str(uuid.uuid4()), 'pk': PK_VALUE, 'name': 'sample document', 'key': 'value'},
+                                              raw_response_hook=user_agent_hook)
+
     # test cosmos client timeout
 
 if __name__ == '__main__':

From 65f9e0126e9b64bd306dce1f4e925f9417256034 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Mon, 20 Oct 2025 18:01:05 -0400
Subject: [PATCH 56/68] Update test_per_partition_automatic_failover_async.py

---
 .../tests/test_per_partition_automatic_failover_async.py        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
index 214662e20603..25dbcf5b64af 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -20,7 +20,7 @@
 from test_per_partition_circuit_breaker_mm import REGION_1, REGION_2, PK_VALUE, BATCH, write_operations_and_errors, write_operations_and_boolean
 from test_per_partition_circuit_breaker_mm_async import perform_write_operation
 
-# cspell:disable
+#cspell:ignore PPAF, ppaf
 
 # These tests assume that the configured live account has one main write region and one secondary read region.
 

From e15e43d4b0fda695b866a2c07891d3931412b070 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Fri, 24 Oct 2025 16:22:55 -0400
Subject: [PATCH 57/68] move user agent logic

---
 .../azure/cosmos/aio/_asynchronous_request.py          | 10 ++++++++++
 .../azure/cosmos/aio/_retry_utility_async.py           |  8 --------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
index 945c4615b49e..24c05cab8959 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
@@ -34,6 +34,7 @@
 from .. import http_constants
 from . import _retry_utility_async
 from .._synchronized_request import _request_body_from_data, _replace_url_prefix
+from .._utils import get_user_agent_features
 
 
 async def _Request(global_endpoint_manager, request_params, connection_policy, pipeline_client, request, **kwargs): # pylint: disable=too-many-statements
@@ -90,6 +91,15 @@ async def _Request(global_endpoint_manager, request_params, connection_policy, p
 
     parse_result = urlparse(request.url)
 
+    # Add relevant enabled features to user agent for debugging
+    if request.headers['x-ms-thinclient-proxy-resource-type'] == 'docs':
+        user_agent_features = get_user_agent_features(global_endpoint_manager)
+        if len(user_agent_features) > 0:
+            user_agent = kwargs.pop("user_agent", global_endpoint_manager.client._user_agent)
+            user_agent = "{} {}".format(user_agent, user_agent_features)
+            kwargs.update({"user_agent": user_agent})
+            kwargs.update({"user_agent_overwrite": True})
+
     # The requests library now expects header values to be strings only starting 2.11,
     # and will raise an error on validation if they are not, so casting all header values to strings.
     request.headers.update({header: str(value) for header, value in request.headers.items()})
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
index f54cd61bfd0b..80dffa6aee72 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py
@@ -44,7 +44,6 @@
 from ..exceptions import CosmosHttpResponseError
 from ..http_constants import HttpHeaders, StatusCodes, SubStatusCodes
 from .._cosmos_http_logging_policy import _log_diagnostics_error
-from .._utils import get_user_agent_features
 
 
 # pylint: disable=protected-access, disable=too-many-lines, disable=too-many-statements, disable=too-many-branches
@@ -114,13 +113,6 @@ async def ExecuteAsync(client, global_endpoint_manager, function, *args, **kwarg
         container_recreate_retry_policy = ContainerRecreateRetryPolicy(
             client, client._container_properties_cache, None, *args)
 
-    user_agent_features = get_user_agent_features(global_endpoint_manager)
-    if len(user_agent_features) > 0:
-        user_agent = kwargs.pop("user_agent", client._user_agent)
-        user_agent = "{} {}".format(user_agent, user_agent_features)
-        kwargs.update({"user_agent": user_agent})
-        kwargs.update({"user_agent_overwrite": True})
-
     while True:
         client_timeout = kwargs.get('timeout')
         start_time = time.time()

From 0d7e887ef2315aacdad1af1727ba9bc780e4d804 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Wed, 29 Oct 2025 10:28:57 -0400
Subject: [PATCH 58/68] sync and async match, remove print statements

---
 .../azure/cosmos/_synchronized_request.py     | 10 +++++++
 .../azure-cosmos/azure/cosmos/_utils.py       |  3 ++-
 .../azure/cosmos/aio/_asynchronous_request.py | 27 -------------------
 ..._per_partition_automatic_failover_async.py |  5 ----
 4 files changed, 12 insertions(+), 33 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
index e6109b5bd621..8f3e36728f7f 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
@@ -29,6 +29,7 @@
 from azure.core.exceptions import DecodeError  # type: ignore
 
 from . import exceptions, http_constants, _retry_utility
+from ._utils import get_user_agent_features
 
 
 def _is_readable_stream(obj):
@@ -115,6 +116,15 @@ def _Request(global_endpoint_manager, request_params, connection_policy, pipelin
 
     parse_result = urlparse(request.url)
 
+    # Add relevant enabled features to user agent for debugging
+    if request.headers['x-ms-thinclient-proxy-resource-type'] == 'docs':
+        user_agent_features = get_user_agent_features(global_endpoint_manager)
+        if len(user_agent_features) > 0:
+            user_agent = kwargs.pop("user_agent", global_endpoint_manager.client._user_agent)
+            user_agent = "{} {}".format(user_agent, user_agent_features)
+            kwargs.update({"user_agent": user_agent})
+            kwargs.update({"user_agent_overwrite": True})
+
     # The requests library now expects header values to be strings only starting 2.11,
     # and will raise an error on validation if they are not, so casting all header values to strings.
     request.headers.update({header: str(value) for header, value in request.headers.items()})
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py
index 8bb57ccd6562..aaf7f5b39b83 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py
@@ -33,6 +33,7 @@
 from ._version import VERSION
 
 # cspell:ignore ppcb
+# pylint: disable=protected-access
 
 def get_user_agent(suffix: Optional[str] = None) -> str:
     os_name = safe_user_agent_header(platform.platform())
@@ -166,4 +167,4 @@ def get_user_agent_features(global_endpoint_manager: Any) -> str:
         _Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT).lower()
     if ppcb_check == "true" or feature_flag > 0:
         feature_flag += _Constants.UserAgentFeatureFlags.PER_PARTITION_CIRCUIT_BREAKER
-    return f"| F{feature_flag}" if feature_flag > 0 else ""
\ No newline at end of file
+    return f"| F{feature_flag}" if feature_flag > 0 else ""
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
index 24c05cab8959..873d6d58685f 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
@@ -24,8 +24,6 @@
 import copy
 import json
 import time
-from datetime import datetime, timezone
-import logging
 
 from urllib.parse import urlparse
 from azure.core.exceptions import DecodeError  # type: ignore
@@ -112,12 +110,6 @@ async def _Request(global_endpoint_manager, request_params, connection_policy, p
         and not connection_policy.DisableSSLVerification
     )
 
-    route_end = time.perf_counter()    
-
-    route_duration = (route_end - route_start) * 1000
-
-    start = time.perf_counter()
-
     if connection_policy.SSLConfiguration or "connection_cert" in kwargs:
         ca_certs = connection_policy.SSLConfiguration.SSLCaCerts
         cert_files = (connection_policy.SSLConfiguration.SSLCertFile, connection_policy.SSLConfiguration.SSLKeyFile)
@@ -145,25 +137,6 @@ async def _Request(global_endpoint_manager, request_params, connection_policy, p
             **kwargs
         )
 
-    end = time.perf_counter()
-    duration = (end - start) * 1000
-
-    logger = logging.getLogger("internal_requests")
-    response_time = datetime.now(timezone.utc)
-    print_string = f"Response time: {response_time.isoformat()} | "
-    print_string += f"Request URL: {request.url} | "
-    print_string += f"Resource type: {request.headers['x-ms-thinclient-proxy-resource-type']} | "
-    print_string += f"Operation type: {request.headers['x-ms-thinclient-proxy-operation-type']} | "
-    print_string += f"Status code: {response.http_response.status_code} | "
-    print_string += f"Sub-status code: {response.http_response.headers.get('x-ms-substatus', 'N/A')} | "
-    print_string += f"Routing duration: {route_duration} ms | "
-    print_string += f"Request/response duration: {duration} ms | "
-    print_string += f"Activity Id: {request.headers.get('x-ms-activity-id', 'N/A')} |"
-    print_string += f"Partition Id: {response.http_response.headers.get('x-ms-cosmos-internal-partition-id', 'N/A')} |"
-    print_string += f"Physical Id: {response.http_response.headers.get('x-ms-cosmos-physical-partition-id', 'N/A')} |"
-    logger.info(print_string)
-    print(print_string)
-
     response = response.http_response
     headers = copy.copy(response.headers)
 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 0e91a5e42069..29205b4051b7 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -51,7 +51,6 @@ def try_move_to_next_location(
         :rtype: bool
         """
         with self._lock:
-            print("got lock to move to next location")
             if endpoint_region != self.current_region and self.current_region is not None:
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
                 request.route_to_location(regional_endpoint)
@@ -68,8 +67,6 @@ def try_move_to_next_location(
                 logger.warning("PPAF - Moving to next available regional endpoint: %s", self.current_region)
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
                 request.route_to_location(regional_endpoint)
-                print(f"routing to {regional_endpoint} from {endpoint_region}")
-                print(f"current unavailable: {str(self.unavailable_regional_endpoints)}")
                 return True
 
             return False
@@ -199,8 +196,6 @@ def resolve_service_endpoint_for_partition(
                                     request)
                     else:
                         # Update the current regional endpoint to whatever the request is routing to
-                        endpoint_region = self.location_cache.get_location_from_endpoint(
-                            request.location_endpoint_to_route)
                         partition_failover_info.current_region = endpoint_region
             else:
                 partition_failover_info = PartitionLevelFailoverInfo()

From aa3b641279f049eca9b1402188ef7e5c060f204a Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Wed, 29 Oct 2025 11:39:18 -0400
Subject: [PATCH 59/68] leftover timer

---
 .../azure-cosmos/azure/cosmos/aio/_asynchronous_request.py      | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
index 873d6d58685f..56ba13c24e1d 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
@@ -73,8 +73,6 @@ async def _Request(global_endpoint_manager, request_params, connection_policy, p
         if kwargs['timeout'] <= 0:
             raise exceptions.CosmosClientTimeoutError()
 
-    route_start = time.perf_counter()
-
     if request_params.endpoint_override:
         base_url = request_params.endpoint_override
     else:

From 799f6de06f1472eff69a8a7974f633648b3ba67b Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 30 Oct 2025 13:44:27 -0400
Subject: [PATCH 60/68] Update _retry_utility.py

---
 sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
index 106709f0e7bf..af3fcd1edb11 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
@@ -43,7 +43,6 @@
 from .exceptions import CosmosHttpResponseError
 from .http_constants import HttpHeaders, StatusCodes, SubStatusCodes, ResourceType
 from ._cosmos_http_logging_policy import _log_diagnostics_error
-from ._utils import get_user_agent_features
 
 
 # pylint: disable=protected-access, disable=too-many-lines, disable=too-many-statements, disable=too-many-branches
@@ -115,13 +114,6 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
         container_recreate_retry_policy = _container_recreate_retry_policy.ContainerRecreateRetryPolicy(
             client, client._container_properties_cache, None, *args)
 
-    user_agent_features = get_user_agent_features(global_endpoint_manager)
-    if len(user_agent_features) > 0:
-        user_agent = kwargs.pop("user_agent", client._user_agent)
-        user_agent = "{} {}".format(user_agent, user_agent_features)
-        kwargs.update({"user_agent": user_agent})
-        kwargs.update({"user_agent_overwrite": True})
-
     while True:
         client_timeout = kwargs.get('timeout')
         start_time = time.time()

From 36249b451ea761ae4dbc77b079b355fae60fc4fe Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 30 Oct 2025 13:46:42 -0400
Subject: [PATCH 61/68] use constants

---
 sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py   | 2 +-
 .../azure-cosmos/azure/cosmos/aio/_asynchronous_request.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
index 8f3e36728f7f..4e37131e6161 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
@@ -117,7 +117,7 @@ def _Request(global_endpoint_manager, request_params, connection_policy, pipelin
     parse_result = urlparse(request.url)
 
     # Add relevant enabled features to user agent for debugging
-    if request.headers['x-ms-thinclient-proxy-resource-type'] == 'docs':
+    if request.headers[http_constants.HttpHeaders.ThinClientProxyResourceType] == http_constants.ResourceType.Document:
         user_agent_features = get_user_agent_features(global_endpoint_manager)
         if len(user_agent_features) > 0:
             user_agent = kwargs.pop("user_agent", global_endpoint_manager.client._user_agent)
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
index 56ba13c24e1d..7091c598d302 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
@@ -88,7 +88,7 @@ async def _Request(global_endpoint_manager, request_params, connection_policy, p
     parse_result = urlparse(request.url)
 
     # Add relevant enabled features to user agent for debugging
-    if request.headers['x-ms-thinclient-proxy-resource-type'] == 'docs':
+    if request.headers[http_constants.HttpHeaders.ThinClientProxyResourceType] == http_constants.ResourceType.Document:
         user_agent_features = get_user_agent_features(global_endpoint_manager)
         if len(user_agent_features) > 0:
             user_agent = kwargs.pop("user_agent", global_endpoint_manager.client._user_agent)

From 0495c7b696684a487ed950e1f5f6151f9d8422de Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Fri, 31 Oct 2025 09:13:00 -0400
Subject: [PATCH 62/68] pylint

---
 sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py   | 2 +-
 .../azure-cosmos/azure/cosmos/aio/_asynchronous_request.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
index aad9256a424f..55f9ac40a00c 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_synchronized_request.py
@@ -79,7 +79,7 @@ def _Request(global_endpoint_manager, request_params, connection_policy, pipelin
     :rtype: tuple of (dict, dict)
 
     """
-    # pylint: disable=protected-access
+    # pylint: disable=protected-access, too-many-branches
 
     connection_timeout = connection_policy.RequestTimeout
     connection_timeout = kwargs.pop("connection_timeout", connection_timeout)
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
index f97dd99e8722..2d8c7e313a62 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_asynchronous_request.py
@@ -50,7 +50,7 @@ async def _Request(global_endpoint_manager, request_params, connection_policy, p
     :rtype: tuple of (dict, dict)
 
     """
-    # pylint: disable=protected-access
+    # pylint: disable=protected-access, too-many-branches
 
     connection_timeout = connection_policy.RequestTimeout
     read_timeout = connection_policy.ReadTimeout

From 8639093f8b4b374deba8ef36ed1b0d0f0c358c26 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Mon, 17 Nov 2025 10:18:18 -0500
Subject: [PATCH 63/68] Update CHANGELOG.md

---
 sdk/cosmos/azure-cosmos/CHANGELOG.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md
index 612a2ec3ab16..d9b36f4dab5c 100644
--- a/sdk/cosmos/azure-cosmos/CHANGELOG.md
+++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md
@@ -3,12 +3,14 @@
 ### 4.14.3 (Unreleased)
 
 #### Features Added
+* Added support for Per Partition Automatic Failover. To enable this feature, you must follow the guide [here](https://learn.microsoft.com/azure/cosmos-db/how-to-configure-per-partition-automatic-failover). See [PR 41588](https://github.com/Azure/azure-sdk-for-python/pull/41588).
 
 #### Breaking Changes
 
 #### Bugs Fixed
 
 #### Other Changes
+* Added cross-regional retries for 503 (Service Unavailable) errors. See [PR 41588](https://github.com/Azure/azure-sdk-for-python/pull/41588).
 
 ### 4.14.2 (2025-11-14)
 
@@ -56,7 +58,6 @@ This version and all future versions will require Python 3.9+.
 
 #### Features Added
 * Added read_items API to provide an efficient method for retrieving multiple items in a single request. See [PR 42167](https://github.com/Azure/azure-sdk-for-python/pull/42167).
-* Added support for Per Partition Automatic Failover. To enable this feature, you must follow the guide [here](https://learn.microsoft.com/azure/cosmos-db/how-to-configure-per-partition-automatic-failover). See [PR 41588](https://github.com/Azure/azure-sdk-for-python/pull/41588).
 * Added ability to replace a container's indexing policy if a vector embedding policy was present. See [PR 42810](https://github.com/Azure/azure-sdk-for-python/pull/42810).
 
 #### Bugs Fixed
@@ -64,7 +65,6 @@ This version and all future versions will require Python 3.9+.
 * Fixed bug where during health checks read regions were marked as unavailable for write operations. See [PR 42525](https://github.com/Azure/azure-sdk-for-python/pull/42525).
 * Fixed bug where containers named with spaces or special characters using session consistency would fall back to eventual consistency. See [PR 42608](https://github.com/Azure/azure-sdk-for-python/pull/42608)
 * Fixed bug where `excluded_locations` was not being honored for some metadata calls. See [PR 42266](https://github.com/Azure/azure-sdk-for-python/pull/42266).
-* Added cross-regional retries for 503 (Service Unavailable) errors. See [PR 41588](https://github.com/Azure/azure-sdk-for-python/pull/41588).
 * Fixed bug where Hybrid Search queries using parameters were not working. See [PR 42787](https://github.com/Azure/azure-sdk-for-python/pull/42787)
 * Fixed partition scoping for per partition circuit breaker. See [PR 42751](https://github.com/Azure/azure-sdk-for-python/pull/42751)
 * Fixed bug where `partition_key` set to None was not properly handled for some operations. See [PR 42747](https://github.com/Azure/azure-sdk-for-python/pull/42747)

From 5b3815f9182d23acdecf66886e7df0c27779340e Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Wed, 19 Nov 2025 12:04:48 -0500
Subject: [PATCH 64/68] react to comments

---
 .../azure-cosmos/azure/cosmos/_constants.py   |  3 ++
 .../_endpoint_discovery_retry_policy.py       |  2 +-
 ...anager_per_partition_automatic_failover.py | 48 +++++-------------
 .../azure/cosmos/_retry_utility.py            |  4 +-
 .../_service_unavailable_retry_policy.py      |  2 +-
 .../azure-cosmos/azure/cosmos/_utils.py       | 18 +++----
 ..._per_partition_automatic_failover_async.py | 48 +++++-------------
 .../test_per_partition_automatic_failover.py  | 48 ++++++++----------
 ..._per_partition_automatic_failover_async.py | 49 ++++++++-----------
 .../test_per_partition_circuit_breaker_mm.py  |  8 +--
 10 files changed, 88 insertions(+), 142 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
index a519f1110ee2..0a5e961f7aa9 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py
@@ -116,6 +116,9 @@ class UserAgentFeatureFlags(IntEnum):
         suffix. This number will then be used to determine what features are enabled by decoding the hex string back
         to a number and checking what bits are set.
 
+        Features being developed should align with the .NET SDK as a source of truth for feature flag assignments:
+        https://github.com/Azure/azure-cosmos-dotnet-v3/blob/master/Microsoft.Azure.Cosmos/src/Diagnostics/UserAgentFeatureFlags.cs
+
         Example:
             If the user agent suffix has "F3", this means that flags 1 and 2.
         """
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
index 83bd966dc3f9..3357c097c63a 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_endpoint_discovery_retry_policy.py
@@ -75,7 +75,7 @@ def ShouldRetry(self, exception):  # pylint: disable=unused-argument
             location = self.global_endpoint_manager.location_cache.get_location_from_endpoint(
                 str(self.request.location_endpoint_to_route))
             regional_endpoint = (self.global_endpoint_manager.location_cache.
-                                account_read_regional_routing_contexts_by_location.get(location).primary_endpoint)
+                                account_read_regional_routing_contexts_by_location.get(location))
             partition_level_info.unavailable_regional_endpoints[location] = regional_endpoint
             self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
             return True
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index 0e912fbace67..a4fb22b821ca 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -21,6 +21,7 @@
 
 if TYPE_CHECKING:
     from azure.cosmos._cosmos_client_connection import CosmosClientConnection
+    from azure.cosmos._location_cache import RegionalRoutingContext
 
 logger = logging.getLogger("azure.cosmos._GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover")
 
@@ -33,18 +34,18 @@ class PartitionLevelFailoverInfo:
     Used to track the partition key range and the regions where it is available.
     """
     def __init__(self) -> None:
-        self.unavailable_regional_endpoints: dict[str, str] = {}
+        self.unavailable_regional_endpoints: dict[str, "RegionalRoutingContext"] = {}
         self._lock = threading.Lock()
         self.current_region: Optional[str] = None
 
     def try_move_to_next_location(
             self,
-            available_account_regional_endpoints: dict[str, str],
+            available_account_regional_endpoints: dict[str, "RegionalRoutingContext"],
             endpoint_region: str,
             request: RequestObject) -> bool:
         """
         Tries to move to the next available regional endpoint for the partition key range.
-        :param Dict[str, str] available_account_regional_endpoints: The available regional endpoints
+        :param Dict[str, RegionalRoutingContext] available_account_regional_endpoints: The available regional endpoints
         :param str endpoint_region: The current regional endpoint
         :param RequestObject request: The request object containing the routing context.
         :return: True if the move was successful, False otherwise.
@@ -53,7 +54,7 @@ def try_move_to_next_location(
         with self._lock:
             if endpoint_region != self.current_region and self.current_region is not None:
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
-                request.route_to_location(regional_endpoint)
+                request.route_to_location(regional_endpoint.primary_endpoint)
                 return True
 
             for regional_endpoint in available_account_regional_endpoints:
@@ -66,7 +67,7 @@ def try_move_to_next_location(
                 self.current_region = regional_endpoint
                 logger.warning("PPAF - Moving to next available regional endpoint: %s", self.current_region)
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
-                request.route_to_location(regional_endpoint)
+                request.route_to_location(regional_endpoint.primary_endpoint)
                 return True
 
             return False
@@ -99,7 +100,7 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
             return False
 
         # if we have at most one region available in the account, we cannot do per partition automatic failover
-        available_regions = self.compute_available_preferred_regions(request)
+        available_regions = self.location_cache.account_read_regional_routing_contexts_by_location
         if len(available_regions) <= 1:
             return False
 
@@ -134,7 +135,6 @@ def try_ppaf_failover_threshold(
                 # Once we mark the endpoint unavailable, the PPAF endpoint manager will try to move to the next
                 # available region for the partition key range
                 with self._threshold_lock:
-                    logger.warning("PPAF - Failover threshold reached for partition key range: %s", pk_range_wrapper)
                     # Check for count again, since a previous request may have now reset the count
                     if (self.ppaf_thresholds_tracker.get_pk_failures(pk_range_wrapper)
                             >= int(os.environ.get(Constants.TIMEOUT_ERROR_THRESHOLD_PPAF,
@@ -143,6 +143,8 @@ def try_ppaf_failover_threshold(
                         partition_level_info = self.partition_range_to_failover_info[pk_range_wrapper]
                         location = self.location_cache.get_location_from_endpoint(
                             str(request.location_endpoint_to_route))
+                        logger.warning("PPAF - Failover threshold reached for partition key range: %s for region: %s", #pylint: disable=line-too-long
+                                       pk_range_wrapper, location)
                         regional_context = (self.location_cache.
                                             account_read_regional_routing_contexts_by_location.
                                             get(location).primary_endpoint)
@@ -171,15 +173,15 @@ def resolve_service_endpoint_for_partition(
                 if request.location_endpoint_to_route is not None:
                     endpoint_region = self.location_cache.get_location_from_endpoint(request.location_endpoint_to_route)
                     if endpoint_region in partition_failover_info.unavailable_regional_endpoints:
-                        available_account_regional_endpoints = self.compute_available_preferred_regions(request)
+                        available_account_regional_endpoints = self.location_cache.account_read_regional_routing_contexts_by_location #pylint: disable=line-too-long
                         if (partition_failover_info.current_region is not None and
                                 endpoint_region != partition_failover_info.current_region):
                             # this request has not yet seen there's an available region being used for this partition
                             regional_endpoint = available_account_regional_endpoints[
-                                partition_failover_info.current_region]
+                                partition_failover_info.current_region].primary_endpoint
                             request.route_to_location(regional_endpoint)
                         else:
-                            if (len(self.compute_available_preferred_regions(request))
+                            if (len(self.location_cache.account_read_regional_routing_contexts_by_location)
                                     == len(partition_failover_info.unavailable_regional_endpoints)):
                                 # If no other region is available, we invalidate the cache and start once again
                                 # from our main write region in the account configurations
@@ -190,7 +192,7 @@ def resolve_service_endpoint_for_partition(
                             else:
                                 # If the current region is unavailable, we try to move to the next available region
                                 partition_failover_info.try_move_to_next_location(
-                                    self.compute_available_preferred_regions(request),
+                                    self.location_cache.account_read_regional_routing_contexts_by_location,
                                     endpoint_region,
                                     request)
                     else:
@@ -204,30 +206,6 @@ def resolve_service_endpoint_for_partition(
                 self.partition_range_to_failover_info[pk_range_wrapper] = partition_failover_info
         return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
 
-    def compute_available_preferred_regions(
-            self,
-            request: RequestObject
-    ) -> dict[str, str]:
-        """
-        Computes the available regional endpoints for the request based on customer-set preferred and excluded regions.
-        :param RequestObject request: The request object containing the routing context.
-        :return: A set of available regional endpoints.
-        :rtype: Dict[str, str]
-        """
-        if request.excluded_locations:
-            excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
-        else:
-            excluded_locations = self.location_cache.connection_policy.ExcludedLocations
-        if excluded_locations is None:
-            excluded_locations = []
-        preferred_locations = self.location_cache.effective_preferred_locations
-        available_regions = [item for item in preferred_locations if item not in excluded_locations]
-        available_regional_endpoints = {}
-        for region, context in self.location_cache.account_read_regional_routing_contexts_by_location.items():
-            if region in available_regions:
-                available_regional_endpoints[region] = context.primary_endpoint
-        return available_regional_endpoints
-
     def record_failure(self,
                        request: RequestObject,
                        pk_range_wrapper: Optional[PartitionKeyRangeWrapper] = None) -> None:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
index 47fef50b8779..32efca023d11 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
@@ -209,8 +209,8 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
                     request.headers[retry_policy._intended_headers] = retry_policy.container_rid
             elif e.status_code == StatusCodes.SERVICE_UNAVAILABLE:
                 if args:
-                    # record the failure for ppaf/circuit breaker tracking
-                    global_endpoint_manager.record_failure(args[0], pk_range_wrapper)
+                    # record the failure for circuit breaker tracking
+                    global_endpoint_manager.record_ppcb_failure(args[0], pk_range_wrapper)
                 retry_policy = service_unavailable_retry_policy
             elif e.status_code == StatusCodes.REQUEST_TIMEOUT or e.status_code >= StatusCodes.INTERNAL_SERVER_ERROR:
                 if args:
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
index 8269eb86799d..a210f9348f89 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_service_unavailable_retry_policy.py
@@ -57,7 +57,7 @@ def ShouldRetry(self, _exception: CosmosHttpResponseError):
                 location = self.global_endpoint_manager.location_cache.get_location_from_endpoint(
                     str(self.request.location_endpoint_to_route))
                 regional_context = (self.global_endpoint_manager.location_cache.
-                                    account_read_regional_routing_contexts_by_location.get(location).primary_endpoint)
+                                    account_read_regional_routing_contexts_by_location.get(location))
                 partition_level_info.unavailable_regional_endpoints[location] = regional_context
                 self.global_endpoint_manager.resolve_service_endpoint_for_partition(self.request, self.pk_range_wrapper)
                 return True
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py
index aaf7f5b39b83..1e5f5602b9e5 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py
@@ -152,19 +152,19 @@ def valid_key_value_exist(
 
 
 def get_user_agent_features(global_endpoint_manager: Any) -> str:
-    """Check the account and client configurations in order to add feature flags to the user agent.
-
-    :param Any global_endpoint_manager: The global endpoint manager instance used to check against.
-    :return: The string representing the user agent features to include.
-    :rtype: str
+    """
+    Check the account and client configurations in order to add feature flags
+    to the user agent using bitmask logic and hex encoding (matching .NET/Java).
     """
     feature_flag = 0
+    # Bitwise OR for feature flags
     if global_endpoint_manager._database_account_cache is not None:
         if global_endpoint_manager._database_account_cache._EnablePerPartitionFailoverBehavior is True:
-            feature_flag += _Constants.UserAgentFeatureFlags.PER_PARTITION_AUTOMATIC_FAILOVER
+            feature_flag |= _Constants.UserAgentFeatureFlags.PER_PARTITION_AUTOMATIC_FAILOVER
     ppcb_check = os.environ.get(
         _Constants.CIRCUIT_BREAKER_ENABLED_CONFIG,
-        _Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT).lower()
+        _Constants.CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT
+    ).lower()
     if ppcb_check == "true" or feature_flag > 0:
-        feature_flag += _Constants.UserAgentFeatureFlags.PER_PARTITION_CIRCUIT_BREAKER
-    return f"| F{feature_flag}" if feature_flag > 0 else ""
+        feature_flag |= _Constants.UserAgentFeatureFlags.PER_PARTITION_CIRCUIT_BREAKER
+    return f"| F{feature_flag:X}" if feature_flag > 0 else ""
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 29205b4051b7..8f94dbebd008 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -21,6 +21,7 @@
 
 if TYPE_CHECKING:
     from azure.cosmos.aio._cosmos_client_connection_async import CosmosClientConnection
+    from azure.cosmos._location_cache import RegionalRoutingContext
 
 logger = logging.getLogger("azure.cosmos._GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover")
 
@@ -33,18 +34,18 @@ class PartitionLevelFailoverInfo:
     Used to track the partition key range and the regions where it is available.
     """
     def __init__(self) -> None:
-        self.unavailable_regional_endpoints: dict[str, str] = {}
+        self.unavailable_regional_endpoints: dict[str, "RegionalRoutingContext"] = {}
         self._lock = threading.Lock()
         self.current_region: Optional[str] = None
 
     def try_move_to_next_location(
             self,
-            available_account_regional_endpoints: dict[str, str],
+            available_account_regional_endpoints: dict[str, "RegionalRoutingContext"],
             endpoint_region: str,
             request: RequestObject) -> bool:
         """
         Tries to move to the next available regional endpoint for the partition key range.
-        :param Dict[str, str] available_account_regional_endpoints: The available regional endpoints
+        :param Dict[str, RegionalRoutingContext] available_account_regional_endpoints: The available regional endpoints
         :param str endpoint_region: The current regional endpoint
         :param RequestObject request: The request object containing the routing context.
         :return: True if the move was successful, False otherwise.
@@ -53,7 +54,7 @@ def try_move_to_next_location(
         with self._lock:
             if endpoint_region != self.current_region and self.current_region is not None:
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
-                request.route_to_location(regional_endpoint)
+                request.route_to_location(regional_endpoint.primary_endpoint)
                 return True
 
             for regional_endpoint in available_account_regional_endpoints:
@@ -66,7 +67,7 @@ def try_move_to_next_location(
                 self.current_region = regional_endpoint
                 logger.warning("PPAF - Moving to next available regional endpoint: %s", self.current_region)
                 regional_endpoint = available_account_regional_endpoints[self.current_region]
-                request.route_to_location(regional_endpoint)
+                request.route_to_location(regional_endpoint.primary_endpoint)
                 return True
 
             return False
@@ -100,7 +101,7 @@ def is_per_partition_automatic_failover_applicable(self, request: RequestObject)
             return False
 
         # if we have at most one region available in the account, we cannot do per partition automatic failover
-        available_regions = self.compute_available_preferred_regions(request)
+        available_regions = self.location_cache.account_read_regional_routing_contexts_by_location
         if len(available_regions) <= 1:
             return False
 
@@ -135,7 +136,6 @@ def try_ppaf_failover_threshold(
                 # Once we mark the endpoint unavailable, the PPAF endpoint manager will try to move to the next
                 # available region for the partition key range
                 with self._threshold_lock:
-                    logger.warning("PPAF - Failover threshold reached for partition key range: %s", pk_range_wrapper)
                     # Check for count again, since a previous request may have now reset the count
                     if (self.ppaf_thresholds_tracker.get_pk_failures(pk_range_wrapper)
                             >= int(os.environ.get(Constants.TIMEOUT_ERROR_THRESHOLD_PPAF,
@@ -144,6 +144,8 @@ def try_ppaf_failover_threshold(
                         partition_level_info = self.partition_range_to_failover_info[pk_range_wrapper]
                         location = self.location_cache.get_location_from_endpoint(
                             str(request.location_endpoint_to_route))
+                        logger.warning("PPAF - Failover threshold reached for partition key range: %s for region: %s", #pylint: disable=line-too-long
+                                       pk_range_wrapper, location)
                         regional_context = (self.location_cache.
                                             account_read_regional_routing_contexts_by_location.
                                             get(location).primary_endpoint)
@@ -172,15 +174,15 @@ def resolve_service_endpoint_for_partition(
                 if request.location_endpoint_to_route is not None:
                     endpoint_region = self.location_cache.get_location_from_endpoint(request.location_endpoint_to_route)
                     if endpoint_region in partition_failover_info.unavailable_regional_endpoints:
-                        available_account_regional_endpoints = self.compute_available_preferred_regions(request)
+                        available_account_regional_endpoints = self.location_cache.account_read_regional_routing_contexts_by_location #pylint: disable=line-too-long
                         if (partition_failover_info.current_region is not None and
                                 endpoint_region != partition_failover_info.current_region):
                             # this request has not yet seen there's an available region being used for this partition
                             regional_endpoint = available_account_regional_endpoints[
-                                partition_failover_info.current_region]
+                                partition_failover_info.current_region].primary_endpoint
                             request.route_to_location(regional_endpoint)
                         else:
-                            if (len(self.compute_available_preferred_regions(request)) ==
+                            if (len(self.location_cache.account_read_regional_routing_contexts_by_location) ==
                                     len(partition_failover_info.unavailable_regional_endpoints)):
                                 # If no other region is available, we invalidate the cache and start once again
                                 # from our main write region in the account configurations
@@ -191,7 +193,7 @@ def resolve_service_endpoint_for_partition(
                             else:
                                 # If the current region is unavailable, we try to move to the next available region
                                 partition_failover_info.try_move_to_next_location(
-                                    self.compute_available_preferred_regions(request),
+                                    self.location_cache.account_read_regional_routing_contexts_by_location,
                                     endpoint_region,
                                     request)
                     else:
@@ -205,30 +207,6 @@ def resolve_service_endpoint_for_partition(
                 self.partition_range_to_failover_info[pk_range_wrapper] = partition_failover_info
         return self._resolve_service_endpoint_for_partition_circuit_breaker(request, pk_range_wrapper)
 
-    def compute_available_preferred_regions(
-            self,
-            request: RequestObject
-    ) -> dict[str, str]:
-        """
-        Computes the available regional endpoints for the request based on customer-set preferred and excluded regions.
-        :param RequestObject request: The request object containing the routing context.
-        :return: A set of available regional endpoints.
-        :rtype: Dict[str, str]
-        """
-        if request.excluded_locations:
-            excluded_locations = request.excluded_locations + self.location_cache.connection_policy.ExcludedLocations
-        else:
-            excluded_locations = self.location_cache.connection_policy.ExcludedLocations
-        if excluded_locations is None:
-            excluded_locations = []
-        preferred_locations = self.location_cache.effective_preferred_locations
-        available_regions = [item for item in preferred_locations if item not in excluded_locations]
-        available_regional_endpoints = {}
-        for region, context in self.location_cache.account_read_regional_routing_contexts_by_location.items():
-            if region in available_regions:
-                available_regional_endpoints[region] = context.primary_endpoint
-        return available_regional_endpoints
-
     async def record_failure(self,
                              request: RequestObject,
                              pk_range_wrapper: Optional[PartitionKeyRangeWrapper] = None) -> None:
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index 8f34f14a3913..b9a161829fc3 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -8,10 +8,9 @@
 from azure.core.exceptions import ServiceResponseError
 from azure.cosmos import CosmosClient
 from azure.cosmos.exceptions import CosmosHttpResponseError
-from azure.cosmos._request_object import RequestObject
 from _fault_injection_transport import FaultInjectionTransport
-from test_per_partition_circuit_breaker_mm import (REGION_1, REGION_2, PK_VALUE, BATCH, write_operations_and_boolean,
-                                                   write_operations_and_errors, perform_write_operation)
+from test_per_partition_circuit_breaker_mm import (REGION_1, REGION_2, PK_VALUE, BATCH,
+                                                   write_operations_errors_and_boolean, perform_write_operation)
 
 # cspell:disable
 
@@ -101,12 +100,15 @@ def setup_info(self, error=None, max_count=None, is_batch=False, exclude_client_
                                                         exclude_client_regions=exclude_client_regions, **kwargs)
         return setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate
 
-    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_failover_errors()))
-    def test_ppaf_partition_info_cache_and_routing(self, write_operation, error):
+    @pytest.mark.parametrize("write_operation, error, exclude_regions", write_operations_errors_and_boolean(create_failover_errors()))
+    def test_ppaf_partition_info_cache_and_routing(self, write_operation, error, exclude_regions):
         # This test validates that the partition info cache is updated correctly upon failures, and that the
         # per-partition automatic failover logic routes requests to the next available regional endpoint on 403.3 errors.
+        # We also verify that this logic is unaffected by user excluded regions, since write-region routing is entirely
+        # taken care of on the service.
         error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, error)
-        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(error_lambda, 1, write_operation == BATCH)
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(error_lambda, 1,
+                                                                                                        write_operation == BATCH, exclude_regions=exclude_regions)
         container = setup['col']
         fault_injection_container = custom_setup['col']
         global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
@@ -145,13 +147,15 @@ def test_ppaf_partition_info_cache_and_routing(self, write_operation, error):
         assert partition_info.current_region is None
 
 
-    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_threshold_errors()))
-    def test_ppaf_partition_thresholds_and_routing(self, write_operation, error):
+    @pytest.mark.parametrize("write_operation, error, exclude_regions", write_operations_errors_and_boolean(create_threshold_errors()))
+    def test_ppaf_partition_thresholds_and_routing(self, write_operation, error, exclude_regions):
         # This test validates the consecutive failures logic is properly handled for per-partition automatic failover,
         # and that the per-partition automatic failover logic routes requests to the next available regional endpoint
-        # after enough consecutive failures have occurred.
+        # after enough consecutive failures have occurred. We also verify that this logic is unaffected by user excluded
+        # regions, since write-region routing is entirely taken care of on the service.
         error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, error)
-        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(error_lambda)
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(error=error_lambda,
+                                                                                                        exclude_client_regions=exclude_regions)
         container = setup['col']
         fault_injection_container = custom_setup['col']
         global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
@@ -208,29 +212,17 @@ def test_ppaf_partition_thresholds_and_routing(self, write_operation, error):
         failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
         assert failure_count == 2
 
-    @pytest.mark.parametrize("write_operation, exclude_client_regions", write_operations_and_boolean())
-    def test_ppaf_exclude_regions(self, write_operation, exclude_client_regions):
-        # This test validates that the per-partition automatic failover logic does not apply to configs without enough regions.
-        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(exclude_client_regions=exclude_client_regions)
-        fault_injection_container = custom_setup['col']
-        global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
-        # Check that computing valid regions for PPAF only returns a single region
-        request_object = RequestObject(resource_type="docs", operation_type=write_operation, headers={})
-        if exclude_client_regions is False:
-            request_object.excluded_locations = [REGION_2]
-        available_ppaf_regions = global_endpoint_manager.compute_available_preferred_regions(request_object)
-        assert len(available_ppaf_regions) == 1
-        # Check that all requests are marked as non-PPAF available due to the fact that we only have one region
-        assert global_endpoint_manager.is_per_partition_automatic_failover_applicable(request_object) is False
-
-    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_failover_errors()))
-    def test_ppaf_session_unavailable_retry(self, write_operation, error):
+    @pytest.mark.parametrize("write_operation, error, exclude_regions", write_operations_errors_and_boolean(create_failover_errors()))
+    def test_ppaf_session_unavailable_retry(self, write_operation, error, exclude_regions):
         # Account config has 2 regions: West US 3 (A) and West US (B). This test validates that after marking the write
         # region (A) as unavailable, the next request is retried to the read region (B) and succeeds. The next read request
         # should see that the write region (A) is unavailable for the partition, and should retry to the read region (B) as well.
+        # We also verify that this logic is unaffected by user excluded regions, since write-region routing is entirely
+        # taken care of on the service.
         error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, error)
         setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(error_lambda, max_count=1,
-                                                                                                        is_batch=write_operation==BATCH, session_error=True)
+                                                                                                        is_batch=write_operation==BATCH,
+                                                                                                        session_error=True, exclude_regions=exclude_regions)
         container = setup['col']
         fault_injection_container = custom_setup['col']
         global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
index 25dbcf5b64af..7fa4982dd70a 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -13,11 +13,10 @@
 from azure.core.exceptions import ServiceResponseError
 from azure.cosmos.exceptions import CosmosHttpResponseError
 from azure.cosmos.aio import CosmosClient
-from azure.cosmos._request_object import RequestObject
 from _fault_injection_transport import FaultInjectionTransport
 from _fault_injection_transport_async import FaultInjectionTransportAsync
 from test_per_partition_automatic_failover import create_failover_errors, create_threshold_errors, session_retry_hook, ppaf_user_agent_hook
-from test_per_partition_circuit_breaker_mm import REGION_1, REGION_2, PK_VALUE, BATCH, write_operations_and_errors, write_operations_and_boolean
+from test_per_partition_circuit_breaker_mm import REGION_1, REGION_2, PK_VALUE, BATCH, write_operations_errors_and_boolean
 from test_per_partition_circuit_breaker_mm_async import perform_write_operation
 
 #cspell:ignore PPAF, ppaf
@@ -96,12 +95,15 @@ async def setup_info(self, error=None, max_count=None, is_batch=False, exclude_c
                                                         exclude_client_regions=exclude_client_regions, **kwargs)
         return setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate
 
-    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_failover_errors()))
-    async def test_ppaf_partition_info_cache_and_routing_async(self, write_operation, error):
+    @pytest.mark.parametrize("write_operation, error, exclude_regions", write_operations_errors_and_boolean(create_failover_errors()))
+    async def test_ppaf_partition_info_cache_and_routing_async(self, write_operation, error, exclude_regions):
         # This test validates that the partition info cache is updated correctly upon failures, and that the
-        # per-partition automatic failover logic routes requests to the next available regional endpoint
+        # per-partition automatic failover logic routes requests to the next available regional endpoint.
+        # We also verify that this logic is unaffected by user excluded regions, since write-region routing is
+        # entirely taken care of on the service.
         error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(0, error))
-        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda, 1, write_operation == BATCH)
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda, 1,
+                                                                                                              write_operation == BATCH, exclude_regions=exclude_regions)
         container = setup['col']
         fault_injection_container = custom_setup['col']
         global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
@@ -139,12 +141,15 @@ async def test_ppaf_partition_info_cache_and_routing_async(self, write_operation
         assert initial_region not in partition_info.unavailable_regional_endpoints
         assert partition_info.current_region is None
 
-    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_threshold_errors()))
-    async def test_ppaf_partition_thresholds_and_routing_async(self, write_operation, error):
+    @pytest.mark.parametrize("write_operation, error, exclude_regions", write_operations_errors_and_boolean(create_threshold_errors()))
+    async def test_ppaf_partition_thresholds_and_routing_async(self, write_operation, error, exclude_regions):
         # This test validates that the partition info cache is updated correctly upon failures, and that the
-        # per-partition automatic failover logic routes requests to the next available regional endpoint
+        # per-partition automatic failover logic routes requests to the next available regional endpoint.
+        # We also verify that this logic is unaffected by user excluded regions, since write-region routing is
+        # entirely taken care of on the service.
         error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(0, error))
-        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda)
+        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda,
+                                                                                                              exclude_regions=exclude_regions,)
         container = setup['col']
         fault_injection_container = custom_setup['col']
         global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
@@ -202,29 +207,17 @@ async def test_ppaf_partition_thresholds_and_routing_async(self, write_operation
         failure_count = global_endpoint_manager.ppaf_thresholds_tracker.pk_range_wrapper_to_failure_count[pk_range_wrappers[0]]
         assert failure_count == 2
 
-    @pytest.mark.parametrize("write_operation, exclude_client_regions", write_operations_and_boolean())
-    async def test_ppaf_exclude_regions_async(self, write_operation, exclude_client_regions):
-        # This test validates that the per-partition automatic failover logic does not apply to configs without enough regions.
-        setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info(exclude_client_regions=exclude_client_regions)
-        fault_injection_container = custom_setup['col']
-        global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
-        # Check that computing valid regions for PPAF only returns a single region
-        request_object = RequestObject(resource_type="docs", operation_type=write_operation, headers={})
-        if exclude_client_regions is False:
-            request_object.excluded_locations = [REGION_2]
-        available_ppaf_regions = global_endpoint_manager.compute_available_preferred_regions(request_object)
-        assert len(available_ppaf_regions) == 1
-        # Check that all requests are marked as non-PPAF available due to the fact that we only have one region
-        assert global_endpoint_manager.is_per_partition_automatic_failover_applicable(request_object) is False
-
-    @pytest.mark.parametrize("write_operation, error", write_operations_and_errors(create_failover_errors()))
-    async def test_ppaf_session_unavailable_retry_async(self, write_operation, error):
+    @pytest.mark.parametrize("write_operation, error, exclude_regions", write_operations_errors_and_boolean(create_failover_errors()))
+    async def test_ppaf_session_unavailable_retry_async(self, write_operation, error, exclude_regions):
         # Account config has 2 regions: West US 3 (A) and West US (B). This test validates that after marking the write
         # region (A) as unavailable, the next request is retried to the read region (B) and succeeds. The next read request
         # should see that the write region (A) is unavailable for the partition, and should retry to the read region (B) as well.
+        # We also verify that this logic is unaffected by user excluded regions, since write-region routing is
+        # entirely taken care of on the service.
         error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(0, error))
         setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda, max_count=1,
-                                                                                                        is_batch=write_operation==BATCH, session_error=True)
+                                                                                                        is_batch=write_operation==BATCH,
+                                                                                                        session_error=True, exclude_regions=exclude_regions)
         container = setup['col']
         fault_injection_container = custom_setup['col']
         global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
index 8e4ab4386695..700e2112621b 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_circuit_breaker_mm.py
@@ -60,12 +60,14 @@ def write_operations_and_errors(error_list=None):
 
     return params
 
-def write_operations_and_boolean():
+def write_operations_errors_and_boolean(error_list=None):
     write_operations = [CREATE, UPSERT, REPLACE, DELETE, PATCH, BATCH]
+    errors = error_list or create_errors()
     params = []
     for write_operation in write_operations:
-        for boolean in [True, False]:
-            params.append((write_operation, boolean))
+        for error in errors:
+            for boolean in [True, False]:
+                params.append((write_operation, error, boolean))
 
     return params
 

From e31d674eebf9937c40a9de92ea7b75ba0856f94e Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Wed, 19 Nov 2025 13:35:05 -0500
Subject: [PATCH 65/68] Update _retry_utility.py

---
 sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
index 32efca023d11..b52a957c4be0 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py
@@ -46,7 +46,7 @@
 
 
 # pylint: disable=protected-access, disable=too-many-lines, disable=too-many-statements, disable=too-many-branches
-# cspell:ignore PPAF,ppaf
+# cspell:ignore PPAF,ppaf,ppcb
 
 # args [0] is the request object
 # args [1] is the connection policy

From e55871cda07f3c694d4f3e7aa1e3d5e942ff780a Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Wed, 19 Nov 2025 15:20:10 -0500
Subject: [PATCH 66/68] mypy pylint

---
 ...ndpoint_manager_per_partition_automatic_failover.py | 10 +++++-----
 sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py         |  4 ++++
 ...t_manager_per_partition_automatic_failover_async.py |  8 ++++----
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
index a4fb22b821ca..0547cb41df32 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_partition_endpoint_manager_per_partition_automatic_failover.py
@@ -45,7 +45,7 @@ def try_move_to_next_location(
             request: RequestObject) -> bool:
         """
         Tries to move to the next available regional endpoint for the partition key range.
-        :param Dict[str, RegionalRoutingContext] available_account_regional_endpoints: The available regional endpoints
+        :param dict[str, RegionalRoutingContext] available_account_regional_endpoints: The available regional endpoints
         :param str endpoint_region: The current regional endpoint
         :param RequestObject request: The request object containing the routing context.
         :return: True if the move was successful, False otherwise.
@@ -53,8 +53,8 @@ def try_move_to_next_location(
         """
         with self._lock:
             if endpoint_region != self.current_region and self.current_region is not None:
-                regional_endpoint = available_account_regional_endpoints[self.current_region]
-                request.route_to_location(regional_endpoint.primary_endpoint)
+                regional_endpoint = available_account_regional_endpoints[self.current_region].primary_endpoint
+                request.route_to_location(regional_endpoint)
                 return True
 
             for regional_endpoint in available_account_regional_endpoints:
@@ -66,8 +66,8 @@ def try_move_to_next_location(
 
                 self.current_region = regional_endpoint
                 logger.warning("PPAF - Moving to next available regional endpoint: %s", self.current_region)
-                regional_endpoint = available_account_regional_endpoints[self.current_region]
-                request.route_to_location(regional_endpoint.primary_endpoint)
+                regional_endpoint = available_account_regional_endpoints[self.current_region].primary_endpoint
+                request.route_to_location(regional_endpoint)
                 return True
 
             return False
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py
index 1e5f5602b9e5..0587556f198e 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py
@@ -155,6 +155,10 @@ def get_user_agent_features(global_endpoint_manager: Any) -> str:
     """
     Check the account and client configurations in order to add feature flags
     to the user agent using bitmask logic and hex encoding (matching .NET/Java).
+    
+    :param Any global_endpoint_manager: The GlobalEndpointManager instance.
+    :return: A string representing the user agent feature flags.
+    :rtype: str
     """
     feature_flag = 0
     # Bitwise OR for feature flags
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
index 8f94dbebd008..c96b46ca46b3 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_partition_endpoint_manager_per_partition_automatic_failover_async.py
@@ -53,8 +53,8 @@ def try_move_to_next_location(
         """
         with self._lock:
             if endpoint_region != self.current_region and self.current_region is not None:
-                regional_endpoint = available_account_regional_endpoints[self.current_region]
-                request.route_to_location(regional_endpoint.primary_endpoint)
+                regional_endpoint = available_account_regional_endpoints[self.current_region].primary_endpoint
+                request.route_to_location(regional_endpoint)
                 return True
 
             for regional_endpoint in available_account_regional_endpoints:
@@ -66,8 +66,8 @@ def try_move_to_next_location(
 
                 self.current_region = regional_endpoint
                 logger.warning("PPAF - Moving to next available regional endpoint: %s", self.current_region)
-                regional_endpoint = available_account_regional_endpoints[self.current_region]
-                request.route_to_location(regional_endpoint.primary_endpoint)
+                regional_endpoint = available_account_regional_endpoints[self.current_region].primary_endpoint
+                request.route_to_location(regional_endpoint)
                 return True
 
             return False

From 0463a3f79202903ed7f5ece033a832b7b51c2338 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 20 Nov 2025 10:13:00 -0500
Subject: [PATCH 67/68] test fixes

---
 .../tests/test_per_partition_automatic_failover.py          | 4 ++--
 .../tests/test_per_partition_automatic_failover_async.py    | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
index b9a161829fc3..437c25556e05 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover.py
@@ -108,7 +108,7 @@ def test_ppaf_partition_info_cache_and_routing(self, write_operation, error, exc
         # taken care of on the service.
         error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, error)
         setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(error_lambda, 1,
-                                                                                                        write_operation == BATCH, exclude_regions=exclude_regions)
+                                                                                                        write_operation == BATCH, exclude_client_regions=exclude_regions)
         container = setup['col']
         fault_injection_container = custom_setup['col']
         global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
@@ -222,7 +222,7 @@ def test_ppaf_session_unavailable_retry(self, write_operation, error, exclude_re
         error_lambda = lambda r: FaultInjectionTransport.error_after_delay(0, error)
         setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = self.setup_info(error_lambda, max_count=1,
                                                                                                         is_batch=write_operation==BATCH,
-                                                                                                        session_error=True, exclude_regions=exclude_regions)
+                                                                                                        session_error=True, exclude_client_regions=exclude_regions)
         container = setup['col']
         fault_injection_container = custom_setup['col']
         global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
diff --git a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
index 7fa4982dd70a..860727c18b0b 100644
--- a/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
+++ b/sdk/cosmos/azure-cosmos/tests/test_per_partition_automatic_failover_async.py
@@ -103,7 +103,7 @@ async def test_ppaf_partition_info_cache_and_routing_async(self, write_operation
         # entirely taken care of on the service.
         error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(0, error))
         setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda, 1,
-                                                                                                              write_operation == BATCH, exclude_regions=exclude_regions)
+                                                                                                              write_operation == BATCH, exclude_client_regions=exclude_regions)
         container = setup['col']
         fault_injection_container = custom_setup['col']
         global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
@@ -149,7 +149,7 @@ async def test_ppaf_partition_thresholds_and_routing_async(self, write_operation
         # entirely taken care of on the service.
         error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(0, error))
         setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda,
-                                                                                                              exclude_regions=exclude_regions,)
+                                                                                                              exclude_client_regions=exclude_regions,)
         container = setup['col']
         fault_injection_container = custom_setup['col']
         global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager
@@ -217,7 +217,7 @@ async def test_ppaf_session_unavailable_retry_async(self, write_operation, error
         error_lambda = lambda r: asyncio.create_task(FaultInjectionTransportAsync.error_after_delay(0, error))
         setup, doc_fail_id, doc_success_id, custom_setup, custom_transport, predicate = await self.setup_info(error_lambda, max_count=1,
                                                                                                         is_batch=write_operation==BATCH,
-                                                                                                        session_error=True, exclude_regions=exclude_regions)
+                                                                                                        session_error=True, exclude_client_regions=exclude_regions)
         container = setup['col']
         fault_injection_container = custom_setup['col']
         global_endpoint_manager = fault_injection_container.client_connection._global_endpoint_manager

From cdfdc0187c9bb706870050bac58aed7618f35ab9 Mon Sep 17 00:00:00 2001
From: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
Date: Thu, 20 Nov 2025 17:49:05 -0500
Subject: [PATCH 68/68] add lock to failure additions

---
 .../azure/cosmos/_partition_health_tracker.py             | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
index 8218950a8dff..50f4c79bceb4 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py
@@ -312,11 +312,13 @@ class _PPAFPartitionThresholdsTracker(object):
 
     def __init__(self) -> None:
         self.pk_range_wrapper_to_failure_count: dict[PartitionKeyRangeWrapper, int] = {}
+        self._failure_lock = threading.Lock()
 
     def add_failure(self, pk_range_wrapper: PartitionKeyRangeWrapper) -> None:
-        if pk_range_wrapper not in self.pk_range_wrapper_to_failure_count:
-            self.pk_range_wrapper_to_failure_count[pk_range_wrapper] = 0
-        self.pk_range_wrapper_to_failure_count[pk_range_wrapper] += 1
+        with self._failure_lock:
+            if pk_range_wrapper not in self.pk_range_wrapper_to_failure_count:
+                self.pk_range_wrapper_to_failure_count[pk_range_wrapper] = 0
+            self.pk_range_wrapper_to_failure_count[pk_range_wrapper] += 1
 
     def clear_pk_failures(self, pk_range_wrapper: PartitionKeyRangeWrapper) -> None:
         if pk_range_wrapper in self.pk_range_wrapper_to_failure_count: