diff --git a/Microsoft.Azure.Cosmos.Samples/Usage/Hedging/ReadRequestDiagnosticsExample.json b/Microsoft.Azure.Cosmos.Samples/Usage/Hedging/ReadRequestDiagnosticsExample.json new file mode 100644 index 0000000000..beac4c6014 --- /dev/null +++ b/Microsoft.Azure.Cosmos.Samples/Usage/Hedging/ReadRequestDiagnosticsExample.json @@ -0,0 +1,329 @@ +{ + "Summary": { + "DirectCalls": { + "(200, 0)": 1 + }, + "GatewayCalls": { + "(200, 0)": 1 + } + }, + "name": "ReadItemAsync", + "start datetime": "2025-05-09T16:15:21.287Z", + "duration in milliseconds": 1131.2238, + "data": { + "Client Configuration": { + "Client Created Time Utc": "2025-05-09T16:15:19.8917662Z", + "MachineId": "hashedMachineName:94d755e6-4bd9-6d68-c9d4-22b4d44d5b96", + "NumberOfClientsCreated": 1, + "NumberOfActiveClients": 1, + "ConnectionMode": "Direct", + "User Agent": "cosmos-netstandard-sdk/3.49.0|2|X64|Microsoft Windows 10.0.26100|.NET 6.0.36|L|", + "ConnectionConfig": { + "gw": "(cps:50, urto:6, p:False, httpf: False)", + "rntbd": "(cto: 5, icto: -1, mrpc: 30, mcpe: 65535, erd: True, pr: ReuseUnicastPort)", + "other": "(ed:False, be:False)" + }, + "ConsistencyConfig": "(consistency: NotSet, prgns:[West US 3, West US], apprgn: )", + "ProcessorCount": 12 + }, + "Hedge Config": "t:100ms, s:50ms, w:False", + "Hedge Context": [ + "West US 3", + "West US" + ] + }, + "children": [ + { + "name": "Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler", + "duration in milliseconds": 1129.3323, + "children": [ + { + "name": "Get Collection Cache", + "duration in milliseconds": 0.007 + }, + { + "name": "Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler", + "duration in milliseconds": 1151.0061, + "children": [ + { + "name": "Microsoft.Azure.Cosmos.Handlers.TelemetryHandler", + "duration in milliseconds": 1151.004, + "children": [ + { + "name": "Microsoft.Azure.Cosmos.Handlers.RetryHandler", + "duration in milliseconds": 1151.0058, + "children": [ + { + "name": "Microsoft.Azure.Cosmos.Handlers.RouterHandler", + 
"duration in milliseconds": 1151.0004, + "children": [ + { + "name": "Microsoft.Azure.Cosmos.Handlers.TransportHandler", + "duration in milliseconds": 1151.002, + "children": [ + { + "name": "Microsoft.Azure.Documents.ServerStoreModel Transport Request", + "duration in milliseconds": 1150.9186, + "data": { + "Client Side Request Stats": { + "Id": "AggregatedClientSideRequestStatistics", + "ContactedReplicas": [], + "RegionsContacted": [], + "FailedReplicas": [], + "AddressResolutionStatistics": [], + "StoreResponseStatistics": [] + } + } + } + ] + } + ] + } + ] + } + ] + } + ] + }, + { + "name": "Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler", + "duration in milliseconds": 992.2633, + "data": { + "System Info": { + "systemHistory": [ + { + "dateUtc": "2025-05-09T16:14:24.8037599Z", + "cpu": 47.922, + "memory": 594752.000, + "threadInfo": { + "isThreadStarving": "False", + "threadWaitIntervalInMs": 0.9073, + "availableThreads": 32765, + "minThreads": 12, + "maxThreads": 32767 + }, + "numberOfOpenTcpConnection": 3 + }, + { + "dateUtc": "2025-05-09T16:14:34.8179966Z", + "cpu": 38.084, + "memory": 607580.000, + "threadInfo": { + "isThreadStarving": "False", + "threadWaitIntervalInMs": 0.167, + "availableThreads": 32765, + "minThreads": 12, + "maxThreads": 32767 + }, + "numberOfOpenTcpConnection": 3 + }, + { + "dateUtc": "2025-05-09T16:14:44.8294121Z", + "cpu": 42.072, + "memory": 1054836.000, + "threadInfo": { + "isThreadStarving": "False", + "threadWaitIntervalInMs": 0.074, + "availableThreads": 32765, + "minThreads": 12, + "maxThreads": 32767 + }, + "numberOfOpenTcpConnection": 3 + }, + { + "dateUtc": "2025-05-09T16:14:54.8360653Z", + "cpu": 36.015, + "memory": 1038760.000, + "threadInfo": { + "isThreadStarving": "False", + "threadWaitIntervalInMs": 0.1299, + "availableThreads": 32765, + "minThreads": 12, + "maxThreads": 32767 + }, + "numberOfOpenTcpConnection": 3 + }, + { + "dateUtc": "2025-05-09T16:15:04.8435121Z", + "cpu": 43.817, + "memory": 1616568.000, + 
"threadInfo": { + "isThreadStarving": "False", + "threadWaitIntervalInMs": 0.1434, + "availableThreads": 32765, + "minThreads": 12, + "maxThreads": 32767 + }, + "numberOfOpenTcpConnection": 3 + }, + { + "dateUtc": "2025-05-09T16:15:14.8453053Z", + "cpu": 35.239, + "memory": 1629004.000, + "threadInfo": { + "isThreadStarving": "False", + "threadWaitIntervalInMs": 0.1103, + "availableThreads": 32765, + "minThreads": 12, + "maxThreads": 32767 + }, + "numberOfOpenTcpConnection": 3 + } + ] + } + }, + "children": [ + { + "name": "Microsoft.Azure.Cosmos.Handlers.TelemetryHandler", + "duration in milliseconds": 992.2298, + "children": [ + { + "name": "Microsoft.Azure.Cosmos.Handlers.RetryHandler", + "duration in milliseconds": 992.1824, + "children": [ + { + "name": "Microsoft.Azure.Cosmos.Handlers.RouterHandler", + "duration in milliseconds": 992.1503, + "children": [ + { + "name": "Microsoft.Azure.Cosmos.Handlers.TransportHandler", + "duration in milliseconds": 992.1449, + "children": [ + { + "name": "Microsoft.Azure.Documents.ServerStoreModel Transport Request", + "duration in milliseconds": 991.3114, + "data": { + "Client Side Request Stats": { + "Id": "AggregatedClientSideRequestStatistics", + "ContactedReplicas": [ + { + "Count": 1, + "Uri": "rntbd://cdb-ms-prod-westus1-be169.documents.azure.com:14031/apps/81e4cdc8-2da0-416c-beb1-394af2e72180/services/2844fd18-25a1-46e3-896e-898a02c31a8a/partitions/1e954280-38c1-4e0c-b4bf-3e63bdfc3615/replicas/133888881810264235p/" + } + ], + "RegionsContacted": [], + "FailedReplicas": [], + "AddressResolutionStatistics": [ + { + "StartTimeUTC": "2025-05-09T16:15:21.4249712Z", + "EndTimeUTC": "2025-05-09T16:15:21.8670316Z", + "TargetEndpoint": "https://hedging-example-westus.documents.azure.com//addresses/?$resolveFor=dbs%2fZWUNAA%3d%3d%2fcolls%2fZWUNALftZ78%3d%2fdocs&$filter=protocol eq rntbd&$partitionKeyRangeIds=0" + } + ], + "StoreResponseStatistics": [ + { + "ResponseTimeUTC": "2025-05-09T16:15:22.4138215Z", + "DurationInMs": 
546.1124, + "ResourceType": "Document", + "OperationType": "Read", + "RequestSessionToken": "0:0#5#7=-1#8=-1", + "LocationEndpoint": "https://hedging-example-westus.documents.azure.com/", + "StoreResult": { + "ActivityId": "a1cfa80c-cb67-4ef5-b96a-dac4a7247429", + "StatusCode": "Ok", + "SubStatusCode": "Unknown", + "LSN": 8, + "PartitionKeyRangeId": "0", + "GlobalCommittedLSN": 5, + "ItemLSN": 6, + "UsingLocalLSN": true, + "QuorumAckedLSN": 8, + "SessionToken": "0#5#7=-1#8=-1", + "CurrentWriteQuorum": 3, + "CurrentReplicaSetSize": 4, + "NumberOfReadRegions": 2, + "IsValid": true, + "StorePhysicalAddress": "rntbd://cdb-ms-prod-westus1-be169.documents.azure.com:14031/apps/81e4cdc8-2da0-416c-beb1-394af2e72180/services/2844fd18-25a1-46e3-896e-898a02c31a8a/partitions/1e954280-38c1-4e0c-b4bf-3e63bdfc3615/replicas/133888881810264235p/", + "RequestCharge": 1, + "RetryAfterInMs": null, + "BELatencyInMs": "0.293", + "ReplicaHealthStatuses": [ + "(port: 14031 | status: Unknown | lkt: 5/9/2025 4:15:21 PM)", + "(port: 14184 | status: Unknown | lkt: 5/9/2025 4:15:21 PM)", + "(port: 14356 | status: Unknown | lkt: 5/9/2025 4:15:21 PM)", + "(port: 14405 | status: Unknown | lkt: 5/9/2025 4:15:21 PM)" + ], + "transportRequestTimeline": { + "requestTimeline": [ + { + "event": "Created", + "startTimeUtc": "2025-05-09T16:15:21.8675967Z", + "durationInMs": 0.0123 + }, + { + "event": "ChannelAcquisitionStarted", + "startTimeUtc": "2025-05-09T16:15:21.8676090Z", + "durationInMs": 455.3481 + }, + { + "event": "Pipelined", + "startTimeUtc": "2025-05-09T16:15:22.3229571Z", + "durationInMs": 0.4602 + }, + { + "event": "Transit Time", + "startTimeUtc": "2025-05-09T16:15:22.3234173Z", + "durationInMs": 90.0714 + }, + { + "event": "Received", + "startTimeUtc": "2025-05-09T16:15:22.4134887Z", + "durationInMs": 0.3431 + }, + { + "event": "Completed", + "startTimeUtc": "2025-05-09T16:15:22.4138318Z", + "durationInMs": 0 + } + ], + "serviceEndpointStats": { + "inflightRequests": 1, + 
"openConnections": 1 + }, + "connectionStats": { + "waitforConnectionInit": "True", + "callsPendingReceive": 0, + "lastSendAttempt": "2025-05-09T16:15:22.2294194Z", + "lastSend": "2025-05-09T16:15:22.2300082Z", + "lastReceive": "2025-05-09T16:15:22.3219096Z" + }, + "requestSizeInBytes": 475, + "responseMetadataSizeInBytes": 489, + "responseBodySizeInBytes": 230 + }, + "TransportException": null + } + } + ], + "HttpResponseStats": [ + { + "StartTimeUTC": "2025-05-09T16:15:21.4250207Z", + "DurationInMs": 441.9028, + "RequestUri": "https://hedging-example-westus.documents.azure.com//addresses/?$resolveFor=dbs%2fZWUNAA%3d%3d%2fcolls%2fZWUNALftZ78%3d%2fdocs&$filter=protocol eq rntbd&$partitionKeyRangeIds=0", + "ResourceType": "Document", + "HttpMethod": "GET", + "ActivityId": "a1cfa80c-cb67-4ef5-b96a-dac4a7247429", + "StatusCode": "OK" + } + ] + } + } + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, + { + "name": "Response Serialization", + "duration in milliseconds": 0.177 + } + ] +} \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/src/Routing/AvailabilityStrategy/AvailabilityStrategy.cs b/Microsoft.Azure.Cosmos/src/Routing/AvailabilityStrategy/AvailabilityStrategy.cs index 0109653315..5760533564 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/AvailabilityStrategy/AvailabilityStrategy.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/AvailabilityStrategy/AvailabilityStrategy.cs @@ -26,22 +26,6 @@ public static AvailabilityStrategy DisabledStrategy() return new DisabledAvailabilityStrategy(); } - /// - /// After a request's duration passes a threshold, this strategy will send out - /// hedged request to other regions. The first hedge request will be sent after the threshold. 
- /// After that, the strategy will send out a request every thresholdStep - /// until the request is completed or regions are exausted - /// - /// how long before SDK begins hedging - /// Period of time between first hedge and next hedging attempts - /// the cross region hedging availability strategy - public static AvailabilityStrategy CrossRegionHedgingStrategy( - TimeSpan threshold, - TimeSpan? thresholdStep) - { - return new CrossRegionHedgingAvailabilityStrategy(threshold, thresholdStep, false); - } - /// /// After a request's duration passes a threshold, this strategy will send out /// hedged request to other regions. The first hedge request will be sent after the threshold. @@ -55,12 +39,7 @@ public static AvailabilityStrategy CrossRegionHedgingStrategy( /// This is expected and applications that adopt this feature should be prepared to handle these exceptions. /// Application might not be able to be deterministic on Create vs Replace in the case of Upsert Operations /// the cross region hedging availability -#if PREVIEW - public -#else - internal -#endif - static AvailabilityStrategy CrossRegionHedgingStrategy( + public static AvailabilityStrategy CrossRegionHedgingStrategy( TimeSpan threshold, TimeSpan? 
thresholdStep, bool enableMultiWriteRegionHedge = false) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetPreviewSDKAPI.json b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetPreviewSDKAPI.json index 1647011e23..55895ac3e7 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetPreviewSDKAPI.json +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetPreviewSDKAPI.json @@ -1,16 +1,5 @@ { "Subclasses": { - "Microsoft.Azure.Cosmos.AvailabilityStrategy;System.Object;IsAbstract:True;IsSealed:False;IsInterface:False;IsEnum:False;IsClass:True;IsValueType:False;IsNested:False;IsGenericType:False;IsSerializable:False": { - "Subclasses": {}, - "Members": { - "Microsoft.Azure.Cosmos.AvailabilityStrategy CrossRegionHedgingStrategy(System.TimeSpan, System.Nullable`1[System.TimeSpan], Boolean)": { - "Type": "Method", - "Attributes": [], - "MethodInfo": "Microsoft.Azure.Cosmos.AvailabilityStrategy CrossRegionHedgingStrategy(System.TimeSpan, System.Nullable`1[System.TimeSpan], Boolean);IsAbstract:False;IsStatic:True;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" - } - }, - "NestedTypes": {} - }, "Microsoft.Azure.Cosmos.ChangeFeedItem`1;System.Object;IsAbstract:False;IsSealed:False;IsInterface:False;IsEnum:False;IsClass:True;IsValueType:False;IsNested:False;IsGenericType:True;IsSerializable:False": { "Subclasses": {}, "Members": { diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.json b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.json index 39a122ab05..4441bc7fa5 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.json +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.json @@ -166,10 +166,10 @@ 
"Microsoft.Azure.Cosmos.AvailabilityStrategy;System.Object;IsAbstract:True;IsSealed:False;IsInterface:False;IsEnum:False;IsClass:True;IsValueType:False;IsNested:False;IsGenericType:False;IsSerializable:False": { "Subclasses": {}, "Members": { - "Microsoft.Azure.Cosmos.AvailabilityStrategy CrossRegionHedgingStrategy(System.TimeSpan, System.Nullable`1[System.TimeSpan])": { + "Microsoft.Azure.Cosmos.AvailabilityStrategy CrossRegionHedgingStrategy(System.TimeSpan, System.Nullable`1[System.TimeSpan], Boolean)": { "Type": "Method", "Attributes": [], - "MethodInfo": "Microsoft.Azure.Cosmos.AvailabilityStrategy CrossRegionHedgingStrategy(System.TimeSpan, System.Nullable`1[System.TimeSpan]);IsAbstract:False;IsStatic:True;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + "MethodInfo": "Microsoft.Azure.Cosmos.AvailabilityStrategy CrossRegionHedgingStrategy(System.TimeSpan, System.Nullable`1[System.TimeSpan], Boolean);IsAbstract:False;IsStatic:True;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, "Microsoft.Azure.Cosmos.AvailabilityStrategy DisabledStrategy()": { "Type": "Method", diff --git a/docs/Cross Region Request Hedging.md b/docs/Cross Region Request Hedging.md index 1041a85750..17f3d31854 100644 --- a/docs/Cross Region Request Hedging.md +++ b/docs/Cross Region Request Hedging.md @@ -70,11 +70,33 @@ ItemRequestOptions requestOptions = new ItemRequestOptions() }; ``` -When enabled at the `CosmosClient` level, the availability strategy applies to all read requests unless explicitly disabled per request: ReadItem, Queries (single and cross partition), ReadMany, and ChangeFeed. It is not enabled for write requests. +When enabled at the `CosmosClient` level, the availability strategy applies to all read requests unless explicitly disabled per request: ReadItem, Queries (single and cross partition), ReadMany, and ChangeFeed. + +## Hedging for Write Requests + +Availability strategies can also be used for write requests. 
This feature is not enabled by default, but can be enabled by setting the `enableMultiWriteRegionHedge` parameter to `true` when creating the `CrossRegionHedgingStrategy`. This will allow the SDK to send out hedged requests for write requests as well. This feature can only be used for accounts where multi-region writes are enabled. Like read requests, the SDK will only hedge for document requests, not container, database, or other write requests. Please note that all conflict resolution must be handled by the client application. Write request hedging otherwise performs the same as read request hedging. + +```csharp +CosmosClientOptions options = new CosmosClientOptions() +{ + AvailabilityStrategy + = AvailabilityStrategy.CrossRegionHedgingStrategy( + threshold: TimeSpan.FromSeconds(1.5), + thresholdStep: TimeSpan.FromSeconds(1), + enableMultiWriteRegionHedge: true + ), + ApplicationPreferredRegions = new List<string>() { "East US", "West US", "Central US"}, +}; + +CosmosClient client = new CosmosClient( + accountEndpoint: "account endpoint", + authKeyOrResourceToken: "auth key or resource token", + clientOptions: options); +``` ## Diagnostics -In the diagnostics data there are two new areas of note `Response Region` and `Hedge Context` that will appear when using this feature. `Response Region` shows the region that the request is ultimately served out of. `Hedge Context` shows all the regions requests were sent to. +In the diagnostics data there are two new areas of note, `Hedge Config` and `Hedge Context`, that will appear when using this feature. `Hedge Config` shows the configured availability strategy, along with whether hedging for write requests is enabled. `Hedge Context` shows all the regions requests were sent to. To find what region the request was sent to, look for the `StoreResponse` in the diagnostics data.
A full example of a hedged request can be seen [here](https://github.com/Azure/azure-cosmos-dotnet-v3/tree/master/Microsoft.Azure.Cosmos.Samples/Usage/Hedging/ReadRequestDiagnosticsExample.json). ```json "Summary": { @@ -105,11 +127,11 @@ In the diagnostics data there are two new areas of note `Response Region` and `H "ConsistencyConfig": "(consistency: NotSet, prgns:[Central US, North Central US], apprgn: )", "ProcessorCount": 12 }, + "Hedge Config": "t:100ms, s:50ms, w:False", "Hedge Context": [ "Central US", "North Central US" - ], - "Response Region": "North Central US" + ] } ```