From 7f732e51353484edd13dd6212da75078cddad102 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:57:53 +0200 Subject: [PATCH 01/30] inital commit --- .../SemanticRerankRequestOptions.cs | 27 +++++++++++++++++++ .../src/Resource/Container/Container.cs | 16 +++++++++++ .../Resource/Container/ContainerInlineCore.cs | 14 ++++++++++ 3 files changed, 57 insertions(+) create mode 100644 Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs diff --git a/Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs b/Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs new file mode 100644 index 0000000000..91a8330ae3 --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs @@ -0,0 +1,27 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos +{ + /// + /// Request options for semantic rerank operations in Azure Cosmos DB. + /// + public class SemanticRerankRequestOptions : RequestOptions + { + /// + /// Gets or sets a value indicating whether to return the documents text in the response. Default is true. + /// + public bool ReturnDocuments { get; set; } = true; + + /// + /// Gets or sets the number of top documents to return. Default all documents are returned. + /// + public int TopK { get; set; } + + /// + /// Whether to sort the results by relevance score in descending order. 
+ /// + public bool SortDecending { get; set; } = true; + } +} diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs index 379ee407f3..54d671d6fe 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs @@ -1678,6 +1678,22 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilder( public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManualCheckpoint( string processorName, ChangeFeedStreamHandlerWithManualCheckpoint onChangesDelegate); + + /// + /// Rerank a list of documents using semantic reranking. + /// This method uses a semantic reranker to score and reorder the provided documents + /// based on their relevance to the given reranking context. + /// + /// The type of the key in the reranked results. + /// The type of the value in the reranked results. + /// The context or query string to use for reranking the documents. + /// A list of documents to be reranked + /// (Optional) The options for the semantic reranking request. + /// The reranking results, typically including the reranked documents and their scores. + public abstract Task> SemanticRerankAsync( + string renrankContext, + IEnumerable documents, + SemanticRerankRequestOptions options = null); /// /// Deletes all items in the Container with the specified value. 
diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs index 44a409eed1..d02e4c5b24 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs @@ -697,5 +697,19 @@ public override Task IsFeedRangePartOfAsync( y, cancellationToken: cancellationToken)); } + + public override Task> SemanticRerankAsync( + string renrankContext, + IEnumerable documents, + SemanticRerankRequestOptions options = null) + { + return this.ClientContext.OperationHelperAsync( + operationName: nameof(SemanticRerankAsync), + containerName: this.Id, + databaseName: this.Database.Id, + operationType: Documents.OperationType.SemanticRerank, + requestOptions: options, + task: (trace) => base.SemanticRerankAsync(renrankContext, documents, options, trace)); + } } } \ No newline at end of file From 11c81aae101b95cb2dea2d31c118eddd5ab9a232 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Tue, 7 Oct 2025 11:22:24 -0700 Subject: [PATCH 02/30] updates --- .../src/Inference/InferenceService.cs | 105 ++++++++++++++++++ .../SemanticRerankRequestOptions.cs | 7 +- .../src/Resource/ClientContextCore.cs | 29 +++++ .../src/Resource/Container/Container.cs | 6 +- .../Resource/Container/ContainerInlineCore.cs | 11 +- .../src/Resource/CosmosClientContext.cs | 14 +++ 6 files changed, 161 insertions(+), 11 deletions(-) create mode 100644 Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs diff --git a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs new file mode 100644 index 0000000000..9bc25195ac --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs @@ -0,0 +1,105 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. 
All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos +{ + using System; + using System.Collections.Generic; + using System.Linq; + using System.Net; + using System.Net.Http; + using System.Net.Http.Headers; + using System.Net.Security; + using System.Reflection; + using System.Security.Cryptography.X509Certificates; + using System.Threading; + using System.Threading.Tasks; + using Microsoft.Azure.Cosmos.Core.Trace; + using Microsoft.Azure.Cosmos.Linq; + using Microsoft.Azure.Cosmos.Resource.CosmosExceptions; + using Microsoft.Azure.Cosmos.Tracing; + using Microsoft.Azure.Cosmos.Tracing.TraceData; + using Microsoft.Azure.Documents; + using Microsoft.Azure.Documents.Collections; + using Microsoft.Azure.Documents.FaultInjection; + + internal class InferenceService : IDisposable + { + private const string basePath = "dbinference.prod.azure.net/"; + + private readonly Uri inferenceEndpoint; + private readonly HttpClient httpClient; + private readonly AuthorizationTokenProvider cosmosAuthorization; + + private bool disposedValue; + + public InferenceService(CosmosClient client, AccountProperties accountProperties) + { + //Create HttpClient + HttpMessageHandler httpMessageHandler = CosmosHttpClientCore.CreateHttpClientHandler( + gatewayModeMaxConnectionLimit: client.DocumentClient.ConnectionPolicy.MaxConnectionLimit, + webProxy: null, + serverCertificateCustomValidationCallback: client.DocumentClient.ConnectionPolicy.ServerCertificateCustomValidationCallback); + + this.httpClient = new HttpClient(httpMessageHandler); + + this.CreateClientHelper(this.httpClient); + + //Set endpoints + this.inferenceEndpoint = new Uri($"https://{accountProperties.Id}.{basePath}"); + + //set authorization + this.cosmosAuthorization = client.DocumentClient.cosmosAuthorization; + } + + public async Task> SemanticRerankAsync( + string renrankContext, + IEnumerable documents, + SemanticRerankRequestOptions options = null, + 
CancellationToken cancellationToken = default) + { + INameValueCollection headers = new RequestNameValueCollection(); + await this.cosmosAuthorization.AddAuthorizationHeaderAsync( + headersCollection: headers, + this.inferenceEndpoint, + HttpConstants.HttpMethods.Post, + AuthorizationTokenType.PrimaryMasterKey); + + HttpRequestMessage message = new HttpRequestMessage(HttpMethod.Post, this.inferenceEndpoint); + + throw new NotImplementedException(); + } + + private void CreateClientHelper(HttpClient httpClient) + { + httpClient.Timeout = TimeSpan.FromSeconds(120); + httpClient.DefaultRequestHeaders.CacheControl = new CacheControlHeaderValue { NoCache = true }; + + // Set requested API version header that can be used for + // version enforcement. + httpClient.DefaultRequestHeaders.Add(HttpConstants.HttpHeaders.Version, + HttpConstants.Versions.CurrentVersion); + + httpClient.DefaultRequestHeaders.Add(HttpConstants.HttpHeaders.Accept, RuntimeConstants.MediaTypes.Json); + } + + protected void Dispose(bool disposing) + { + if (!this.disposedValue) + { + if (disposing) + { + this.httpClient.Dispose(); + } + + this.disposedValue = true; + } + } + + public void Dispose() + { + this.Dispose(true); + } + } +} diff --git a/Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs b/Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs index 91a8330ae3..d4f25e8c71 100644 --- a/Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs +++ b/Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs @@ -19,9 +19,14 @@ public class SemanticRerankRequestOptions : RequestOptions /// public int TopK { get; set; } + /// + /// Batch size for internal scoring operations + /// + public int BatchSize { get; set; } + /// /// Whether to sort the results by relevance score in descending order. 
/// - public bool SortDecending { get; set; } = true; + public bool Sort { get; set; } = true; } } diff --git a/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs b/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs index faa9f50d2a..97926230a6 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs @@ -5,6 +5,7 @@ namespace Microsoft.Azure.Cosmos { using System; + using System.Collections.Generic; using System.Diagnostics; using System.IO; using System.Net.Http; @@ -34,6 +35,7 @@ internal class ClientContextCore : CosmosClientContext private readonly string userAgent; private bool isDisposed = false; + private InferenceService inferenceService = null; private ClientContextCore( CosmosClient client, @@ -467,6 +469,32 @@ await this.DocumentClient.OpenConnectionsToAllReplicasAsync( cancellationToken); } + internal override async Task> SemanticRerankAsync( + string renrankContext, + IEnumerable documents, + SemanticRerankRequestOptions options = null, + CancellationToken cancellationToken = default) + { + InferenceService inferenceService = await this.GetOrCreateInferenceServiceAsync(); + return await inferenceService.SemanticRerankAsync(renrankContext, documents, options, cancellationToken); + } + + /// + internal override async Task GetOrCreateInferenceServiceAsync() + { + AccountProperties accountProperties = await this.client.DocumentClient.GlobalEndpointManager.GetDatabaseAccountAsync() ?? throw new InvalidOperationException("Failed to retrieve AccountProperties. 
The response was null."); + if (this.inferenceService == null) + { + // Double check locking to avoid unnecessary locks + lock (this) + { + this.inferenceService ??= new InferenceService(this.client, accountProperties); + } + } + + return this.inferenceService; + } + public override void Dispose() { this.Dispose(true); @@ -484,6 +512,7 @@ protected virtual void Dispose(bool disposing) { this.batchExecutorCache.Dispose(); this.DocumentClient.Dispose(); + this.inferenceService?.Dispose(); } this.isDisposed = true; diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs index 54d671d6fe..28fa1189e2 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs @@ -1688,12 +1688,14 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManu /// The type of the value in the reranked results. /// The context or query string to use for reranking the documents. /// A list of documents to be reranked - /// (Optional) The options for the semantic reranking request. + /// (Optional) The options for the semantic reranking request. + /// (Optional) representing request cancellation. /// The reranking results, typically including the reranked documents and their scores. public abstract Task> SemanticRerankAsync( string renrankContext, IEnumerable documents, - SemanticRerankRequestOptions options = null); + SemanticRerankRequestOptions options = null, + CancellationToken cancellationToken= default); /// /// Deletes all items in the Container with the specified value. 
diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs index d02e4c5b24..b46e83efe3 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs @@ -701,15 +701,10 @@ public override Task IsFeedRangePartOfAsync( public override Task> SemanticRerankAsync( string renrankContext, IEnumerable documents, - SemanticRerankRequestOptions options = null) + SemanticRerankRequestOptions options = null, + CancellationToken cancellationToken = default) { - return this.ClientContext.OperationHelperAsync( - operationName: nameof(SemanticRerankAsync), - containerName: this.Id, - databaseName: this.Database.Id, - operationType: Documents.OperationType.SemanticRerank, - requestOptions: options, - task: (trace) => base.SemanticRerankAsync(renrankContext, documents, options, trace)); + return this.ClientContext.SemanticRerankAsync(renrankContext, documents, options, cancellationToken); } } } \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs b/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs index e4c2592cc4..c9434f8599 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs @@ -5,6 +5,7 @@ namespace Microsoft.Azure.Cosmos { using System; + using System.Collections.Generic; using System.IO; using System.Threading; using System.Threading.Tasks; @@ -132,6 +133,19 @@ internal abstract Task InitializeContainerUsingRntbdAsync( string containerLinkUri, CancellationToken cancellationToken); + internal abstract Task> SemanticRerankAsync( + string renrankContext, + IEnumerable documents, + SemanticRerankRequestOptions options = null, + CancellationToken cancellationToken = default); + + /// + /// Creates, or gets if already created, the inference service for this 
client + /// This will have a seperate http client that is used to make calls to the inference end point + /// + /// the inferenceService + internal abstract Task GetOrCreateInferenceServiceAsync(); + public abstract void Dispose(); } } \ No newline at end of file From bad23caa384634124035abe2850b45c4c574aed8 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Tue, 7 Oct 2025 14:54:23 -0700 Subject: [PATCH 03/30] tests --- .../src/Inference/InferenceService.cs | 66 ++++++++++--- .../SemanticRerankRequestOptions.cs | 15 ++- .../src/Resource/Container/Container.cs | 2 +- .../SemanticRerankingIntegrationTests.cs | 99 +++++++++++++++++++ 4 files changed, 164 insertions(+), 18 deletions(-) create mode 100644 Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs diff --git a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs index 9bc25195ac..1a19603a13 100644 --- a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs @@ -7,26 +7,16 @@ namespace Microsoft.Azure.Cosmos using System; using System.Collections.Generic; using System.Linq; - using System.Net; using System.Net.Http; using System.Net.Http.Headers; - using System.Net.Security; - using System.Reflection; - using System.Security.Cryptography.X509Certificates; using System.Threading; using System.Threading.Tasks; - using Microsoft.Azure.Cosmos.Core.Trace; - using Microsoft.Azure.Cosmos.Linq; - using Microsoft.Azure.Cosmos.Resource.CosmosExceptions; - using Microsoft.Azure.Cosmos.Tracing; - using Microsoft.Azure.Cosmos.Tracing.TraceData; using Microsoft.Azure.Documents; using Microsoft.Azure.Documents.Collections; - using Microsoft.Azure.Documents.FaultInjection; internal class InferenceService : IDisposable { - private const string basePath = "dbinference.prod.azure.net/"; + private const 
string basePath = "dbinference.azure.com/"; private readonly Uri inferenceEndpoint; private readonly HttpClient httpClient; @@ -59,16 +49,35 @@ public async Task> SemanticRerankAsync>(content); } private void CreateClientHelper(HttpClient httpClient) @@ -84,6 +93,33 @@ private void CreateClientHelper(HttpClient httpClient) httpClient.DefaultRequestHeaders.Add(HttpConstants.HttpHeaders.Accept, RuntimeConstants.MediaTypes.Json); } + private void AddSemanticRerankOptionsToHeders(INameValueCollection headers, SemanticRerankRequestOptions options) + { + if (options == null) + { + return; + } + + headers.Add("return_documents", options.ReturnDocuments.ToString()); + if (options.TopK > -1) + { + headers.Add("top_k", options.TopK.ToString()); + } + if (options.BatchSize > -1) + { + headers.Add("batch_size", options.BatchSize.ToString()); + } + headers.Add("sort", options.Sort.ToString()); + if (!string.IsNullOrEmpty(options.DocumentType)) + { + headers.Add("document_type", options.DocumentType); + } + if (!string.IsNullOrEmpty(options.TargetPaths)) + { + headers.Add("target_paths", options.TargetPaths); + } + } + protected void Dispose(bool disposing) { if (!this.disposedValue) diff --git a/Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs b/Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs index d4f25e8c71..a66400670b 100644 --- a/Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs +++ b/Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs @@ -17,16 +17,27 @@ public class SemanticRerankRequestOptions : RequestOptions /// /// Gets or sets the number of top documents to return. Default all documents are returned. 
/// - public int TopK { get; set; } + public int TopK { get; set; } = -1; /// /// Batch size for internal scoring operations /// - public int BatchSize { get; set; } + public int BatchSize { get; set; } = -1; /// /// Whether to sort the results by relevance score in descending order. /// public bool Sort { get; set; } = true; + + /// + /// Type of document being processed. Supported values are "string" and "json". + /// + public string DocumentType { get; set; } + + /// + /// If document type is "json", the list of JSON paths to extract text from for reranking. Comma separated string. + /// + public string TargetPaths { get; set; } + } } diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs index 28fa1189e2..b382129e27 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs @@ -1695,7 +1695,7 @@ public abstract Task> SemanticRerankAsync documents, SemanticRerankRequestOptions options = null, - CancellationToken cancellationToken= default); + CancellationToken cancellationToken = default); /// /// Deletes all items in the Container with the specified value. 
diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs new file mode 100644 index 0000000000..85146fe612 --- /dev/null +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs @@ -0,0 +1,99 @@ +namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests +{ + using System; + using System.ClientModel.Primitives; + using System.Collections.Generic; + using System.Data; + using System.Drawing; + using System.IO; + using System.Linq; + using System.Net; + using System.Net.Http; + using System.Text; + using System.Text.Json; + using System.Text.Json.Serialization; + using System.Threading; + using System.Threading.Tasks; + using Microsoft.Azure.Cosmos.Diagnostics; + using Microsoft.Azure.Cosmos.FaultInjection; + using Microsoft.VisualStudio.TestTools.UnitTesting; + using Newtonsoft.Json.Linq; + using static Microsoft.Azure.Cosmos.Routing.GlobalPartitionEndpointManagerCore; + using static Microsoft.Azure.Cosmos.SDK.EmulatorTests.MultiRegionSetupHelpers; + + [TestClass] + public class SemanticRerankingIntegrationTests + { + private readonly string connectionString; + private CosmosClient client; + + private CosmosSystemTextJsonSerializer cosmosSystemTextJsonSerializer; + + [TestInitialize] + public void TestInitAsync() + { + this.connectionString = "_"; + + JsonSerializerOptions jsonSerializerOptions = new JsonSerializerOptions() + { + DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull + }; + this.cosmosSystemTextJsonSerializer = new MultiRegionSetupHelpers.CosmosSystemTextJsonSerializer(jsonSerializerOptions); + + if (string.IsNullOrEmpty(this.connectionString)) + { + Assert.Fail("Set environment variable COSMOSDB_MULTI_REGION to run the tests"); + } + this.client = new CosmosClient( + this.connectionString, + new CosmosClientOptions() + { + Serializer = 
this.cosmosSystemTextJsonSerializer, + }); + } + + [TestCleanup] + public void TestCleanup() + { + this.client?.Dispose(); + } + + [TestMethod] + [TestCategory("MultiRegion")] + [Timeout(70000)] + public async Task SemanticRerankTest() + { + Database db = this.client.GetDatabase("ProductsDatabase"); + Container container = db.GetContainer("FitnessEquipment"); + + string queryString = "SELECT * FROM c WHERE c.Category = 'Cardio'"; + + List documents = new List() + { + "Berlin is the capitol of Germany", + "Paris is the capitol of France", + "Madrid is the capitol of Spain", + "Rome is the capitol of Italy", + }; + + SemanticRerankRequestOptions options = new SemanticRerankRequestOptions() + { + ReturnDocuments = true, + TopK = 10, + BatchSize = 32, + Sort = true, + }; + + IReadOnlyDictionary results = await container.SemanticRerankAsync( + queryString, + documents, + options); + + Console.WriteLine("Reranked results:"); + foreach (KeyValuePair result in results) + { + Console.WriteLine($"Document: {result.Key}, Score: {result.Value}"); + } + } + } +} From f39671636683eedc9e879737cede1bffa56b4616 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Mon, 13 Oct 2025 10:50:33 -0700 Subject: [PATCH 04/30] test changes --- ...icrosoft.Azure.Cosmos.EmulatorTests.csproj | 1 + .../SemanticRerankingIntegrationTests.cs | 62 ++++++++++++------- 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/Microsoft.Azure.Cosmos.EmulatorTests.csproj b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/Microsoft.Azure.Cosmos.EmulatorTests.csproj index efc6c67076..fdde935e9f 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/Microsoft.Azure.Cosmos.EmulatorTests.csproj +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/Microsoft.Azure.Cosmos.EmulatorTests.csproj @@ -51,6 +51,7 @@ + diff --git 
a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs index 85146fe612..45c6a7b45d 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs @@ -1,30 +1,20 @@ namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests { using System; - using System.ClientModel.Primitives; using System.Collections.Generic; - using System.Data; - using System.Drawing; - using System.IO; - using System.Linq; - using System.Net; - using System.Net.Http; - using System.Text; using System.Text.Json; using System.Text.Json.Serialization; using System.Threading; using System.Threading.Tasks; - using Microsoft.Azure.Cosmos.Diagnostics; - using Microsoft.Azure.Cosmos.FaultInjection; + using global::Azure.Core; + using global::Azure.Identity; using Microsoft.VisualStudio.TestTools.UnitTesting; - using Newtonsoft.Json.Linq; - using static Microsoft.Azure.Cosmos.Routing.GlobalPartitionEndpointManagerCore; using static Microsoft.Azure.Cosmos.SDK.EmulatorTests.MultiRegionSetupHelpers; [TestClass] public class SemanticRerankingIntegrationTests { - private readonly string connectionString; + private string connectionString; private CosmosClient client; private CosmosSystemTextJsonSerializer cosmosSystemTextJsonSerializer; @@ -32,7 +22,10 @@ public class SemanticRerankingIntegrationTests [TestInitialize] public void TestInitAsync() { - this.connectionString = "_"; + this.connectionString = ""; + + //Create a cosmos client using AAD authentication + TokenCredential tokenCredential = new DefaultAzureCredential(); JsonSerializerOptions jsonSerializerOptions = new JsonSerializerOptions() { @@ -46,6 +39,7 @@ public void TestInitAsync() } this.client = new CosmosClient( this.connectionString, + 
tokenCredential, new CosmosClientOptions() { Serializer = this.cosmosSystemTextJsonSerializer, @@ -63,18 +57,38 @@ public void TestCleanup() [Timeout(70000)] public async Task SemanticRerankTest() { - Database db = this.client.GetDatabase("ProductsDatabase"); - Container container = db.GetContainer("FitnessEquipment"); + Database db = this.client.GetDatabase("virtualstore"); + Container container = db.GetContainer("sportinggoods"); + + string search_text = "integrated pull-up bar"; + + // Fix: Use string interpolation instead of raw string literal and 'f' prefix + string queryString = $@" + SELECT TOP 15 c.id, c.Name, c.Brand, c.Description + FROM c + WHERE FullTextContains(c.Description, ""{search_text}"") + ORDER BY RANK FullTextScore(c.Description, ""{search_text}"") + "; + + string reranking_context = "most economical with multiple pulley adjustmnets and ideal for home gyms"; + + List documents = new List(); - string queryString = "SELECT * FROM c WHERE c.Category = 'Cardio'"; + FeedIterator resultSetIterator = container.GetItemQueryIterator( + new QueryDefinition(queryString), + requestOptions: new QueryRequestOptions() + { + MaxItemCount = 15, + }); - List documents = new List() + while (resultSetIterator.HasMoreResults) { - "Berlin is the capitol of Germany", - "Paris is the capitol of France", - "Madrid is the capitol of Spain", - "Rome is the capitol of Italy", - }; + FeedResponse response = await resultSetIterator.ReadNextAsync(); + foreach (JsonElement item in response) + { + documents.Add(item.ToString()); + } + } SemanticRerankRequestOptions options = new SemanticRerankRequestOptions() { @@ -85,7 +99,7 @@ public async Task SemanticRerankTest() }; IReadOnlyDictionary results = await container.SemanticRerankAsync( - queryString, + reranking_context, documents, options); From 02fc277b73b950c1178e55f695b10014fc1dd931 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Thu, 16 Oct 2025 10:34:27 -0700 
Subject: [PATCH 05/30] auth changes --- .../Authorization/AuthorizationTokenProvider.cs | 6 ++++++ .../AuthorizationTokenProviderMasterKey.cs | 5 +++++ .../AuthorizationTokenProviderResourceToken.cs | 5 +++++ .../AuthorizationTokenProviderTokenCredential.cs | 16 ++++++++++++++++ ...ureKeyCredentialAuthorizationTokenProvider.cs | 5 +++++ .../src/Inference/InferenceService.cs | 9 ++++++--- 6 files changed, 43 insertions(+), 3 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProvider.cs b/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProvider.cs index b7bc7a4475..9839bf1039 100644 --- a/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProvider.cs +++ b/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProvider.cs @@ -52,6 +52,12 @@ public abstract ValueTask GetUserAuthorizationTokenAsync( AuthorizationTokenType tokenType, ITrace trace); + public abstract ValueTask AddInferenceAuthorizationHeaderAsync( + INameValueCollection headersCollection, + Uri requestAddress, + string verb, + AuthorizationTokenType tokenType); + public abstract void TraceUnauthorized( DocumentClientException dce, string authorizationToken, diff --git a/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderMasterKey.cs b/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderMasterKey.cs index 134640ba10..278be856eb 100644 --- a/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderMasterKey.cs +++ b/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderMasterKey.cs @@ -214,6 +214,11 @@ private void Dispose(bool disposing) this.authKeyHashFunction = null; } + public override ValueTask AddInferenceAuthorizationHeaderAsync(INameValueCollection headersCollection, Uri requestAddress, string verb, AuthorizationTokenType tokenType) + { + throw new NotImplementedException("AddInferenceAuthorizationHeaderAsync is only valid for AAD"); + } + // Use C# finalizer syntax for finalization 
code. // This finalizer will run only if the Dispose method does not get called. // It gives your base class the opportunity to finalize. diff --git a/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderResourceToken.cs b/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderResourceToken.cs index 182d4c5ba7..1697589e5d 100644 --- a/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderResourceToken.cs +++ b/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderResourceToken.cs @@ -92,6 +92,11 @@ private void Dispose(bool disposing) // Do nothing } + public override ValueTask AddInferenceAuthorizationHeaderAsync(INameValueCollection headersCollection, Uri requestAddress, string verb, AuthorizationTokenType tokenType) + { + throw new NotImplementedException("AddInferenceAuthorizationHeaderAsync is only valid for AAD"); + } + // Use C# finalizer syntax for finalization code. // This finalizer will run only if the Dispose method does not get called. // It gives your base class the opportunity to finalize. 
diff --git a/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderTokenCredential.cs b/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderTokenCredential.cs index dff20331f6..37ac40a86c 100644 --- a/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderTokenCredential.cs +++ b/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderTokenCredential.cs @@ -15,6 +15,7 @@ namespace Microsoft.Azure.Cosmos internal sealed class AuthorizationTokenProviderTokenCredential : AuthorizationTokenProvider { + private const string InferenceTokenPrefix = "Bearer "; internal readonly TokenCredentialCache tokenCredentialCache; private bool isDisposed = false; @@ -71,6 +72,21 @@ public override async ValueTask AddAuthorizationHeaderAsync( } } + public override async ValueTask AddInferenceAuthorizationHeaderAsync( + INameValueCollection headersCollection, + Uri requestAddress, + string verb, + AuthorizationTokenType tokenType) + { + using (Trace trace = Trace.GetRootTrace(nameof(GetUserAuthorizationTokenAsync), TraceComponent.Authorization, TraceLevel.Info)) + { + string token = await this.tokenCredentialCache.GetTokenAsync(trace); + + string inferenceToken = InferenceTokenPrefix + token; + headersCollection.Add(HttpConstants.HttpHeaders.Authorization, inferenceToken); + } + } + public override void TraceUnauthorized( DocumentClientException dce, string authorizationToken, diff --git a/Microsoft.Azure.Cosmos/src/Authorization/AzureKeyCredentialAuthorizationTokenProvider.cs b/Microsoft.Azure.Cosmos/src/Authorization/AzureKeyCredentialAuthorizationTokenProvider.cs index 602c419a6c..03a0bff2a4 100644 --- a/Microsoft.Azure.Cosmos/src/Authorization/AzureKeyCredentialAuthorizationTokenProvider.cs +++ b/Microsoft.Azure.Cosmos/src/Authorization/AzureKeyCredentialAuthorizationTokenProvider.cs @@ -125,5 +125,10 @@ private void CheckAndRefreshTokenProvider() } } } + + public override ValueTask 
AddInferenceAuthorizationHeaderAsync(INameValueCollection headersCollection, Uri requestAddress, string verb, AuthorizationTokenType tokenType) + { + throw new NotImplementedException("AddInferenceAuthorizationHeaderAsync is only valid for AAD"); + } } } diff --git a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs index 1a19603a13..bd0d30d34c 100644 --- a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs @@ -16,7 +16,8 @@ namespace Microsoft.Azure.Cosmos internal class InferenceService : IDisposable { - private const string basePath = "dbinference.azure.com/"; + private const string basePath = "dbinference.azure.com/inference/semanticReranking"; + private const string inferenceUserAgent = "cosmos-inference-dotnet"; private readonly Uri inferenceEndpoint; private readonly HttpClient httpClient; @@ -51,7 +52,7 @@ public async Task> SemanticRerankAsync>(content); } From e16e98852821cf6bca0628dcf68020237d359f17 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Thu, 16 Oct 2025 14:43:51 -0700 Subject: [PATCH 06/30] test changes --- Microsoft.Azure.Cosmos.sln | 16 ++++++- .../src/Inference/InferenceService.cs | 46 ++++++++++++------- .../SemanticRerankingIntegrationTests.cs | 9 +++- 3 files changed, 50 insertions(+), 21 deletions(-) diff --git a/Microsoft.Azure.Cosmos.sln b/Microsoft.Azure.Cosmos.sln index cec2d9a5f4..338038a043 100644 --- a/Microsoft.Azure.Cosmos.sln +++ b/Microsoft.Azure.Cosmos.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 16 -VisualStudioVersion = 16.0.29123.88 +# Visual Studio Version 17 +VisualStudioVersion = 17.14.36603.0 d17.14 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Azure.Cosmos", 
"Microsoft.Azure.Cosmos\src\Microsoft.Azure.Cosmos.csproj", "{36F6F6A8-CEC8-4261-9948-903495BC3C25}" EndProject @@ -181,6 +181,18 @@ Global {021DDC27-02EF-42C4-9A9E-AA600833C2EE}.Release|Any CPU.Build.0 = Release|Any CPU {021DDC27-02EF-42C4-9A9E-AA600833C2EE}.Release|x64.ActiveCfg = Release|Any CPU {021DDC27-02EF-42C4-9A9E-AA600833C2EE}.Release|x64.Build.0 = Release|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Cover|Any CPU.ActiveCfg = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Cover|Any CPU.Build.0 = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Cover|x64.ActiveCfg = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Cover|x64.Build.0 = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Debug|x64.ActiveCfg = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Debug|x64.Build.0 = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Release|Any CPU.Build.0 = Release|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Release|x64.ActiveCfg = Release|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Release|x64.Build.0 = Release|Any CPU {CE4D6DA8-148D-4A98-943B-D8C2D532E1DC}.Cover|Any CPU.ActiveCfg = Debug|Any CPU {CE4D6DA8-148D-4A98-943B-D8C2D532E1DC}.Cover|Any CPU.Build.0 = Debug|Any CPU {CE4D6DA8-148D-4A98-943B-D8C2D532E1DC}.Cover|x64.ActiveCfg = Debug|Any CPU diff --git a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs index bd0d30d34c..94963905e5 100644 --- a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs @@ -11,8 +11,10 @@ namespace Microsoft.Azure.Cosmos using System.Net.Http.Headers; using System.Threading; using System.Threading.Tasks; + using 
Microsoft.Azure.Cosmos.Core.Collections; using Microsoft.Azure.Documents; using Microsoft.Azure.Documents.Collections; + using Newtonsoft.Json.Linq; internal class InferenceService : IDisposable { @@ -56,22 +58,24 @@ await this.cosmosAuthorization.AddInferenceAuthorizationHeaderAsync( headersCollection: additionalHeaders, this.inferenceEndpoint, HttpConstants.HttpMethods.Post, - AuthorizationTokenType.PrimaryMasterKey); - - this.AddSemanticRerankOptionsToHeders(additionalHeaders, options); + AuthorizationTokenType.AadToken); + Console.WriteLine(this.inferenceEndpoint); + foreach (string key in additionalHeaders.AllKeys()) { + Console.WriteLine($"Adding header {key}: {additionalHeaders[key]}"); message.Headers.Add(key, additionalHeaders[key]); } - var body = new - { - query = renrankContext, - documents = documents.ToArray() - }; + Dictionary body = this.AddSemanticRerankPayload(renrankContext, documents, options); message.Content = new StringContent(Newtonsoft.Json.JsonConvert.SerializeObject(body)); + Console.WriteLine("\n\n\n\n\n\n\n\n\n\n\n\n\n"); + Console.WriteLine(message.Headers.ToString()); + Console.WriteLine(message.Content.ReadAsStringAsync().Result); + Console.WriteLine("\n\n\n\n\n\n\n\n\n\n\n\n\n"); + HttpResponseMessage responseMessage = await this.httpClient.SendAsync(message, cancellationToken); Console.WriteLine(responseMessage.StatusCode); Console.WriteLine(responseMessage.Content); @@ -96,31 +100,39 @@ private void CreateClientHelper(HttpClient httpClient) httpClient.DefaultRequestHeaders.Add(HttpConstants.HttpHeaders.Accept, RuntimeConstants.MediaTypes.Json); } - private void AddSemanticRerankOptionsToHeders(INameValueCollection headers, SemanticRerankRequestOptions options) + private Dictionary AddSemanticRerankPayload(string rerankContext, IEnumerable documents, SemanticRerankRequestOptions options) { + Dictionary payload = new Dictionary + { + { "query", rerankContext }, + { "documents", documents.ToArray() } + }; + if (options == null) { - 
return; + return payload; } - - headers.Add("return_documents", options.ReturnDocuments.ToString()); + + payload["return_documents"] = options.ReturnDocuments; if (options.TopK > -1) { - headers.Add("top_k", options.TopK.ToString()); + payload["top_k"] = options.TopK; } if (options.BatchSize > -1) { - headers.Add("batch_size", options.BatchSize.ToString()); + payload["batch_size"] = options.BatchSize; } - headers.Add("sort", options.Sort.ToString()); + payload["sort"] = options.Sort; if (!string.IsNullOrEmpty(options.DocumentType)) { - headers.Add("document_type", options.DocumentType); + payload["document_type"] = options.DocumentType; } if (!string.IsNullOrEmpty(options.TargetPaths)) { - headers.Add("target_paths", options.TargetPaths); + payload["target_paths"] = options.TargetPaths; } + + return payload; } protected void Dispose(bool disposing) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs index 45c6a7b45d..190dbb58c1 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs @@ -24,8 +24,13 @@ public void TestInitAsync() { this.connectionString = ""; + DefaultAzureCredentialOptions options = new DefaultAzureCredentialOptions + { + TenantId = "", + }; + //Create a cosmos client using AAD authentication - TokenCredential tokenCredential = new DefaultAzureCredential(); + TokenCredential tokenCredential = new DefaultAzureCredential(options); JsonSerializerOptions jsonSerializerOptions = new JsonSerializerOptions() { @@ -73,7 +78,7 @@ ORDER BY RANK FullTextScore(c.Description, ""{search_text}"") string reranking_context = "most economical with multiple pulley adjustmnets and ideal for home gyms"; List documents = new List(); - + 
Console.WriteLine("Query results:"); FeedIterator resultSetIterator = container.GetItemQueryIterator( new QueryDefinition(queryString), requestOptions: new QueryRequestOptions() From 3ad012bd63ae0246d797d09f50c72b9c013a934c Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Thu, 16 Oct 2025 14:45:10 -0700 Subject: [PATCH 07/30] Update Microsoft.Azure.Cosmos.sln --- Microsoft.Azure.Cosmos.sln | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/Microsoft.Azure.Cosmos.sln b/Microsoft.Azure.Cosmos.sln index 338038a043..cec2d9a5f4 100644 --- a/Microsoft.Azure.Cosmos.sln +++ b/Microsoft.Azure.Cosmos.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.14.36603.0 d17.14 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29123.88 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Azure.Cosmos", "Microsoft.Azure.Cosmos\src\Microsoft.Azure.Cosmos.csproj", "{36F6F6A8-CEC8-4261-9948-903495BC3C25}" EndProject @@ -181,18 +181,6 @@ Global {021DDC27-02EF-42C4-9A9E-AA600833C2EE}.Release|Any CPU.Build.0 = Release|Any CPU {021DDC27-02EF-42C4-9A9E-AA600833C2EE}.Release|x64.ActiveCfg = Release|Any CPU {021DDC27-02EF-42C4-9A9E-AA600833C2EE}.Release|x64.Build.0 = Release|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Cover|Any CPU.ActiveCfg = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Cover|Any CPU.Build.0 = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Cover|x64.ActiveCfg = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Cover|x64.Build.0 = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Debug|Any CPU.Build.0 = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Debug|x64.ActiveCfg = Debug|Any CPU - 
{D744906A-1091-403F-B0B6-794DE045169A}.Debug|x64.Build.0 = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Release|Any CPU.ActiveCfg = Release|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Release|Any CPU.Build.0 = Release|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Release|x64.ActiveCfg = Release|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Release|x64.Build.0 = Release|Any CPU {CE4D6DA8-148D-4A98-943B-D8C2D532E1DC}.Cover|Any CPU.ActiveCfg = Debug|Any CPU {CE4D6DA8-148D-4A98-943B-D8C2D532E1DC}.Cover|Any CPU.Build.0 = Debug|Any CPU {CE4D6DA8-148D-4A98-943B-D8C2D532E1DC}.Cover|x64.ActiveCfg = Debug|Any CPU From f2e0e5b8730744b31e12c607f331cda758521384 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:35:43 -0700 Subject: [PATCH 08/30] fixed auth issues --- ...thorizationTokenProviderTokenCredential.cs | 3 ++ .../src/Inference/InferenceService.cs | 40 +++++++++++-------- .../src/Resource/ClientContextCore.cs | 4 +- .../src/Resource/Container/Container.cs | 4 +- .../Resource/Container/ContainerInlineCore.cs | 4 +- .../src/Resource/CosmosClientContext.cs | 2 +- .../SemanticRerankingIntegrationTests.cs | 11 ++--- 7 files changed, 35 insertions(+), 33 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderTokenCredential.cs b/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderTokenCredential.cs index 37ac40a86c..17b4305032 100644 --- a/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderTokenCredential.cs +++ b/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderTokenCredential.cs @@ -19,11 +19,14 @@ internal sealed class AuthorizationTokenProviderTokenCredential : AuthorizationT internal readonly TokenCredentialCache tokenCredentialCache; private bool isDisposed = false; + internal readonly TokenCredential tokenCredential; + public AuthorizationTokenProviderTokenCredential( TokenCredential 
tokenCredential, Uri accountEndpoint, TimeSpan? backgroundTokenCredentialRefreshInterval) { + this.tokenCredential = tokenCredential ?? throw new ArgumentNullException(nameof(tokenCredential)); this.tokenCredentialCache = new TokenCredentialCache( tokenCredential: tokenCredential, accountEndpoint: accountEndpoint, diff --git a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs index 94963905e5..6bb3c250fc 100644 --- a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs @@ -9,17 +9,18 @@ namespace Microsoft.Azure.Cosmos using System.Linq; using System.Net.Http; using System.Net.Http.Headers; + using System.Text; using System.Threading; using System.Threading.Tasks; - using Microsoft.Azure.Cosmos.Core.Collections; + using global::Azure.Core; using Microsoft.Azure.Documents; using Microsoft.Azure.Documents.Collections; - using Newtonsoft.Json.Linq; internal class InferenceService : IDisposable { private const string basePath = "dbinference.azure.com/inference/semanticReranking"; private const string inferenceUserAgent = "cosmos-inference-dotnet"; + private const string inferenceServiceDefaultScope = "https://dbinference.azure.com/.default"; private readonly Uri inferenceEndpoint; private readonly HttpClient httpClient; @@ -43,10 +44,21 @@ public InferenceService(CosmosClient client, AccountProperties accountProperties this.inferenceEndpoint = new Uri($"https://{accountProperties.Id}.{basePath}"); //set authorization - this.cosmosAuthorization = client.DocumentClient.cosmosAuthorization; + if (client.DocumentClient.cosmosAuthorization.GetType() != typeof(AuthorizationTokenProviderTokenCredential)) + { + throw new InvalidOperationException("InferenceService only supports AAD authentication."); + } + + AuthorizationTokenProviderTokenCredential defaultOperationTokenProvider = client.DocumentClient.cosmosAuthorization as 
AuthorizationTokenProviderTokenCredential; + TokenCredential tokenCredential = defaultOperationTokenProvider.tokenCredential; + + this.cosmosAuthorization = new AuthorizationTokenProviderTokenCredential( + tokenCredential: tokenCredential, + accountEndpoint: new Uri(inferenceServiceDefaultScope), + backgroundTokenCredentialRefreshInterval: client.ClientOptions?.TokenCredentialBackgroundRefreshInterval); } - public async Task> SemanticRerankAsync( + public async Task> SemanticRerankAsync( string renrankContext, IEnumerable documents, SemanticRerankRequestOptions options = null, @@ -59,32 +71,26 @@ await this.cosmosAuthorization.AddInferenceAuthorizationHeaderAsync( this.inferenceEndpoint, HttpConstants.HttpMethods.Post, AuthorizationTokenType.AadToken); - Console.WriteLine(this.inferenceEndpoint); - + additionalHeaders.Add(HttpConstants.HttpHeaders.UserAgent, inferenceUserAgent); + foreach (string key in additionalHeaders.AllKeys()) { - Console.WriteLine($"Adding header {key}: {additionalHeaders[key]}"); message.Headers.Add(key, additionalHeaders[key]); } Dictionary body = this.AddSemanticRerankPayload(renrankContext, documents, options); - message.Content = new StringContent(Newtonsoft.Json.JsonConvert.SerializeObject(body)); - - Console.WriteLine("\n\n\n\n\n\n\n\n\n\n\n\n\n"); - Console.WriteLine(message.Headers.ToString()); - Console.WriteLine(message.Content.ReadAsStringAsync().Result); - Console.WriteLine("\n\n\n\n\n\n\n\n\n\n\n\n\n"); + message.Content = new StringContent( + Newtonsoft.Json.JsonConvert.SerializeObject(body), + Encoding.UTF8, + RuntimeConstants.MediaTypes.Json); HttpResponseMessage responseMessage = await this.httpClient.SendAsync(message, cancellationToken); - Console.WriteLine(responseMessage.StatusCode); - Console.WriteLine(responseMessage.Content); responseMessage.EnsureSuccessStatusCode(); // return the content of the responsemessage as a dictonary string content = await responseMessage.Content.ReadAsStringAsync(); - 
Console.WriteLine(content); - return Newtonsoft.Json.JsonConvert.DeserializeObject>(content); + return Newtonsoft.Json.JsonConvert.DeserializeObject>(content); } private void CreateClientHelper(HttpClient httpClient) diff --git a/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs b/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs index 97926230a6..f1eec02d9b 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs @@ -469,14 +469,14 @@ await this.DocumentClient.OpenConnectionsToAllReplicasAsync( cancellationToken); } - internal override async Task> SemanticRerankAsync( + internal override async Task> SemanticRerankAsync( string renrankContext, IEnumerable documents, SemanticRerankRequestOptions options = null, CancellationToken cancellationToken = default) { InferenceService inferenceService = await this.GetOrCreateInferenceServiceAsync(); - return await inferenceService.SemanticRerankAsync(renrankContext, documents, options, cancellationToken); + return await inferenceService.SemanticRerankAsync(renrankContext, documents, options, cancellationToken); } /// diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs index b382129e27..e9bec4293b 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs @@ -1684,14 +1684,12 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManu /// This method uses a semantic reranker to score and reorder the provided documents /// based on their relevance to the given reranking context. /// - /// The type of the key in the reranked results. - /// The type of the value in the reranked results. /// The context or query string to use for reranking the documents. /// A list of documents to be reranked /// (Optional) The options for the semantic reranking request. 
/// (Optional) representing request cancellation. /// The reranking results, typically including the reranked documents and their scores. - public abstract Task> SemanticRerankAsync( + public abstract Task> SemanticRerankAsync( string renrankContext, IEnumerable documents, SemanticRerankRequestOptions options = null, diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs index b46e83efe3..7aafbd2294 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs @@ -698,13 +698,13 @@ public override Task IsFeedRangePartOfAsync( cancellationToken: cancellationToken)); } - public override Task> SemanticRerankAsync( + public override Task> SemanticRerankAsync( string renrankContext, IEnumerable documents, SemanticRerankRequestOptions options = null, CancellationToken cancellationToken = default) { - return this.ClientContext.SemanticRerankAsync(renrankContext, documents, options, cancellationToken); + return this.ClientContext.SemanticRerankAsync(renrankContext, documents, options, cancellationToken); } } } \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs b/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs index c9434f8599..350d2421f8 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs @@ -133,7 +133,7 @@ internal abstract Task InitializeContainerUsingRntbdAsync( string containerLinkUri, CancellationToken cancellationToken); - internal abstract Task> SemanticRerankAsync( + internal abstract Task> SemanticRerankAsync( string renrankContext, IEnumerable documents, SemanticRerankRequestOptions options = null, diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs 
b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs index 190dbb58c1..0ded5d4bba 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs @@ -1,6 +1,5 @@ namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests { - using System; using System.Collections.Generic; using System.Text.Json; using System.Text.Json.Serialization; @@ -27,6 +26,7 @@ public void TestInitAsync() DefaultAzureCredentialOptions options = new DefaultAzureCredentialOptions { TenantId = "", + ExcludeVisualStudioCredential = true }; //Create a cosmos client using AAD authentication @@ -78,7 +78,6 @@ ORDER BY RANK FullTextScore(c.Description, ""{search_text}"") string reranking_context = "most economical with multiple pulley adjustmnets and ideal for home gyms"; List documents = new List(); - Console.WriteLine("Query results:"); FeedIterator resultSetIterator = container.GetItemQueryIterator( new QueryDefinition(queryString), requestOptions: new QueryRequestOptions() @@ -103,16 +102,12 @@ ORDER BY RANK FullTextScore(c.Description, ""{search_text}"") Sort = true, }; - IReadOnlyDictionary results = await container.SemanticRerankAsync( + IReadOnlyDictionary results = await container.SemanticRerankAsync( reranking_context, documents, options); - Console.WriteLine("Reranked results:"); - foreach (KeyValuePair result in results) - { - Console.WriteLine($"Document: {result.Key}, Score: {result.Value}"); - } + Assert.IsTrue(results["Scores"][0]["index"] == 4); } } } From 89238883ffa69e985577946a589d3139596c08ac Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Tue, 28 Oct 2025 14:18:10 -0700 Subject: [PATCH 09/30] addresses PR comments --- .../src/Inference/InferenceService.cs | 78 +++++++++++++------ .../SemanticRerankRequestOptions.cs | 43 
---------- .../src/Resource/ClientContextCore.cs | 2 +- .../src/Resource/Container/Container.cs | 13 +++- .../Resource/Container/ContainerInlineCore.cs | 2 +- .../src/Resource/CosmosClientContext.cs | 2 +- .../SemanticRerankingIntegrationTests.cs | 10 +-- .../Contracts/DotNetPreviewSDKAPI.json | 5 ++ 8 files changed, 76 insertions(+), 79 deletions(-) delete mode 100644 Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs diff --git a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs index 6bb3c250fc..2ab62a7fa7 100644 --- a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs @@ -16,10 +16,16 @@ namespace Microsoft.Azure.Cosmos using Microsoft.Azure.Documents; using Microsoft.Azure.Documents.Collections; + /// + /// Provides functionality to interact with the Cosmos DB Inference Service for semantic reranking. + /// internal class InferenceService : IDisposable { + // Base path for the inference service endpoint. private const string basePath = "dbinference.azure.com/inference/semanticReranking"; + // User agent string for inference requests. private const string inferenceUserAgent = "cosmos-inference-dotnet"; + // Default scope for AAD authentication. private const string inferenceServiceDefaultScope = "https://dbinference.azure.com/.default"; private readonly Uri inferenceEndpoint; @@ -28,9 +34,15 @@ internal class InferenceService : IDisposable private bool disposedValue; + /// + /// Initializes a new instance of the class. + /// + /// The CosmosClient instance. + /// The account properties for endpoint construction. + /// Thrown if AAD authentication is not used. public InferenceService(CosmosClient client, AccountProperties accountProperties) { - //Create HttpClient + // Create and configure HttpClient for inference requests. 
HttpMessageHandler httpMessageHandler = CosmosHttpClientCore.CreateHttpClientHandler( gatewayModeMaxConnectionLimit: client.DocumentClient.ConnectionPolicy.MaxConnectionLimit, webProxy: null, @@ -40,15 +52,16 @@ public InferenceService(CosmosClient client, AccountProperties accountProperties this.CreateClientHelper(this.httpClient); - //Set endpoints + // Construct the inference service endpoint URI. this.inferenceEndpoint = new Uri($"https://{accountProperties.Id}.{basePath}"); - //set authorization + // Ensure AAD authentication is used. if (client.DocumentClient.cosmosAuthorization.GetType() != typeof(AuthorizationTokenProviderTokenCredential)) { throw new InvalidOperationException("InferenceService only supports AAD authentication."); } + // Set up token credential for authorization. AuthorizationTokenProviderTokenCredential defaultOperationTokenProvider = client.DocumentClient.cosmosAuthorization as AuthorizationTokenProviderTokenCredential; TokenCredential tokenCredential = defaultOperationTokenProvider.tokenCredential; @@ -58,12 +71,21 @@ public InferenceService(CosmosClient client, AccountProperties accountProperties backgroundTokenCredentialRefreshInterval: client.ClientOptions?.TokenCredentialBackgroundRefreshInterval); } + /// + /// Sends a semantic rerank request to the inference service. + /// + /// The context/query for reranking. + /// The documents to be reranked. + /// Optional additional options for the request. + /// Cancellation token. + /// A dictionary containing the reranked results. public async Task> SemanticRerankAsync( string renrankContext, IEnumerable documents, - SemanticRerankRequestOptions options = null, + IDictionary options = null, CancellationToken cancellationToken = default) { + // Prepare HTTP request for semantic reranking. 
HttpRequestMessage message = new HttpRequestMessage(HttpMethod.Post, this.inferenceEndpoint); INameValueCollection additionalHeaders = new RequestNameValueCollection(); await this.cosmosAuthorization.AddInferenceAuthorizationHeaderAsync( @@ -73,11 +95,13 @@ await this.cosmosAuthorization.AddInferenceAuthorizationHeaderAsync( AuthorizationTokenType.AadToken); additionalHeaders.Add(HttpConstants.HttpHeaders.UserAgent, inferenceUserAgent); + // Add all headers to the HTTP request. foreach (string key in additionalHeaders.AllKeys()) { message.Headers.Add(key, additionalHeaders[key]); } + // Build the request payload. Dictionary body = this.AddSemanticRerankPayload(renrankContext, documents, options); message.Content = new StringContent( @@ -85,28 +109,39 @@ await this.cosmosAuthorization.AddInferenceAuthorizationHeaderAsync( Encoding.UTF8, RuntimeConstants.MediaTypes.Json); + // Send the request and ensure success. HttpResponseMessage responseMessage = await this.httpClient.SendAsync(message, cancellationToken); responseMessage.EnsureSuccessStatusCode(); - // return the content of the responsemessage as a dictonary + // Deserialize and return the response content as a dictionary. string content = await responseMessage.Content.ReadAsStringAsync(); return Newtonsoft.Json.JsonConvert.DeserializeObject>(content); } + /// + /// Configures the provided HttpClient with default headers and settings for inference requests. + /// + /// The HttpClient to configure. private void CreateClientHelper(HttpClient httpClient) { httpClient.Timeout = TimeSpan.FromSeconds(120); httpClient.DefaultRequestHeaders.CacheControl = new CacheControlHeaderValue { NoCache = true }; - // Set requested API version header that can be used for - // version enforcement. + // Set requested API version header for version enforcement. 
httpClient.DefaultRequestHeaders.Add(HttpConstants.HttpHeaders.Version, HttpConstants.Versions.CurrentVersion); httpClient.DefaultRequestHeaders.Add(HttpConstants.HttpHeaders.Accept, RuntimeConstants.MediaTypes.Json); } - private Dictionary AddSemanticRerankPayload(string rerankContext, IEnumerable documents, SemanticRerankRequestOptions options) + /// + /// Constructs the payload for the semantic rerank request. + /// + /// The context/query for reranking. + /// The documents to be reranked. + /// Optional additional options. + /// A dictionary representing the request payload. + private Dictionary AddSemanticRerankPayload(string rerankContext, IEnumerable documents, IDictionary options) { Dictionary payload = new Dictionary { @@ -119,28 +154,19 @@ private Dictionary AddSemanticRerankPayload(string rerankContex return payload; } - payload["return_documents"] = options.ReturnDocuments; - if (options.TopK > -1) + // Add any additional options to the payload. + foreach (string option in options.Keys) { - payload["top_k"] = options.TopK; - } - if (options.BatchSize > -1) - { - payload["batch_size"] = options.BatchSize; - } - payload["sort"] = options.Sort; - if (!string.IsNullOrEmpty(options.DocumentType)) - { - payload["document_type"] = options.DocumentType; - } - if (!string.IsNullOrEmpty(options.TargetPaths)) - { - payload["target_paths"] = options.TargetPaths; + payload.Add(option, options[option].ToString()); } return payload; } + /// + /// Disposes managed resources used by the service. + /// + /// Indicates if called from Dispose. protected void Dispose(bool disposing) { if (!this.disposedValue) @@ -148,12 +174,16 @@ protected void Dispose(bool disposing) if (disposing) { this.httpClient.Dispose(); + this.cosmosAuthorization.Dispose(); } this.disposedValue = true; } } + /// + /// Disposes the service and its resources. 
+ /// public void Dispose() { this.Dispose(true); diff --git a/Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs b/Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs deleted file mode 100644 index a66400670b..0000000000 --- a/Microsoft.Azure.Cosmos/src/RequestOptions/SemanticRerankRequestOptions.cs +++ /dev/null @@ -1,43 +0,0 @@ -//------------------------------------------------------------ -// Copyright (c) Microsoft Corporation. All rights reserved. -//------------------------------------------------------------ - -namespace Microsoft.Azure.Cosmos -{ - /// - /// Request options for semantic rerank operations in Azure Cosmos DB. - /// - public class SemanticRerankRequestOptions : RequestOptions - { - /// - /// Gets or sets a value indicating whether to return the documents text in the response. Default is true. - /// - public bool ReturnDocuments { get; set; } = true; - - /// - /// Gets or sets the number of top documents to return. Default all documents are returned. - /// - public int TopK { get; set; } = -1; - - /// - /// Batch size for internal scoring operations - /// - public int BatchSize { get; set; } = -1; - - /// - /// Whether to sort the results by relevance score in descending order. - /// - public bool Sort { get; set; } = true; - - /// - /// Type of document being processed. Supported values are "string" and "json". - /// - public string DocumentType { get; set; } - - /// - /// If document type is "json", the list of JSON paths to extract text from for reranking. Comma separated string. 
- /// - public string TargetPaths { get; set; } - - } -} diff --git a/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs b/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs index f1eec02d9b..7ad72f1c74 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs @@ -472,7 +472,7 @@ await this.DocumentClient.OpenConnectionsToAllReplicasAsync( internal override async Task> SemanticRerankAsync( string renrankContext, IEnumerable documents, - SemanticRerankRequestOptions options = null, + IDictionary options = null, CancellationToken cancellationToken = default) { InferenceService inferenceService = await this.GetOrCreateInferenceServiceAsync(); diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs index e9bec4293b..1b8e6a2767 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs @@ -1678,7 +1678,7 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilder( public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManualCheckpoint( string processorName, ChangeFeedStreamHandlerWithManualCheckpoint onChangesDelegate); - + /// /// Rerank a list of documents using semantic reranking. /// This method uses a semantic reranker to score and reorder the provided documents @@ -1688,11 +1688,16 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManu /// A list of documents to be reranked /// (Optional) The options for the semantic reranking request. /// (Optional) representing request cancellation. - /// The reranking results, typically including the reranked documents and their scores. - public abstract Task> SemanticRerankAsync( + /// The reranking results, typically including the reranked documents and their scores. 
+#if PREVIEW + public +#else + internal +#endif + abstract Task> SemanticRerankAsync( string renrankContext, IEnumerable documents, - SemanticRerankRequestOptions options = null, + IDictionary options = null, CancellationToken cancellationToken = default); /// diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs index 7aafbd2294..6015430fe1 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs @@ -701,7 +701,7 @@ public override Task IsFeedRangePartOfAsync( public override Task> SemanticRerankAsync( string renrankContext, IEnumerable documents, - SemanticRerankRequestOptions options = null, + IDictionary options = null, CancellationToken cancellationToken = default) { return this.ClientContext.SemanticRerankAsync(renrankContext, documents, options, cancellationToken); diff --git a/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs b/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs index 350d2421f8..e4f9da86c3 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs @@ -136,7 +136,7 @@ internal abstract Task InitializeContainerUsingRntbdAsync( internal abstract Task> SemanticRerankAsync( string renrankContext, IEnumerable documents, - SemanticRerankRequestOptions options = null, + IDictionary options = null, CancellationToken cancellationToken = default); /// diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs index 0ded5d4bba..804d768dff 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs +++ 
b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs @@ -94,12 +94,12 @@ ORDER BY RANK FullTextScore(c.Description, ""{search_text}"") } } - SemanticRerankRequestOptions options = new SemanticRerankRequestOptions() + Dictionary options = new Dictionary { - ReturnDocuments = true, - TopK = 10, - BatchSize = 32, - Sort = true, + { "ReturnDocuments", true }, + { "TopK", 10 }, + { "BatchSize", 32 }, + { "Sort", true } }; IReadOnlyDictionary results = await container.SemanticRerankAsync( diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetPreviewSDKAPI.json b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetPreviewSDKAPI.json index f5ed83ba12..1410994abb 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetPreviewSDKAPI.json +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetPreviewSDKAPI.json @@ -263,6 +263,11 @@ "Type": "Method", "Attributes": [], "MethodInfo": "System.Threading.Tasks.Task`1[System.Collections.Generic.IEnumerable`1[System.String]] GetPartitionKeyRangesAsync(Microsoft.Azure.Cosmos.FeedRange, System.Threading.CancellationToken);IsAbstract:True;IsStatic:False;IsVirtual:True;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "System.Threading.Tasks.Task`1[System.Collections.Generic.IReadOnlyDictionary`2[System.String,System.Object]] SemanticRerankAsync(System.String, System.Collections.Generic.IEnumerable`1[System.String], System.Collections.Generic.IDictionary`2[System.String,System.Object], System.Threading.CancellationToken)": { + "Type": "Method", + "Attributes": [], + "MethodInfo": "System.Threading.Tasks.Task`1[System.Collections.Generic.IReadOnlyDictionary`2[System.String,System.Object]] SemanticRerankAsync(System.String, System.Collections.Generic.IEnumerable`1[System.String], System.Collections.Generic.IDictionary`2[System.String,System.Object], 
System.Threading.CancellationToken);IsAbstract:True;IsStatic:False;IsVirtual:True;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" } }, "NestedTypes": {} From c05501ed6d621c6a2616b62d1bd452ca7b6b64ba Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Thu, 30 Oct 2025 12:11:35 -0700 Subject: [PATCH 10/30] test Fix --- .../src/Inference/InferenceService.cs | 2 +- .../src/Resource/Container/ContainerInlineCore.cs | 7 ++++++- .../SemanticRerankingIntegrationTests.cs | 12 ++++++------ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs index 2ab62a7fa7..e2927a564b 100644 --- a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs @@ -157,7 +157,7 @@ private Dictionary AddSemanticRerankPayload(string rerankContex // Add any additional options to the payload. 
foreach (string option in options.Keys) { - payload.Add(option, options[option].ToString()); + payload.Add(option, options[option]); } return payload; diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs index 6015430fe1..2989d2b6d1 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs @@ -698,7 +698,12 @@ public override Task IsFeedRangePartOfAsync( cancellationToken: cancellationToken)); } - public override Task> SemanticRerankAsync( +#if PREVIEW + public +#else + internal +#endif + override Task> SemanticRerankAsync( string renrankContext, IEnumerable documents, IDictionary options = null, diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs index 804d768dff..ee6345dc43 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs @@ -21,11 +21,11 @@ public class SemanticRerankingIntegrationTests [TestInitialize] public void TestInitAsync() { - this.connectionString = ""; + this.connectionString = "https://inferencee2etest.documents.azure.com:443/"; DefaultAzureCredentialOptions options = new DefaultAzureCredentialOptions { - TenantId = "", + TenantId = "72f988bf-86f1-41af-91ab-2d7cd011db47", ExcludeVisualStudioCredential = true }; @@ -96,10 +96,10 @@ ORDER BY RANK FullTextScore(c.Description, ""{search_text}"") Dictionary options = new Dictionary { - { "ReturnDocuments", true }, - { "TopK", 10 }, - { "BatchSize", 32 }, - { "Sort", true } + { "return_documents", true }, + { "top_k", 10 }, + { "batch_size", 32 }, + { "sort", true } }; 
IReadOnlyDictionary results = await container.SemanticRerankAsync( From 76f9ce1ebe3319b6d88d2f46d921984d95f2497d Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Mon, 3 Nov 2025 14:41:29 -0800 Subject: [PATCH 11/30] Adds Semantic rerank result --- .../src/Inference/InferenceService.cs | 5 +- .../src/Inference/RerankScore.cs | 46 +++++++ .../src/Inference/SemanticRerankResult.cs | 112 ++++++++++++++++++ .../src/Resource/ClientContextCore.cs | 2 +- .../src/Resource/Container/Container.cs | 2 +- .../Resource/Container/ContainerInlineCore.cs | 2 +- .../src/Resource/CosmosClientContext.cs | 2 +- ...selineTests.StreamPointOperationsAsync.xml | 20 ++++ .../SemanticRerankingIntegrationTests.cs | 8 +- 9 files changed, 190 insertions(+), 9 deletions(-) create mode 100644 Microsoft.Azure.Cosmos/src/Inference/RerankScore.cs create mode 100644 Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs diff --git a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs index e2927a564b..5e1915fa7d 100644 --- a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs @@ -79,7 +79,7 @@ public InferenceService(CosmosClient client, AccountProperties accountProperties /// Optional additional options for the request. /// Cancellation token. /// A dictionary containing the reranked results. - public async Task> SemanticRerankAsync( + public async Task SemanticRerankAsync( string renrankContext, IEnumerable documents, IDictionary options = null, @@ -114,8 +114,7 @@ await this.cosmosAuthorization.AddInferenceAuthorizationHeaderAsync( responseMessage.EnsureSuccessStatusCode(); // Deserialize and return the response content as a dictionary. 
- string content = await responseMessage.Content.ReadAsStringAsync(); - return Newtonsoft.Json.JsonConvert.DeserializeObject>(content); + return await SemanticRerankResult.DeserializeSemanticRerankResultAsync(responseMessage); } /// diff --git a/Microsoft.Azure.Cosmos/src/Inference/RerankScore.cs b/Microsoft.Azure.Cosmos/src/Inference/RerankScore.cs new file mode 100644 index 0000000000..15c7200b7d --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Inference/RerankScore.cs @@ -0,0 +1,46 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos +{ + /// + /// Represents the score assigned to a document after a reranking operation. + /// +#if PREVIEW + public +#else + internal +#endif + + class RerankScore + { + /// + /// Gets the document content or identifier that was reranked. + /// + public string Document { get; } + + /// + /// Gets the score assigned to the document after reranking. + /// + public double Score { get; } + + /// + /// Gets the original index or position of the document before reranking. + /// + public int Index { get; } + + /// + /// Initializes a new instance of the class. + /// + /// The document content or identifier. + /// The reranked score for the document. + /// The original index of the document. + public RerankScore(string document, double score, int index) + { + this.Document = document; + this.Score = score; + this.Index = index; + } + } +} diff --git a/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs b/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs new file mode 100644 index 0000000000..ec58fb9264 --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs @@ -0,0 +1,112 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos +{ + using System; + using System.Collections.Generic; + using System.Net.Http; + using System.Net.Http.Headers; + using System.Threading.Tasks; + + /// + /// Represents the result of a semantic reranking operation, including rerank scores, + /// latency, token usage, and HTTP response headers. + /// +#if PREVIEW + public +#else + internal +#endif + + class SemanticRerankResult + { + /// + /// Gets the HTTP response headers associated with the rerank operation. + /// + public HttpResponseHeaders Headers { get; } + + /// + /// Gets the list of rerank scores for the documents. + /// + public IReadOnlyList RerankScores { get; } + + /// + /// Gets the latency information for the rerank operation. + /// + public Dictionary Latency { get; } + + /// + /// Gets the token usage information for the rerank operation. + /// + public Dictionary TokenUseage { get; } + + /// + /// Initializes a new instance of the class. + /// + /// The list of rerank scores. + /// The latency information. + /// The token usage information. + /// The HTTP response headers. + private SemanticRerankResult( + IReadOnlyList rerankScores, + Dictionary latency, + Dictionary tokenUseage, + HttpResponseHeaders headers) + { + this.RerankScores = rerankScores; + this.Latency = latency; + this.TokenUseage = tokenUseage; + this.Headers = headers; + } + + /// + /// Deserializes a from an HTTP response message asynchronously. + /// + /// The HTTP response message containing the rerank result. + /// A task that represents the asynchronous operation. The task result contains the deserialized . + internal static async Task DeserializeSemanticRerankResultAsync(HttpResponseMessage responseMessage) + { + // Read the response content as a string. + string content = await responseMessage.Content.ReadAsStringAsync(); + + // Deserialize the JSON content into a dictionary. 
+ Dictionary responseJson = Newtonsoft.Json.JsonConvert.DeserializeObject>(content); + + // Log the response JSON for debugging purposes. + Console.WriteLine("Response JSON: " + content); + + // Parse the rerank scores, latency, and token usage from the response. + return new SemanticRerankResult( + ParseRerankScores(responseJson["Scores"]), + responseJson.ContainsKey("latency") ? Newtonsoft.Json.JsonConvert.DeserializeObject>(responseJson["latency"].ToString()) : null, + responseJson.ContainsKey("token_usage") ? Newtonsoft.Json.JsonConvert.DeserializeObject>(responseJson["token_usage"].ToString()) : null, + responseMessage.Headers); + throw new NotImplementedException(); + } + + /// + /// Parses the rerank scores from the provided object. + /// + /// The object containing rerank scores, expected to be a JArray. + /// A read-only list of objects. + private static IReadOnlyList ParseRerankScores(object rerankScoresObj) + { + List rerankScores = new List(); + if (rerankScoresObj is Newtonsoft.Json.Linq.JArray rerankScoresArray) + { + foreach (Newtonsoft.Json.Linq.JToken item in rerankScoresArray) + { + // Extract document, score, and index from each item. + string document = item["document"]?.ToString(); + double score = item["score"] != null ? Convert.ToDouble(item["score"]) : 0.0; + int index = item["index"] != null ? 
Convert.ToInt32(item["index"]) : -1; + RerankScore rerankScore = new RerankScore(document, score, index); + rerankScores.Add(rerankScore); + } + } + return rerankScores; + } + } +} diff --git a/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs b/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs index 7ad72f1c74..bf4375a269 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs @@ -469,7 +469,7 @@ await this.DocumentClient.OpenConnectionsToAllReplicasAsync( cancellationToken); } - internal override async Task> SemanticRerankAsync( + internal override async Task SemanticRerankAsync( string renrankContext, IEnumerable documents, IDictionary options = null, diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs index 1b8e6a2767..2dfdebe997 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs @@ -1694,7 +1694,7 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManu #else internal #endif - abstract Task> SemanticRerankAsync( + abstract Task SemanticRerankAsync( string renrankContext, IEnumerable documents, IDictionary options = null, diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs index 2989d2b6d1..08c744e424 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs @@ -703,7 +703,7 @@ public override Task IsFeedRangePartOfAsync( #else internal #endif - override Task> SemanticRerankAsync( + override Task SemanticRerankAsync( string renrankContext, IEnumerable documents, IDictionary options = null, diff --git a/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs 
b/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs index e4f9da86c3..821881102d 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs @@ -133,7 +133,7 @@ internal abstract Task InitializeContainerUsingRntbdAsync( string containerLinkUri, CancellationToken cancellationToken); - internal abstract Task> SemanticRerankAsync( + internal abstract Task SemanticRerankAsync( string renrankContext, IEnumerable documents, IDictionary options = null, diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/BaselineTest/TestBaseline/EndToEndTraceWriterBaselineTests.StreamPointOperationsAsync.xml b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/BaselineTest/TestBaseline/EndToEndTraceWriterBaselineTests.StreamPointOperationsAsync.xml index ad3eb33168..33b0825ae3 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/BaselineTest/TestBaseline/EndToEndTraceWriterBaselineTests.StreamPointOperationsAsync.xml +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/BaselineTest/TestBaseline/EndToEndTraceWriterBaselineTests.StreamPointOperationsAsync.xml @@ -26,6 +26,7 @@ │ Redacted To Not Change The Baselines From Run To Run │ ) └── Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds + ├── Get Collection Cache(00000000-0000-0000-0000-000000000000) Routing-Component 00:00:00:000 0.00 milliseconds └── Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds │ ( │ [System Info] @@ -55,6 +56,10 @@ "name": "Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler", "duration in milliseconds": 0, "children": [ + { + "name": "Get Collection Cache", + "duration in milliseconds": 0 + }, { "name": 
"Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler", "duration in milliseconds": 0, @@ -156,6 +161,7 @@ │ Redacted To Not Change The Baselines From Run To Run │ ) └── Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds + ├── Get Collection Cache(00000000-0000-0000-0000-000000000000) Routing-Component 00:00:00:000 0.00 milliseconds └── Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds │ ( │ [System Info] @@ -185,6 +191,10 @@ "name": "Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler", "duration in milliseconds": 0, "children": [ + { + "name": "Get Collection Cache", + "duration in milliseconds": 0 + }, { "name": "Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler", "duration in milliseconds": 0, @@ -294,6 +304,7 @@ │ Redacted To Not Change The Baselines From Run To Run │ ) └── Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds + ├── Get Collection Cache(00000000-0000-0000-0000-000000000000) Routing-Component 00:00:00:000 0.00 milliseconds └── Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds │ ( │ [System Info] @@ -323,6 +334,10 @@ "name": "Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler", "duration in milliseconds": 0, "children": [ + { + "name": "Get Collection Cache", + "duration in milliseconds": 0 + }, { "name": "Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler", "duration in milliseconds": 0, @@ -427,6 +442,7 @@ │ Redacted To Not Change The Baselines From Run To Run │ ) └── Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds + ├── Get Collection 
Cache(00000000-0000-0000-0000-000000000000) Routing-Component 00:00:00:000 0.00 milliseconds └── Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds │ ( │ [System Info] @@ -456,6 +472,10 @@ "name": "Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler", "duration in milliseconds": 0, "children": [ + { + "name": "Get Collection Cache", + "duration in milliseconds": 0 + }, { "name": "Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler", "duration in milliseconds": 0, diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs index ee6345dc43..1d8b04c6ea 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs @@ -1,5 +1,6 @@ namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests { + using System; using System.Collections.Generic; using System.Text.Json; using System.Text.Json.Serialization; @@ -102,12 +103,15 @@ ORDER BY RANK FullTextScore(c.Description, ""{search_text}"") { "sort", true } }; - IReadOnlyDictionary results = await container.SemanticRerankAsync( + SemanticRerankResult results = await container.SemanticRerankAsync( reranking_context, documents, options); - Assert.IsTrue(results["Scores"][0]["index"] == 4); + Assert.IsTrue(results.RerankScores.Count > 0); + Assert.AreEqual(4, results.RerankScores[0].Index); + Assert.IsNotNull(results.Latency); + Assert.IsNotNull(results.TokenUseage); } } } From 8fba1a1ca132590496d9952deecad8b10ec943de Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Mon, 3 Nov 2025 14:51:16 -0800 Subject: [PATCH 12/30] fixed typo --- 
Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs | 6 +++--- Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs | 4 ++-- Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs | 4 ++-- .../src/Resource/Container/ContainerInlineCore.cs | 4 ++-- Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs index 5e1915fa7d..4828788262 100644 --- a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs @@ -74,13 +74,13 @@ public InferenceService(CosmosClient client, AccountProperties accountProperties /// /// Sends a semantic rerank request to the inference service. /// - /// The context/query for reranking. + /// The context/query for reranking. /// The documents to be reranked. /// Optional additional options for the request. /// Cancellation token. /// A dictionary containing the reranked results. public async Task SemanticRerankAsync( - string renrankContext, + string rerankContext, IEnumerable documents, IDictionary options = null, CancellationToken cancellationToken = default) @@ -102,7 +102,7 @@ await this.cosmosAuthorization.AddInferenceAuthorizationHeaderAsync( } // Build the request payload. 
- Dictionary body = this.AddSemanticRerankPayload(renrankContext, documents, options); + Dictionary body = this.AddSemanticRerankPayload(rerankContext, documents, options); message.Content = new StringContent( Newtonsoft.Json.JsonConvert.SerializeObject(body), diff --git a/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs b/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs index bf4375a269..008577c9bd 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs @@ -470,13 +470,13 @@ await this.DocumentClient.OpenConnectionsToAllReplicasAsync( } internal override async Task SemanticRerankAsync( - string renrankContext, + string rerankContext, IEnumerable documents, IDictionary options = null, CancellationToken cancellationToken = default) { InferenceService inferenceService = await this.GetOrCreateInferenceServiceAsync(); - return await inferenceService.SemanticRerankAsync(renrankContext, documents, options, cancellationToken); + return await inferenceService.SemanticRerankAsync(rerankContext, documents, options, cancellationToken); } /// diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs index 2dfdebe997..1ba25f6154 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs @@ -1684,7 +1684,7 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManu /// This method uses a semantic reranker to score and reorder the provided documents /// based on their relevance to the given reranking context. /// - /// The context or query string to use for reranking the documents. + /// The context or query string to use for reranking the documents. /// A list of documents to be reranked /// (Optional) The options for the semantic reranking request. /// (Optional) representing request cancellation. 
@@ -1695,7 +1695,7 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManu internal #endif abstract Task SemanticRerankAsync( - string renrankContext, + string rerankContext, IEnumerable documents, IDictionary options = null, CancellationToken cancellationToken = default); diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs index 08c744e424..5646bba54a 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs @@ -704,12 +704,12 @@ public override Task IsFeedRangePartOfAsync( internal #endif override Task SemanticRerankAsync( - string renrankContext, + string rerankContext, IEnumerable documents, IDictionary options = null, CancellationToken cancellationToken = default) { - return this.ClientContext.SemanticRerankAsync(renrankContext, documents, options, cancellationToken); + return this.ClientContext.SemanticRerankAsync(rerankContext, documents, options, cancellationToken); } } } \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs b/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs index 821881102d..81b95705a6 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs @@ -134,7 +134,7 @@ internal abstract Task InitializeContainerUsingRntbdAsync( CancellationToken cancellationToken); internal abstract Task SemanticRerankAsync( - string renrankContext, + string rerankContext, IEnumerable documents, IDictionary options = null, CancellationToken cancellationToken = default); From e8445e7dcc64572de097c8055842046592367a24 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Fri, 7 Nov 2025 08:01:48 -0800 Subject: [PATCH 13/30] small changes and bugfixes --- 
.../src/Inference/InferenceService.cs | 23 ++++++++++++------- .../src/Inference/RerankScore.cs | 4 ++-- .../src/Inference/SemanticRerankResult.cs | 7 +++--- .../src/Resource/ClientContextCore.cs | 9 ++++---- .../src/Resource/Container/Container.cs | 2 +- .../Resource/Container/ContainerInlineCore.cs | 2 +- .../src/Resource/CosmosClientContext.cs | 4 ++-- .../SemanticRerankingIntegrationTests.cs | 2 +- 8 files changed, 29 insertions(+), 24 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs index 4828788262..45db75f1f6 100644 --- a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs @@ -22,12 +22,13 @@ namespace Microsoft.Azure.Cosmos internal class InferenceService : IDisposable { // Base path for the inference service endpoint. - private const string basePath = "dbinference.azure.com/inference/semanticReranking"; + private const string basePath = "/inference/semanticReranking"; // User agent string for inference requests. private const string inferenceUserAgent = "cosmos-inference-dotnet"; // Default scope for AAD authentication. private const string inferenceServiceDefaultScope = "https://dbinference.azure.com/.default"; + private readonly string inferenceServiceBaseUrl; private readonly Uri inferenceEndpoint; private readonly HttpClient httpClient; private readonly AuthorizationTokenProvider cosmosAuthorization; @@ -38,10 +39,16 @@ internal class InferenceService : IDisposable /// Initializes a new instance of the class. /// /// The CosmosClient instance. - /// The account properties for endpoint construction. /// Thrown if AAD authentication is not used. 
- public InferenceService(CosmosClient client, AccountProperties accountProperties) + public InferenceService(CosmosClient client) { + this.inferenceServiceBaseUrl = ConfigurationManager.GetEnvironmentVariable("AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_ENDPOINT", null); + + if (string.IsNullOrEmpty(this.inferenceServiceBaseUrl)) + { + throw new ArgumentNullException("Set environment variable AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_ENDPOINT to use inference service"); + } + // Create and configure HttpClient for inference requests. HttpMessageHandler httpMessageHandler = CosmosHttpClientCore.CreateHttpClientHandler( gatewayModeMaxConnectionLimit: client.DocumentClient.ConnectionPolicy.MaxConnectionLimit, @@ -53,7 +60,7 @@ public InferenceService(CosmosClient client, AccountProperties accountProperties this.CreateClientHelper(this.httpClient); // Construct the inference service endpoint URI. - this.inferenceEndpoint = new Uri($"https://{accountProperties.Id}.{basePath}"); + this.inferenceEndpoint = new Uri($"{this.inferenceServiceBaseUrl}/{basePath}"); // Ensure AAD authentication is used. if (client.DocumentClient.cosmosAuthorization.GetType() != typeof(AuthorizationTokenProviderTokenCredential)) @@ -82,7 +89,7 @@ public InferenceService(CosmosClient client, AccountProperties accountProperties public async Task SemanticRerankAsync( string rerankContext, IEnumerable documents, - IDictionary options = null, + IDictionary options = null, CancellationToken cancellationToken = default) { // Prepare HTTP request for semantic reranking. @@ -102,7 +109,7 @@ await this.cosmosAuthorization.AddInferenceAuthorizationHeaderAsync( } // Build the request payload. 
- Dictionary body = this.AddSemanticRerankPayload(rerankContext, documents, options); + Dictionary body = this.AddSemanticRerankPayload(rerankContext, documents, options); message.Content = new StringContent( Newtonsoft.Json.JsonConvert.SerializeObject(body), @@ -140,9 +147,9 @@ private void CreateClientHelper(HttpClient httpClient) /// The documents to be reranked. /// Optional additional options. /// A dictionary representing the request payload. - private Dictionary AddSemanticRerankPayload(string rerankContext, IEnumerable documents, IDictionary options) + private Dictionary AddSemanticRerankPayload(string rerankContext, IEnumerable documents, IDictionary options) { - Dictionary payload = new Dictionary + Dictionary payload = new Dictionary { { "query", rerankContext }, { "documents", documents.ToArray() } diff --git a/Microsoft.Azure.Cosmos/src/Inference/RerankScore.cs b/Microsoft.Azure.Cosmos/src/Inference/RerankScore.cs index 15c7200b7d..407658cbe7 100644 --- a/Microsoft.Azure.Cosmos/src/Inference/RerankScore.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/RerankScore.cs @@ -18,7 +18,7 @@ class RerankScore /// /// Gets the document content or identifier that was reranked. /// - public string Document { get; } + public object Document { get; } /// /// Gets the score assigned to the document after reranking. @@ -36,7 +36,7 @@ class RerankScore /// The document content or identifier. /// The reranked score for the document. /// The original index of the document. 
- public RerankScore(string document, double score, int index) + public RerankScore(object document, double score, int index) { this.Document = document; this.Score = score; diff --git a/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs b/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs index ec58fb9264..be58f658c7 100644 --- a/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs @@ -83,7 +83,6 @@ internal static async Task DeserializeSemanticRerankResult responseJson.ContainsKey("latency") ? Newtonsoft.Json.JsonConvert.DeserializeObject>(responseJson["latency"].ToString()) : null, responseJson.ContainsKey("token_usage") ? Newtonsoft.Json.JsonConvert.DeserializeObject>(responseJson["token_usage"].ToString()) : null, responseMessage.Headers); - throw new NotImplementedException(); } /// @@ -99,9 +98,9 @@ private static IReadOnlyList ParseRerankScores(object rerankScoresO foreach (Newtonsoft.Json.Linq.JToken item in rerankScoresArray) { // Extract document, score, and index from each item. - string document = item["document"]?.ToString(); - double score = item["score"] != null ? Convert.ToDouble(item["score"]) : 0.0; - int index = item["index"] != null ? Convert.ToInt32(item["index"]) : -1; + object document = item["document"]; + double score = item["score"] != null ? item.Value("score") : 0.0; + int index = item["index"] != null ? 
item.Value("index") : -1; RerankScore rerankScore = new RerankScore(document, score, index); rerankScores.Add(rerankScore); } diff --git a/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs b/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs index 008577c9bd..16f2b44c33 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs @@ -472,23 +472,22 @@ await this.DocumentClient.OpenConnectionsToAllReplicasAsync( internal override async Task SemanticRerankAsync( string rerankContext, IEnumerable documents, - IDictionary options = null, + IDictionary options = null, CancellationToken cancellationToken = default) { - InferenceService inferenceService = await this.GetOrCreateInferenceServiceAsync(); + InferenceService inferenceService = this.GetOrCreateInferenceService(); return await inferenceService.SemanticRerankAsync(rerankContext, documents, options, cancellationToken); } /// - internal override async Task GetOrCreateInferenceServiceAsync() + internal override InferenceService GetOrCreateInferenceService() { - AccountProperties accountProperties = await this.client.DocumentClient.GlobalEndpointManager.GetDatabaseAccountAsync() ?? throw new InvalidOperationException("Failed to retrieve AccountProperties. 
The response was null."); if (this.inferenceService == null) { // Double check locking to avoid unnecessary locks lock (this) { - this.inferenceService ??= new InferenceService(this.client, accountProperties); + this.inferenceService ??= new InferenceService(this.client); } } diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs index 1ba25f6154..db0f908ee1 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs @@ -1697,7 +1697,7 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManu abstract Task SemanticRerankAsync( string rerankContext, IEnumerable documents, - IDictionary options = null, + IDictionary options = null, CancellationToken cancellationToken = default); /// diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs index 5646bba54a..29b07bf396 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs @@ -706,7 +706,7 @@ public override Task IsFeedRangePartOfAsync( override Task SemanticRerankAsync( string rerankContext, IEnumerable documents, - IDictionary options = null, + IDictionary options = null, CancellationToken cancellationToken = default) { return this.ClientContext.SemanticRerankAsync(rerankContext, documents, options, cancellationToken); diff --git a/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs b/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs index 81b95705a6..7f051b812b 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs @@ -136,7 +136,7 @@ internal abstract Task InitializeContainerUsingRntbdAsync( internal abstract Task SemanticRerankAsync( string 
rerankContext, IEnumerable documents, - IDictionary options = null, + IDictionary options = null, CancellationToken cancellationToken = default); /// @@ -144,7 +144,7 @@ internal abstract Task SemanticRerankAsync( /// This will have a seperate http client that is used to make calls to the inference end point /// /// the inferenceService - internal abstract Task GetOrCreateInferenceServiceAsync(); + internal abstract InferenceService GetOrCreateInferenceService(); public abstract void Dispose(); } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs index 1d8b04c6ea..59471d330a 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs @@ -23,7 +23,7 @@ public class SemanticRerankingIntegrationTests public void TestInitAsync() { this.connectionString = "https://inferencee2etest.documents.azure.com:443/"; - + Environment.SetEnvironmentVariable("AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_ENDPOINT", "https://inferencee2etest.dbinference.azure.com"); DefaultAzureCredentialOptions options = new DefaultAzureCredentialOptions { TenantId = "72f988bf-86f1-41af-91ab-2d7cd011db47", From 57e65a7b8b176386547f0ad76f47f0dd369dba03 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Fri, 7 Nov 2025 12:45:00 -0800 Subject: [PATCH 14/30] Update SemanticRerankResult.cs --- .../src/Inference/SemanticRerankResult.cs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs b/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs index be58f658c7..8207023916 100644 --- 
a/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs @@ -6,6 +6,7 @@ namespace Microsoft.Azure.Cosmos { using System; using System.Collections.Generic; + using System.IO; using System.Net.Http; using System.Net.Http.Headers; using System.Threading.Tasks; @@ -68,14 +69,16 @@ private SemanticRerankResult( /// A task that represents the asynchronous operation. The task result contains the deserialized . internal static async Task DeserializeSemanticRerankResultAsync(HttpResponseMessage responseMessage) { - // Read the response content as a string. - string content = await responseMessage.Content.ReadAsStringAsync(); + Stream content = await responseMessage.Content.ReadAsStreamAsync(); // Deserialize the JSON content into a dictionary. - Dictionary responseJson = Newtonsoft.Json.JsonConvert.DeserializeObject>(content); - - // Log the response JSON for debugging purposes. - Console.WriteLine("Response JSON: " + content); + Dictionary responseJson; + using (StreamReader streamReader = new StreamReader(content)) + using (Newtonsoft.Json.JsonTextReader jsonReader = new Newtonsoft.Json.JsonTextReader(streamReader)) + { + Newtonsoft.Json.JsonSerializer serializer = new Newtonsoft.Json.JsonSerializer(); + responseJson = serializer.Deserialize>(jsonReader); + } // Parse the rerank scores, latency, and token usage from the response. 
return new SemanticRerankResult( From 257847e445ef7c5d8203f4acac2e6ee364e6ade2 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Tue, 11 Nov 2025 15:55:10 -0800 Subject: [PATCH 15/30] Update EndToEndTraceWriterBaselineTests.StreamPointOperationsAsync.xml --- ...selineTests.StreamPointOperationsAsync.xml | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/BaselineTest/TestBaseline/EndToEndTraceWriterBaselineTests.StreamPointOperationsAsync.xml b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/BaselineTest/TestBaseline/EndToEndTraceWriterBaselineTests.StreamPointOperationsAsync.xml index 33b0825ae3..ad3eb33168 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/BaselineTest/TestBaseline/EndToEndTraceWriterBaselineTests.StreamPointOperationsAsync.xml +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/BaselineTest/TestBaseline/EndToEndTraceWriterBaselineTests.StreamPointOperationsAsync.xml @@ -26,7 +26,6 @@ │ Redacted To Not Change The Baselines From Run To Run │ ) └── Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds - ├── Get Collection Cache(00000000-0000-0000-0000-000000000000) Routing-Component 00:00:00:000 0.00 milliseconds └── Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds │ ( │ [System Info] @@ -56,10 +55,6 @@ "name": "Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler", "duration in milliseconds": 0, "children": [ - { - "name": "Get Collection Cache", - "duration in milliseconds": 0 - }, { "name": "Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler", "duration in milliseconds": 0, @@ -161,7 +156,6 @@ │ Redacted To Not Change The Baselines From Run To Run │ ) └── 
Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds - ├── Get Collection Cache(00000000-0000-0000-0000-000000000000) Routing-Component 00:00:00:000 0.00 milliseconds └── Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds │ ( │ [System Info] @@ -191,10 +185,6 @@ "name": "Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler", "duration in milliseconds": 0, "children": [ - { - "name": "Get Collection Cache", - "duration in milliseconds": 0 - }, { "name": "Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler", "duration in milliseconds": 0, @@ -304,7 +294,6 @@ │ Redacted To Not Change The Baselines From Run To Run │ ) └── Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds - ├── Get Collection Cache(00000000-0000-0000-0000-000000000000) Routing-Component 00:00:00:000 0.00 milliseconds └── Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds │ ( │ [System Info] @@ -334,10 +323,6 @@ "name": "Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler", "duration in milliseconds": 0, "children": [ - { - "name": "Get Collection Cache", - "duration in milliseconds": 0 - }, { "name": "Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler", "duration in milliseconds": 0, @@ -442,7 +427,6 @@ │ Redacted To Not Change The Baselines From Run To Run │ ) └── Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler(00000000-0000-0000-0000-000000000000) RequestHandler-Component 00:00:00:000 0.00 milliseconds - ├── Get Collection Cache(00000000-0000-0000-0000-000000000000) Routing-Component 00:00:00:000 0.00 milliseconds └── Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler(00000000-0000-0000-0000-000000000000) 
RequestHandler-Component 00:00:00:000 0.00 milliseconds │ ( │ [System Info] @@ -472,10 +456,6 @@ "name": "Microsoft.Azure.Cosmos.Handlers.RequestInvokerHandler", "duration in milliseconds": 0, "children": [ - { - "name": "Get Collection Cache", - "duration in milliseconds": 0 - }, { "name": "Microsoft.Azure.Cosmos.Handlers.DiagnosticsHandler", "duration in milliseconds": 0, From d06143997141da589af4e5f443a9ae15b23d7c9d Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Tue, 18 Nov 2025 12:30:04 -0800 Subject: [PATCH 16/30] PR comments --- .../src/Inference/InferenceService.cs | 9 +- .../src/Inference/SemanticRerankResult.cs | 85 +++++++++++-------- .../src/Resource/Container/Container.cs | 6 +- 3 files changed, 63 insertions(+), 37 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs index 45db75f1f6..397e957de6 100644 --- a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs @@ -6,6 +6,7 @@ namespace Microsoft.Azure.Cosmos { using System; using System.Collections.Generic; + using System.Diagnostics; using System.Linq; using System.Net.Http; using System.Net.Http.Headers; @@ -27,7 +28,9 @@ internal class InferenceService : IDisposable private const string inferenceUserAgent = "cosmos-inference-dotnet"; // Default scope for AAD authentication. 
private const string inferenceServiceDefaultScope = "https://dbinference.azure.com/.default"; + private const int inferenceServiceDefaultMaxConnectionLimit = 50; + private readonly int inferenceServiceMaxConnectionLimit; private readonly string inferenceServiceBaseUrl; private readonly Uri inferenceEndpoint; private readonly HttpClient httpClient; @@ -49,9 +52,13 @@ public InferenceService(CosmosClient client) throw new ArgumentNullException("Set environment variable AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_ENDPOINT to use inference service"); } + this.inferenceServiceMaxConnectionLimit = ConfigurationManager.GetEnvironmentVariable( + "AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_SERVICE_MAX_CONNECTION_LIMIT", + inferenceServiceDefaultMaxConnectionLimit) ?? inferenceServiceDefaultMaxConnectionLimit; + // Create and configure HttpClient for inference requests. HttpMessageHandler httpMessageHandler = CosmosHttpClientCore.CreateHttpClientHandler( - gatewayModeMaxConnectionLimit: client.DocumentClient.ConnectionPolicy.MaxConnectionLimit, + gatewayModeMaxConnectionLimit: this.inferenceServiceMaxConnectionLimit, webProxy: null, serverCertificateCustomValidationCallback: client.DocumentClient.ConnectionPolicy.ServerCertificateCustomValidationCallback); diff --git a/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs b/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs index 8207023916..e228f8d0fe 100644 --- a/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/SemanticRerankResult.cs @@ -4,11 +4,11 @@ namespace Microsoft.Azure.Cosmos { - using System; using System.Collections.Generic; using System.IO; using System.Net.Http; using System.Net.Http.Headers; + using System.Text.Json; using System.Threading.Tasks; /// @@ -71,44 +71,61 @@ internal static async Task DeserializeSemanticRerankResult { Stream content = await responseMessage.Content.ReadAsStreamAsync(); - // Deserialize the JSON content into a 
dictionary. - Dictionary responseJson; - using (StreamReader streamReader = new StreamReader(content)) - using (Newtonsoft.Json.JsonTextReader jsonReader = new Newtonsoft.Json.JsonTextReader(streamReader)) + using (content) { - Newtonsoft.Json.JsonSerializer serializer = new Newtonsoft.Json.JsonSerializer(); - responseJson = serializer.Deserialize>(jsonReader); - } + using (JsonDocument doc = await JsonDocument.ParseAsync(content)) + { + JsonElement root = doc.RootElement; - // Parse the rerank scores, latency, and token usage from the response. - return new SemanticRerankResult( - ParseRerankScores(responseJson["Scores"]), - responseJson.ContainsKey("latency") ? Newtonsoft.Json.JsonConvert.DeserializeObject>(responseJson["latency"].ToString()) : null, - responseJson.ContainsKey("token_usage") ? Newtonsoft.Json.JsonConvert.DeserializeObject>(responseJson["token_usage"].ToString()) : null, - responseMessage.Headers); - } + // Parse Scores + List rerankScores = new List(); + if (root.TryGetProperty("Scores", out JsonElement scoresElement) && scoresElement.ValueKind == JsonValueKind.Array) + { + foreach (JsonElement item in scoresElement.EnumerateArray()) + { + object document = null; + if (item.TryGetProperty("document", out JsonElement docElement)) + { + // Try to deserialize as an object + switch (docElement.ValueKind) + { + case JsonValueKind.Object: + document = JsonSerializer.Deserialize>(docElement.GetRawText()); + break; + case JsonValueKind.Null: + document = null; + break; + } + } - /// - /// Parses the rerank scores from the provided object. - /// - /// The object containing rerank scores, expected to be a JArray. - /// A read-only list of objects. - private static IReadOnlyList ParseRerankScores(object rerankScoresObj) - { - List rerankScores = new List(); - if (rerankScoresObj is Newtonsoft.Json.Linq.JArray rerankScoresArray) - { - foreach (Newtonsoft.Json.Linq.JToken item in rerankScoresArray) - { - // Extract document, score, and index from each item. 
- object document = item["document"]; - double score = item["score"] != null ? item.Value("score") : 0.0; - int index = item["index"] != null ? item.Value("index") : -1; - RerankScore rerankScore = new RerankScore(document, score, index); - rerankScores.Add(rerankScore); + double score = item.TryGetProperty("score", out JsonElement scoreElement) && scoreElement.TryGetDouble(out double s) ? s : 0.0; + int index = item.TryGetProperty("index", out JsonElement indexElement) && indexElement.TryGetInt32(out int i) ? i : -1; + + rerankScores.Add(new RerankScore(document, score, index)); + } + } + + // Parse latency + Dictionary latency = null; + if (root.TryGetProperty("latency", out JsonElement latencyElement) && latencyElement.ValueKind == JsonValueKind.Object) + { + latency = JsonSerializer.Deserialize>(latencyElement.GetRawText()); + } + + // Parse token_usage + Dictionary tokenUsage = null; + if (root.TryGetProperty("token_usage", out JsonElement tokenUsageElement) && tokenUsageElement.ValueKind == JsonValueKind.Object) + { + tokenUsage = JsonSerializer.Deserialize>(tokenUsageElement.GetRawText()); + } + + return new SemanticRerankResult( + rerankScores, + latency, + tokenUsage, + responseMessage.Headers); } } - return rerankScores; } } } diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs index db0f908ee1..3464bd5601 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs @@ -1682,9 +1682,11 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManu /// /// Rerank a list of documents using semantic reranking. /// This method uses a semantic reranker to score and reorder the provided documents - /// based on their relevance to the given reranking context. + /// based on their relevance to the given reranking context. 
+ /// + /// The sematic reranking requests will not use the regular request flow and not use the default SDK retry policies. /// - /// The context or query string to use for reranking the documents. + /// The context (ex: query string) to use for reranking the documents. /// A list of documents to be reranked /// (Optional) The options for the semantic reranking request. /// (Optional) representing request cancellation. From dd1b39ca4fee812c8f0571f948a6116bf118a7ca Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Tue, 18 Nov 2025 14:42:31 -0800 Subject: [PATCH 17/30] comments --- .../src/Resource/ClientContextCore.cs | 1 + .../src/Resource/CosmosClientContext.cs | 14 ++++++++++++++ .../SemanticRerankingIntegrationTests.cs | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs b/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs index 16f2b44c33..0014ed843a 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs @@ -469,6 +469,7 @@ await this.DocumentClient.OpenConnectionsToAllReplicasAsync( cancellationToken); } + /// internal override async Task SemanticRerankAsync( string rerankContext, IEnumerable documents, diff --git a/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs b/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs index 7f051b812b..da5a613dab 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs @@ -133,6 +133,18 @@ internal abstract Task InitializeContainerUsingRntbdAsync( string containerLinkUri, CancellationToken cancellationToken); + /// + /// Rerank a list of documents using semantic reranking. + /// This method uses a semantic reranker to score and reorder the provided documents + /// based on their relevance to the given reranking context. 
+ /// + /// The sematic reranking requests will not use the regular request flow and not use the default SDK retry policies. + /// + /// The context (ex: query string) to use for reranking the documents. + /// A list of documents to be reranked + /// (Optional) The options for the semantic reranking request. + /// (Optional) representing request cancellation. + /// The reranking results, typically including the reranked documents and their scores. internal abstract Task SemanticRerankAsync( string rerankContext, IEnumerable documents, @@ -142,6 +154,8 @@ internal abstract Task SemanticRerankAsync( /// /// Creates, or gets if already created, the inference service for this client /// This will have a seperate http client that is used to make calls to the inference end point + /// + /// This method exists in the client context so the infernece service can be easily disposed when the client is disposed /// /// the inferenceService internal abstract InferenceService GetOrCreateInferenceService(); diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs index 59471d330a..1e8f90137a 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs @@ -59,7 +59,7 @@ public void TestCleanup() } [TestMethod] - [TestCategory("MultiRegion")] + [TestCategory("Ignore")] [Timeout(70000)] public async Task SemanticRerankTest() { From 1c40b8da3daf33e42a95f14ef81dcaa074908c0d Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Tue, 18 Nov 2025 16:20:12 -0800 Subject: [PATCH 18/30] test fixes for preview --- .../src/Resource/Container/Container.cs | 9 +- .../Resource/Container/ContainerInlineCore.cs | 7 +- 
.../SemanticRerankingIntegrationTests.cs | 2 + .../Contracts/DotNetPreviewSDKAPI.net6.json | 111 +++++++++++++++++- 4 files changed, 113 insertions(+), 16 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs index 3464bd5601..a8a592855f 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs @@ -1679,6 +1679,7 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManu string processorName, ChangeFeedStreamHandlerWithManualCheckpoint onChangesDelegate); +#if PREVIEW /// /// Rerank a list of documents using semantic reranking. /// This method uses a semantic reranker to score and reorder the provided documents @@ -1691,16 +1692,12 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManu /// (Optional) The options for the semantic reranking request. /// (Optional) representing request cancellation. /// The reranking results, typically including the reranked documents and their scores. -#if PREVIEW - public -#else - internal -#endif - abstract Task SemanticRerankAsync( + public abstract Task SemanticRerankAsync( string rerankContext, IEnumerable documents, IDictionary options = null, CancellationToken cancellationToken = default); +#endif /// /// Deletes all items in the Container with the specified value. 
diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs index 29b07bf396..cd38bada65 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/ContainerInlineCore.cs @@ -699,11 +699,7 @@ public override Task IsFeedRangePartOfAsync( } #if PREVIEW - public -#else - internal -#endif - override Task SemanticRerankAsync( + public override Task SemanticRerankAsync( string rerankContext, IEnumerable documents, IDictionary options = null, @@ -711,5 +707,6 @@ override Task SemanticRerankAsync( { return this.ClientContext.SemanticRerankAsync(rerankContext, documents, options, cancellationToken); } +#endif } } \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs index 1e8f90137a..e426619fb6 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs @@ -58,6 +58,7 @@ public void TestCleanup() this.client?.Dispose(); } +#if PREVIEW [TestMethod] [TestCategory("Ignore")] [Timeout(70000)] @@ -113,5 +114,6 @@ ORDER BY RANK FullTextScore(c.Description, ""{search_text}"") Assert.IsNotNull(results.Latency); Assert.IsNotNull(results.TokenUseage); } +#endif } } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetPreviewSDKAPI.net6.json b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetPreviewSDKAPI.net6.json index 855138acc6..67e92f2223 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetPreviewSDKAPI.net6.json +++ 
b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetPreviewSDKAPI.net6.json @@ -293,6 +293,11 @@ "Attributes": [], "MethodInfo": "Microsoft.Azure.Cosmos.ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithAllVersionsAndDeletes[T](System.String, ChangeFeedHandler`1);IsAbstract:True;IsStatic:False;IsVirtual:True;IsGenericMethod:True;IsConstructor:False;IsFinal:False;" }, + "System.Threading.Tasks.Task`1[Microsoft.Azure.Cosmos.SemanticRerankResult] SemanticRerankAsync(System.String, System.Collections.Generic.IEnumerable`1[System.String], System.Collections.Generic.IDictionary`2[System.String,System.Object], System.Threading.CancellationToken)": { + "Type": "Method", + "Attributes": [], + "MethodInfo": "System.Threading.Tasks.Task`1[Microsoft.Azure.Cosmos.SemanticRerankResult] SemanticRerankAsync(System.String, System.Collections.Generic.IEnumerable`1[System.String], System.Collections.Generic.IDictionary`2[System.String,System.Object], System.Threading.CancellationToken);IsAbstract:True;IsStatic:False;IsVirtual:True;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "System.Threading.Tasks.Task`1[System.Boolean] IsFeedRangePartOfAsync(Microsoft.Azure.Cosmos.FeedRange, Microsoft.Azure.Cosmos.FeedRange, System.Threading.CancellationToken)": { "Type": "Method", "Attributes": [], @@ -302,11 +307,6 @@ "Type": "Method", "Attributes": [], "MethodInfo": "System.Threading.Tasks.Task`1[System.Collections.Generic.IEnumerable`1[System.String]] GetPartitionKeyRangesAsync(Microsoft.Azure.Cosmos.FeedRange, System.Threading.CancellationToken);IsAbstract:True;IsStatic:False;IsVirtual:True;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" - }, - "System.Threading.Tasks.Task`1[System.Collections.Generic.IReadOnlyDictionary`2[System.String,System.Object]] SemanticRerankAsync(System.String, System.Collections.Generic.IEnumerable`1[System.String], System.Collections.Generic.IDictionary`2[System.String,System.Object], 
System.Threading.CancellationToken)": { - "Type": "Method", - "Attributes": [], - "MethodInfo": "System.Threading.Tasks.Task`1[System.Collections.Generic.IReadOnlyDictionary`2[System.String,System.Object]] SemanticRerankAsync(System.String, System.Collections.Generic.IEnumerable`1[System.String], System.Collections.Generic.IDictionary`2[System.String,System.Object], System.Threading.CancellationToken);IsAbstract:True;IsStatic:False;IsVirtual:True;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" } }, "NestedTypes": {} @@ -1395,6 +1395,107 @@ }, "NestedTypes": {} }, + "Microsoft.Azure.Cosmos.RerankScore;System.Object;IsAbstract:False;IsSealed:False;IsInterface:False;IsEnum:False;IsClass:True;IsValueType:False;IsNested:False;IsGenericType:False;IsSerializable:False": { + "Subclasses": {}, + "Members": { + "Double get_Score()[System.Runtime.CompilerServices.CompilerGeneratedAttribute()]": { + "Type": "Method", + "Attributes": [ + "CompilerGeneratedAttribute" + ], + "MethodInfo": "Double get_Score();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "Double Score": { + "Type": "Property", + "Attributes": [], + "MethodInfo": "Double Score;CanRead:True;CanWrite:False;Double get_Score();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "Int32 get_Index()[System.Runtime.CompilerServices.CompilerGeneratedAttribute()]": { + "Type": "Method", + "Attributes": [ + "CompilerGeneratedAttribute" + ], + "MethodInfo": "Int32 get_Index();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "Int32 Index": { + "Type": "Property", + "Attributes": [], + "MethodInfo": "Int32 Index;CanRead:True;CanWrite:False;Int32 get_Index();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "System.Object Document": { + "Type": "Property", + 
"Attributes": [], + "MethodInfo": "System.Object Document;CanRead:True;CanWrite:False;System.Object get_Document();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "System.Object get_Document()[System.Runtime.CompilerServices.CompilerGeneratedAttribute()]": { + "Type": "Method", + "Attributes": [ + "CompilerGeneratedAttribute" + ], + "MethodInfo": "System.Object get_Document();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "Void .ctor(System.Object, Double, Int32)": { + "Type": "Constructor", + "Attributes": [], + "MethodInfo": "Void .ctor(System.Object, Double, Int32)" + } + }, + "NestedTypes": {} + }, + "Microsoft.Azure.Cosmos.SemanticRerankResult;System.Object;IsAbstract:False;IsSealed:False;IsInterface:False;IsEnum:False;IsClass:True;IsValueType:False;IsNested:False;IsGenericType:False;IsSerializable:False": { + "Subclasses": {}, + "Members": { + "System.Collections.Generic.Dictionary`2[System.String,System.Object] get_Latency()[System.Runtime.CompilerServices.CompilerGeneratedAttribute()]": { + "Type": "Method", + "Attributes": [ + "CompilerGeneratedAttribute" + ], + "MethodInfo": "System.Collections.Generic.Dictionary`2[System.String,System.Object] get_Latency();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "System.Collections.Generic.Dictionary`2[System.String,System.Object] get_TokenUseage()[System.Runtime.CompilerServices.CompilerGeneratedAttribute()]": { + "Type": "Method", + "Attributes": [ + "CompilerGeneratedAttribute" + ], + "MethodInfo": "System.Collections.Generic.Dictionary`2[System.String,System.Object] get_TokenUseage();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "System.Collections.Generic.Dictionary`2[System.String,System.Object] Latency": { + "Type": "Property", + "Attributes": [], 
+ "MethodInfo": "System.Collections.Generic.Dictionary`2[System.String,System.Object] Latency;CanRead:True;CanWrite:False;System.Collections.Generic.Dictionary`2[System.String,System.Object] get_Latency();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "System.Collections.Generic.Dictionary`2[System.String,System.Object] TokenUseage": { + "Type": "Property", + "Attributes": [], + "MethodInfo": "System.Collections.Generic.Dictionary`2[System.String,System.Object] TokenUseage;CanRead:True;CanWrite:False;System.Collections.Generic.Dictionary`2[System.String,System.Object] get_TokenUseage();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "System.Collections.Generic.IReadOnlyList`1[Microsoft.Azure.Cosmos.RerankScore] get_RerankScores()[System.Runtime.CompilerServices.CompilerGeneratedAttribute()]": { + "Type": "Method", + "Attributes": [ + "CompilerGeneratedAttribute" + ], + "MethodInfo": "System.Collections.Generic.IReadOnlyList`1[Microsoft.Azure.Cosmos.RerankScore] get_RerankScores();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "System.Collections.Generic.IReadOnlyList`1[Microsoft.Azure.Cosmos.RerankScore] RerankScores": { + "Type": "Property", + "Attributes": [], + "MethodInfo": "System.Collections.Generic.IReadOnlyList`1[Microsoft.Azure.Cosmos.RerankScore] RerankScores;CanRead:True;CanWrite:False;System.Collections.Generic.IReadOnlyList`1[Microsoft.Azure.Cosmos.RerankScore] get_RerankScores();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "System.Net.Http.Headers.HttpResponseHeaders get_Headers()[System.Runtime.CompilerServices.CompilerGeneratedAttribute()]": { + "Type": "Method", + "Attributes": [ + "CompilerGeneratedAttribute" + ], + "MethodInfo": "System.Net.Http.Headers.HttpResponseHeaders 
get_Headers();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "System.Net.Http.Headers.HttpResponseHeaders Headers": { + "Type": "Property", + "Attributes": [], + "MethodInfo": "System.Net.Http.Headers.HttpResponseHeaders Headers;CanRead:True;CanWrite:False;System.Net.Http.Headers.HttpResponseHeaders get_Headers();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + } + }, + "NestedTypes": {} + }, "Microsoft.Azure.Cosmos.VectorIndexPath;System.Object;IsAbstract:False;IsSealed:True;IsInterface:False;IsEnum:False;IsClass:True;IsValueType:False;IsNested:False;IsGenericType:False;IsSerializable:False": { "Subclasses": {}, "Members": { From 0b9bf26ebdb1edfd241293fb22b4e27da244d7e4 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Wed, 19 Nov 2025 11:50:47 -0800 Subject: [PATCH 19/30] Update EncryptionContainer.cs --- .../src/EncryptionContainer.cs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs index fec9fa0f3f..dfcb0d5963 100644 --- a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs +++ b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs @@ -732,6 +732,16 @@ public override FeedIterator GetItemQueryIterator( } #if ENCRYPTIONPREVIEW + + public override Task SemanticRerankAsync( + string rerankContext, + IEnumerable documents, + IDictionary options = null, + CancellationToken cancellationToken = default) + { + throw NotImplementedException("Semantic Reranking is not supported in the Encryption package); + } + public override async Task DeleteAllItemsByPartitionKeyStreamAsync( Cosmos.PartitionKey partitionKey, RequestOptions requestOptions = null, From 287073a5e2a3a00de2f85d5b8cb8595de281ad92 Mon Sep 17 00:00:00 2001 From: Nalu Tripician 
<27316859+NaluTripician@users.noreply.github.com> Date: Wed, 19 Nov 2025 15:02:44 -0800 Subject: [PATCH 20/30] Update EncryptionContainer.cs --- .../src/EncryptionContainer.cs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs index dfcb0d5963..72717e4a7b 100644 --- a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs +++ b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs @@ -732,14 +732,17 @@ public override FeedIterator GetItemQueryIterator( } #if ENCRYPTIONPREVIEW - public override Task SemanticRerankAsync( string rerankContext, IEnumerable documents, IDictionary options = null, CancellationToken cancellationToken = default) { - throw NotImplementedException("Semantic Reranking is not supported in the Encryption package); + return this.Container.SemanticRerankAsync( + rerankContext, + documents, + options, + cancellationToken); } public override async Task DeleteAllItemsByPartitionKeyStreamAsync( From 65c02373f9973dbe97d0ef02bda3c04b14f9a81c Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Thu, 20 Nov 2025 13:28:48 -0800 Subject: [PATCH 21/30] move encryption impl to right place --- .../src/EncryptionContainer.cs | 15 +++++++++++++++ .../src/EncryptionContainer.cs | 12 ------------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/Microsoft.Azure.Cosmos.Encryption.Custom/src/EncryptionContainer.cs b/Microsoft.Azure.Cosmos.Encryption.Custom/src/EncryptionContainer.cs index b968ad5bb9..e0da79c1db 100644 --- a/Microsoft.Azure.Cosmos.Encryption.Custom/src/EncryptionContainer.cs +++ b/Microsoft.Azure.Cosmos.Encryption.Custom/src/EncryptionContainer.cs @@ -1008,6 +1008,21 @@ public override Task IsFeedRangePartOfAsync( } #endif +#if PREVIEW + public override Task SemanticRerankAsync( + string rerankContext, + IEnumerable documents, + 
IDictionary options = null, + CancellationToken cancellationToken = default) + { + return this.Container.SemanticRerankAsync( + rerankContext, + documents, + options, + cancellationToken); + } +#endif + private async Task ReadManyItemsHelperAsync( IReadOnlyList<(string id, PartitionKey partitionKey)> items, ReadManyRequestOptions readManyRequestOptions = null, diff --git a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs index 72717e4a7b..c66acbea0d 100644 --- a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs +++ b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs @@ -732,18 +732,6 @@ public override FeedIterator GetItemQueryIterator( } #if ENCRYPTIONPREVIEW - public override Task SemanticRerankAsync( - string rerankContext, - IEnumerable documents, - IDictionary options = null, - CancellationToken cancellationToken = default) - { - return this.Container.SemanticRerankAsync( - rerankContext, - documents, - options, - cancellationToken); - } public override async Task DeleteAllItemsByPartitionKeyStreamAsync( Cosmos.PartitionKey partitionKey, From 38b8a782cc4a3108c939b3de05f82e0190dc0a8f Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Thu, 20 Nov 2025 13:29:54 -0800 Subject: [PATCH 22/30] Update EncryptionContainer.cs --- Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs index c66acbea0d..fec9fa0f3f 100644 --- a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs +++ b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs @@ -732,7 +732,6 @@ public override FeedIterator GetItemQueryIterator( } #if ENCRYPTIONPREVIEW - public override async Task DeleteAllItemsByPartitionKeyStreamAsync( Cosmos.PartitionKey partitionKey, 
RequestOptions requestOptions = null, From 24723d6c4ea46bb625c48b89ad727a9a28dc8e36 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Thu, 20 Nov 2025 13:43:35 -0800 Subject: [PATCH 23/30] Update EncryptionContainer.cs --- .../src/EncryptionContainer.cs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs index fec9fa0f3f..72717e4a7b 100644 --- a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs +++ b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs @@ -732,6 +732,19 @@ public override FeedIterator GetItemQueryIterator( } #if ENCRYPTIONPREVIEW + public override Task SemanticRerankAsync( + string rerankContext, + IEnumerable documents, + IDictionary options = null, + CancellationToken cancellationToken = default) + { + return this.Container.SemanticRerankAsync( + rerankContext, + documents, + options, + cancellationToken); + } + public override async Task DeleteAllItemsByPartitionKeyStreamAsync( Cosmos.PartitionKey partitionKey, RequestOptions requestOptions = null, From 57af7836354a645aff428e61bd0238a78675ee30 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Thu, 20 Nov 2025 15:29:57 -0800 Subject: [PATCH 24/30] fixed preview ref --- .../src/EncryptionContainer.cs | 2 +- Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Microsoft.Azure.Cosmos.Encryption.Custom/src/EncryptionContainer.cs b/Microsoft.Azure.Cosmos.Encryption.Custom/src/EncryptionContainer.cs index e0da79c1db..f997bade6d 100644 --- a/Microsoft.Azure.Cosmos.Encryption.Custom/src/EncryptionContainer.cs +++ b/Microsoft.Azure.Cosmos.Encryption.Custom/src/EncryptionContainer.cs @@ -1008,7 +1008,7 @@ public override Task IsFeedRangePartOfAsync( } #endif -#if PREVIEW 
+#if PREVIEW && SDKPROJECTREF public override Task SemanticRerankAsync( string rerankContext, IEnumerable documents, diff --git a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs index 72717e4a7b..d711975672 100644 --- a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs +++ b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs @@ -732,6 +732,7 @@ public override FeedIterator GetItemQueryIterator( } #if ENCRYPTIONPREVIEW +#if SDKPROJECTREF public override Task SemanticRerankAsync( string rerankContext, IEnumerable documents, @@ -744,7 +745,7 @@ public override Task SemanticRerankAsync( options, cancellationToken); } - +#endif public override async Task DeleteAllItemsByPartitionKeyStreamAsync( Cosmos.PartitionKey partitionKey, RequestOptions requestOptions = null, From 95527ac7eb9a1b0944ab39a7f9d16148bbdbad16 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Thu, 20 Nov 2025 15:41:28 -0800 Subject: [PATCH 25/30] Update EncryptionContainer.cs --- .../src/EncryptionContainer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos.Encryption.Custom/src/EncryptionContainer.cs b/Microsoft.Azure.Cosmos.Encryption.Custom/src/EncryptionContainer.cs index f997bade6d..48615d818b 100644 --- a/Microsoft.Azure.Cosmos.Encryption.Custom/src/EncryptionContainer.cs +++ b/Microsoft.Azure.Cosmos.Encryption.Custom/src/EncryptionContainer.cs @@ -1015,7 +1015,7 @@ public override Task SemanticRerankAsync( IDictionary options = null, CancellationToken cancellationToken = default) { - return this.Container.SemanticRerankAsync( + return this.container.SemanticRerankAsync( rerankContext, documents, options, From 7d83b0b9f6de709d22b63e04e12c522fc1f567bb Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Thu, 20 Nov 2025 16:07:36 -0800 Subject: [PATCH 26/30] Update 
EncryptionContainer.cs --- Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs index d711975672..47f0c9ed8a 100644 --- a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs +++ b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs @@ -745,6 +745,7 @@ public override Task SemanticRerankAsync( options, cancellationToken); } + #endif public override async Task DeleteAllItemsByPartitionKeyStreamAsync( Cosmos.PartitionKey partitionKey, From 7bb337b6042e8a87e76dbfca9b44b21b5c29dabd Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Mon, 24 Nov 2025 13:45:44 -0500 Subject: [PATCH 27/30] nits --- Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs | 8 ++++++-- .../src/Resource/Container/Container.cs | 5 ++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs index 397e957de6..6f808fa691 100644 --- a/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs +++ b/Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs @@ -33,8 +33,9 @@ internal class InferenceService : IDisposable private readonly int inferenceServiceMaxConnectionLimit; private readonly string inferenceServiceBaseUrl; private readonly Uri inferenceEndpoint; - private readonly HttpClient httpClient; - private readonly AuthorizationTokenProvider cosmosAuthorization; + + private HttpClient httpClient; + private AuthorizationTokenProvider cosmosAuthorization; private bool disposedValue; @@ -76,6 +77,7 @@ public InferenceService(CosmosClient client) } // Set up token credential for authorization. + // This is done to ensure the correct scope, which is different than the scope of the client, is used for the inference service. 
AuthorizationTokenProviderTokenCredential defaultOperationTokenProvider = client.DocumentClient.cosmosAuthorization as AuthorizationTokenProviderTokenCredential; TokenCredential tokenCredential = defaultOperationTokenProvider.tokenCredential; @@ -188,6 +190,8 @@ protected void Dispose(bool disposing) { this.httpClient.Dispose(); this.cosmosAuthorization.Dispose(); + this.httpClient = null; + this.cosmosAuthorization = null; } this.disposedValue = true; diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs index a8a592855f..b9c62810c6 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs @@ -1685,7 +1685,10 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManu /// This method uses a semantic reranker to score and reorder the provided documents /// based on their relevance to the given reranking context. /// - /// The sematic reranking requests will not use the regular request flow and not use the default SDK retry policies. + /// The semantic reranking requests will not use the regular request flow and have their own client. This will not use the default SDK retry policies. + /// + /// To use this feature, you must set up a Semantic Reranker resource in Azure and provide the endpoint and key via the environment variable: "AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_ENDPOINT" + /// By default the Semantic Reranking will have a default max connection limit of 50, to change this set the environment variable "AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_SERVICE_MAX_CONNECTION_LIMIT" to the desired value before creating the CosmosClient. /// /// The context (ex: query string) to use for reranking the documents. 
/// A list of documents to be reranked From 4366eaaa134253938c5d988225d3223600569dcf Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Mon, 24 Nov 2025 14:17:38 -0500 Subject: [PATCH 28/30] Update Container.cs --- .../src/Resource/Container/Container.cs | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs index b9c62810c6..2801490f80 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs @@ -1695,6 +1695,57 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManu /// (Optional) The options for the semantic reranking request. /// (Optional) representing request cancellation. /// The reranking results, typically including the reranked documents and their scores. + /// /// + /// + /// documents = new List(); + /// FeedIterator resultSetIterator = container.GetItemQueryIterator( + /// new QueryDefinition(queryString), + /// requestOptions: new QueryRequestOptions() + /// { + /// MaxItemCount = 15, + /// }); + /// + /// while (resultSetIterator.HasMoreResults) + /// { + /// FeedResponse response = await resultSetIterator.ReadNextAsync(); + /// foreach (JsonElement item in response) + /// { + /// documents.Add(item.ToString()); + /// } + /// } + /// + /// Dictionary options = new Dictionary + /// { + /// { "return_documents", true }, + /// { "top_k", 10 }, + /// { "batch_size", 32 }, + /// { "sort", true } + /// }; + /// + /// SemanticRerankResult results = await container.SemanticRerankAsync( + /// reranking_context, + /// documents, + /// options); + /// + /// // results.RerankScores[0] will contain the best result for the query + /// ]]> + /// + /// public abstract Task SemanticRerankAsync( string rerankContext, IEnumerable documents, From 
b555e30450b0d75d9ff230c36a8d673e0f414110 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Mon, 24 Nov 2025 14:33:21 -0500 Subject: [PATCH 29/30] updated example --- .../src/Resource/Container/Container.cs | 13 ++++++++++++- .../SemanticRerankingIntegrationTests.cs | 1 - 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs index 2801490f80..0232eb6d78 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Container/Container.cs @@ -1742,7 +1742,18 @@ public abstract ChangeFeedProcessorBuilder GetChangeFeedProcessorBuilderWithManu /// documents, /// options); /// - /// // results.RerankScores[0] will contain the best result for the query + /// // get the best resulting document from the query + /// results.RerankScores.First().Document; + /// // or the index of the document in the original list + /// results.RerankScores.First().Index; + /// // or the reranking score + /// results.RerankScores.First().Score; + /// + /// // get the latency information from the reranking operation + /// Dictionary<string, object> latencyInfo = results.Latency; + /// + /// // get the token usage information from the reranking operation + /// Dictionary<string, object> tokenUseageInfo = results.TokenUseage; /// ]]> /// /// diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs index e426619fb6..40f4a6ae39 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SemanticRerankingIntegrationTests.cs @@ -69,7 +69,6 @@ public async Task SemanticRerankTest() string search_text = "integrated pull-up bar"; - // Fix: Use string interpolation instead of raw string literal and 'f' prefix string queryString = $@" SELECT TOP 
15 c.id, c.Name, c.Brand, c.Description FROM c From c958a5dbef42b9ff9271baef87bdaf28ddb265d0 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Mon, 24 Nov 2025 14:59:14 -0500 Subject: [PATCH 30/30] nit --- .../Authorization/AuthorizationTokenProviderTokenCredential.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderTokenCredential.cs b/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderTokenCredential.cs index 17b4305032..09e422bb8a 100644 --- a/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderTokenCredential.cs +++ b/Microsoft.Azure.Cosmos/src/Authorization/AuthorizationTokenProviderTokenCredential.cs @@ -85,7 +85,7 @@ public override async ValueTask AddInferenceAuthorizationHeaderAsync( { string token = await this.tokenCredentialCache.GetTokenAsync(trace); - string inferenceToken = InferenceTokenPrefix + token; + string inferenceToken = $"{InferenceTokenPrefix}{token}"; headersCollection.Add(HttpConstants.HttpHeaders.Authorization, inferenceToken); } }