Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
7f732e5
inital commit
NaluTripician Sep 18, 2025
11c81aa
updates
NaluTripician Oct 7, 2025
bad23ca
tests
NaluTripician Oct 7, 2025
f396716
test changes
NaluTripician Oct 13, 2025
697d9eb
Merge branch 'master' into users/nalutripician/semanticRerank
NaluTripician Oct 13, 2025
02fc277
auth changes
NaluTripician Oct 16, 2025
e16e988
test changes
NaluTripician Oct 16, 2025
3ad012b
Update Microsoft.Azure.Cosmos.sln
NaluTripician Oct 16, 2025
f2e0e5b
fixed auth issues
NaluTripician Oct 22, 2025
db15b93
Merge branch 'master' into users/nalutripician/semanticRerank
NaluTripician Oct 22, 2025
e42274e
Merge branch 'master' into users/nalutripician/semanticRerank
NaluTripician Oct 23, 2025
8923888
addresses PR comments
NaluTripician Oct 28, 2025
5437698
Merge branch 'master' into users/nalutripician/semanticRerank
NaluTripician Oct 28, 2025
4a5afaf
Merge branch 'master' into users/nalutripician/semanticRerank
NaluTripician Oct 29, 2025
c05501e
test Fix
NaluTripician Oct 30, 2025
76f9ce1
Adds Semantic rerank result
NaluTripician Nov 3, 2025
8fba1a1
fixed typo
NaluTripician Nov 3, 2025
e8445e7
small changes and bugfixes
NaluTripician Nov 7, 2025
57e65a7
Update SemanticRerankResult.cs
NaluTripician Nov 7, 2025
0bd9579
Merge branch 'master' into users/nalutripician/semanticRerank
NaluTripician Nov 11, 2025
257847e
Update EndToEndTraceWriterBaselineTests.StreamPointOperationsAsync.xml
NaluTripician Nov 11, 2025
d061439
PR comments
NaluTripician Nov 18, 2025
ee91cd4
Merge branch 'master' into users/nalutripician/semanticRerank
NaluTripician Nov 18, 2025
dd1b39c
comments
NaluTripician Nov 18, 2025
ea2df60
Merge branch 'users/nalutripician/semanticRerank' of https://github.c…
NaluTripician Nov 18, 2025
1c40b8d
test fixes for preview
NaluTripician Nov 19, 2025
4638ed4
Merge branch 'master' into users/nalutripician/semanticRerank
NaluTripician Nov 19, 2025
0b9bf26
Update EncryptionContainer.cs
NaluTripician Nov 19, 2025
287073a
Update EncryptionContainer.cs
NaluTripician Nov 19, 2025
4ceb261
Merge branch 'master' into users/nalutripician/semanticRerank
NaluTripician Nov 20, 2025
65c0237
move encryption impl to right place
NaluTripician Nov 20, 2025
7f2aab4
Merge branch 'users/nalutripician/semanticRerank' of https://github.c…
NaluTripician Nov 20, 2025
38b8a78
Update EncryptionContainer.cs
NaluTripician Nov 20, 2025
24723d6
Update EncryptionContainer.cs
NaluTripician Nov 20, 2025
57af783
fixed preview ref
NaluTripician Nov 20, 2025
95527ac
Update EncryptionContainer.cs
NaluTripician Nov 20, 2025
7d83b0b
Update EncryptionContainer.cs
NaluTripician Nov 21, 2025
7bb337b
nits
NaluTripician Nov 24, 2025
4366eaa
Update Container.cs
NaluTripician Nov 24, 2025
b555e30
updated example
NaluTripician Nov 24, 2025
9d9f3dc
Merge branch 'master' into users/nalutripician/semanticRerank
NaluTripician Nov 24, 2025
c958a5d
nit
NaluTripician Nov 24, 2025
982ea78
Merge branch 'users/nalutripician/semanticRerank' of https://github.c…
NaluTripician Nov 24, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1008,6 +1008,21 @@ public override Task<bool> IsFeedRangePartOfAsync(
}
#endif

#if PREVIEW && SDKPROJECTREF
/// <summary>
/// Forwards a semantic rerank request to the wrapped container, passing every argument through unchanged.
/// </summary>
/// <param name="rerankContext">The context/query handed to the wrapped container.</param>
/// <param name="documents">The documents handed to the wrapped container.</param>
/// <param name="options">Optional extra options handed to the wrapped container.</param>
/// <param name="cancellationToken">Cancellation token handed to the wrapped container.</param>
/// <returns>The task returned by the wrapped container's SemanticRerankAsync call.</returns>
public override Task<SemanticRerankResult> SemanticRerankAsync(
    string rerankContext,
    IEnumerable<string> documents,
    IDictionary<string, object> options = null,
    CancellationToken cancellationToken = default)
    => this.container.SemanticRerankAsync(rerankContext, documents, options, cancellationToken);
#endif

private async Task<ResponseMessage> ReadManyItemsHelperAsync(
IReadOnlyList<(string id, PartitionKey partitionKey)> items,
ReadManyRequestOptions readManyRequestOptions = null,
Expand Down
15 changes: 15 additions & 0 deletions Microsoft.Azure.Cosmos.Encryption/src/EncryptionContainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,21 @@ public override FeedIterator<T> GetItemQueryIterator<T>(
}

#if ENCRYPTIONPREVIEW
#if SDKPROJECTREF
/// <summary>
/// Delegates semantic reranking to the underlying <c>Container</c>; the arguments are not altered on the way through.
/// </summary>
/// <param name="rerankContext">The context/query passed on to the underlying container.</param>
/// <param name="documents">The documents passed on to the underlying container.</param>
/// <param name="options">Optional extra options passed on to the underlying container.</param>
/// <param name="cancellationToken">Cancellation token passed on to the underlying container.</param>
/// <returns>The task produced by the underlying container.</returns>
public override Task<SemanticRerankResult> SemanticRerankAsync(
    string rerankContext,
    IEnumerable<string> documents,
    IDictionary<string, object> options = null,
    CancellationToken cancellationToken = default)
    => this.Container.SemanticRerankAsync(rerankContext, documents, options, cancellationToken);

#endif
public override async Task<ResponseMessage> DeleteAllItemsByPartitionKeyStreamAsync(
Cosmos.PartitionKey partitionKey,
RequestOptions requestOptions = null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ public abstract ValueTask<string> GetUserAuthorizationTokenAsync(
AuthorizationTokenType tokenType,
ITrace trace);

/// <summary>
/// Adds the authorization header used for inference service requests to <paramref name="headersCollection"/>.
/// </summary>
/// <param name="headersCollection">The header collection that receives the authorization header.</param>
/// <param name="requestAddress">The address of the request being authorized.</param>
/// <param name="verb">The HTTP verb of the request being authorized.</param>
/// <param name="tokenType">The kind of authorization token requested.</param>
/// <returns>A <see cref="ValueTask"/> that completes once the header has been added.</returns>
public abstract ValueTask AddInferenceAuthorizationHeaderAsync(
    INameValueCollection headersCollection,
    Uri requestAddress,
    string verb,
    AuthorizationTokenType tokenType);

public abstract void TraceUnauthorized(
DocumentClientException dce,
string authorizationToken,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,11 @@ private void Dispose(bool disposing)
this.authKeyHashFunction = null;
}

/// <summary>
/// Not available on this provider: every call throws.
/// </summary>
/// <param name="headersCollection">Unused.</param>
/// <param name="requestAddress">Unused.</param>
/// <param name="verb">Unused.</param>
/// <param name="tokenType">Unused.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Thrown on every call.</exception>
public override ValueTask AddInferenceAuthorizationHeaderAsync(
    INameValueCollection headersCollection,
    Uri requestAddress,
    string verb,
    AuthorizationTokenType tokenType)
    => throw new NotImplementedException("AddInferenceAuthorizationHeaderAsync is only valid for AAD");

// Use C# finalizer syntax for finalization code.
// This finalizer will run only if the Dispose method does not get called.
// It gives your base class the opportunity to finalize.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ private void Dispose(bool disposing)
// Do nothing
}

/// <summary>
/// Inference authorization is not provided by this token provider; the method throws unconditionally.
/// </summary>
/// <param name="headersCollection">Unused.</param>
/// <param name="requestAddress">Unused.</param>
/// <param name="verb">Unused.</param>
/// <param name="tokenType">Unused.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Thrown on every call.</exception>
public override ValueTask AddInferenceAuthorizationHeaderAsync(
    INameValueCollection headersCollection,
    Uri requestAddress,
    string verb,
    AuthorizationTokenType tokenType)
    => throw new NotImplementedException("AddInferenceAuthorizationHeaderAsync is only valid for AAD");

// Use C# finalizer syntax for finalization code.
// This finalizer will run only if the Dispose method does not get called.
// It gives your base class the opportunity to finalize.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,18 @@ namespace Microsoft.Azure.Cosmos

internal sealed class AuthorizationTokenProviderTokenCredential : AuthorizationTokenProvider
{
private const string InferenceTokenPrefix = "Bearer ";
internal readonly TokenCredentialCache tokenCredentialCache;
private bool isDisposed = false;

internal readonly TokenCredential tokenCredential;

public AuthorizationTokenProviderTokenCredential(
TokenCredential tokenCredential,
Uri accountEndpoint,
TimeSpan? backgroundTokenCredentialRefreshInterval)
{
this.tokenCredential = tokenCredential ?? throw new ArgumentNullException(nameof(tokenCredential));
this.tokenCredentialCache = new TokenCredentialCache(
tokenCredential: tokenCredential,
accountEndpoint: accountEndpoint,
Expand Down Expand Up @@ -71,6 +75,21 @@ public override async ValueTask AddAuthorizationHeaderAsync(
}
}

/// <summary>
/// Obtains a token from the token credential cache and adds it, prefixed with
/// <c>InferenceTokenPrefix</c>, as the Authorization header of <paramref name="headersCollection"/>.
/// </summary>
/// <param name="headersCollection">The header collection that receives the Authorization header.</param>
/// <param name="requestAddress">Not used by this implementation.</param>
/// <param name="verb">Not used by this implementation.</param>
/// <param name="tokenType">Not used by this implementation.</param>
/// <returns>A <see cref="ValueTask"/> that completes once the header has been added.</returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="headersCollection"/> is null.</exception>
public override async ValueTask AddInferenceAuthorizationHeaderAsync(
    INameValueCollection headersCollection,
    Uri requestAddress,
    string verb,
    AuthorizationTokenType tokenType)
{
    // Fail before requesting a token rather than with a NullReferenceException afterwards.
    if (headersCollection == null)
    {
        throw new ArgumentNullException(nameof(headersCollection));
    }

    // Name the root trace after this method so the trace is not attributed to GetUserAuthorizationTokenAsync.
    using (Trace trace = Trace.GetRootTrace(nameof(AddInferenceAuthorizationHeaderAsync), TraceComponent.Authorization, TraceLevel.Info))
    {
        string token = await this.tokenCredentialCache.GetTokenAsync(trace);

        string inferenceToken = $"{InferenceTokenPrefix}{token}";
        headersCollection.Add(HttpConstants.HttpHeaders.Authorization, inferenceToken);
    }
}

public override void TraceUnauthorized(
DocumentClientException dce,
string authorizationToken,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,5 +125,10 @@ private void CheckAndRefreshTokenProvider()
}
}
}

/// <summary>
/// This provider has no inference authorization implementation; calling the method always throws.
/// </summary>
/// <param name="headersCollection">Unused.</param>
/// <param name="requestAddress">Unused.</param>
/// <param name="verb">Unused.</param>
/// <param name="tokenType">Unused.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Thrown on every call.</exception>
public override ValueTask AddInferenceAuthorizationHeaderAsync(
    INameValueCollection headersCollection,
    Uri requestAddress,
    string verb,
    AuthorizationTokenType tokenType)
    => throw new NotImplementedException("AddInferenceAuthorizationHeaderAsync is only valid for AAD");
}
}
209 changes: 209 additions & 0 deletions Microsoft.Azure.Cosmos/src/Inference/InferenceService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using global::Azure.Core;
using Microsoft.Azure.Documents;
using Microsoft.Azure.Documents.Collections;

/// <summary>
/// Provides functionality to interact with the Cosmos DB Inference Service for semantic reranking.
/// </summary>
internal class InferenceService : IDisposable
{
// Base path for the inference service endpoint.
private const string basePath = "/inference/semanticReranking";
// User agent string for inference requests.
private const string inferenceUserAgent = "cosmos-inference-dotnet";
// Default scope for AAD authentication.
private const string inferenceServiceDefaultScope = "https://dbinference.azure.com/.default";
private const int inferenceServiceDefaultMaxConnectionLimit = 50;

private readonly int inferenceServiceMaxConnectionLimit;
private readonly string inferenceServiceBaseUrl;
private readonly Uri inferenceEndpoint;

private HttpClient httpClient;
private AuthorizationTokenProvider cosmosAuthorization;

private bool disposedValue;

/// <summary>
/// Initializes a new instance of the <see cref="InferenceService"/> class.
/// </summary>
/// <param name="client">The CosmosClient instance whose credential and connection policy are reused.</param>
/// <exception cref="ArgumentNullException">
/// Thrown if <paramref name="client"/> is null, or if the
/// AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_ENDPOINT environment variable is not set.
/// </exception>
/// <exception cref="InvalidOperationException">Thrown if AAD authentication is not used.</exception>
public InferenceService(CosmosClient client)
{
    const string endpointVariable = "AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_ENDPOINT";

    if (client == null)
    {
        throw new ArgumentNullException(nameof(client));
    }

    this.inferenceServiceBaseUrl = ConfigurationManager.GetEnvironmentVariable<string>(endpointVariable, null);

    if (string.IsNullOrEmpty(this.inferenceServiceBaseUrl))
    {
        // Use the (paramName, message) overload so the guidance appears as the exception message
        // instead of being reported as a parameter name.
        throw new ArgumentNullException(
            endpointVariable,
            $"Set environment variable {endpointVariable} to use inference service");
    }

    // Construct the inference service endpoint URI.
    // basePath already starts with '/', so join without adding another separator and
    // drop any trailing '/' from the configured base URL to avoid a "//" in the path.
    this.inferenceEndpoint = new Uri($"{this.inferenceServiceBaseUrl.TrimEnd('/')}{basePath}");

    // Ensure AAD authentication is used. This is checked before any disposable resource is
    // created so that a rejected client does not leave an undisposed HttpClient behind.
    // AuthorizationTokenProviderTokenCredential is sealed, so the pattern match is equivalent
    // to an exact type comparison.
    if (!(client.DocumentClient.cosmosAuthorization is AuthorizationTokenProviderTokenCredential defaultOperationTokenProvider))
    {
        throw new InvalidOperationException("InferenceService only supports AAD authentication.");
    }

    this.inferenceServiceMaxConnectionLimit = ConfigurationManager.GetEnvironmentVariable<int?>(
        "AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_SERVICE_MAX_CONNECTION_LIMIT",
        inferenceServiceDefaultMaxConnectionLimit) ?? inferenceServiceDefaultMaxConnectionLimit;

    // Create and configure HttpClient for inference requests.
    HttpMessageHandler httpMessageHandler = CosmosHttpClientCore.CreateHttpClientHandler(
        gatewayModeMaxConnectionLimit: this.inferenceServiceMaxConnectionLimit,
        webProxy: null,
        serverCertificateCustomValidationCallback: client.DocumentClient.ConnectionPolicy.ServerCertificateCustomValidationCallback);

    this.httpClient = new HttpClient(httpMessageHandler);
    this.CreateClientHelper(this.httpClient);

    // Set up token credential for authorization.
    // A dedicated provider is created so that the inference service scope, which differs from
    // the scope of the client, is used when requesting tokens.
    TokenCredential tokenCredential = defaultOperationTokenProvider.tokenCredential;

    this.cosmosAuthorization = new AuthorizationTokenProviderTokenCredential(
        tokenCredential: tokenCredential,
        accountEndpoint: new Uri(inferenceServiceDefaultScope),
        backgroundTokenCredentialRefreshInterval: client.ClientOptions?.TokenCredentialBackgroundRefreshInterval);
}

/// <summary>
/// Sends a semantic rerank request to the inference service.
/// </summary>
/// <param name="rerankContext">The context/query for reranking.</param>
/// <param name="documents">The documents to be reranked.</param>
/// <param name="options">Optional additional options for the request.</param>
/// <param name="cancellationToken">Cancellation token applied to the HTTP send.</param>
/// <returns>The <see cref="SemanticRerankResult"/> deserialized from the service response.</returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="documents"/> is null.</exception>
/// <exception cref="HttpRequestException">Thrown if the service responds with a non-success status code.</exception>
public async Task<SemanticRerankResult> SemanticRerankAsync(
    string rerankContext,
    IEnumerable<string> documents,
    IDictionary<string, object> options = null,
    CancellationToken cancellationToken = default)
{
    if (documents == null)
    {
        throw new ArgumentNullException(nameof(documents));
    }

    // Build the request payload first so invalid input is rejected before a token is requested.
    Dictionary<string, object> body = this.AddSemanticRerankPayload(rerankContext, documents, options);

    // Prepare HTTP request for semantic reranking; disposing the request also releases its content.
    using (HttpRequestMessage message = new HttpRequestMessage(HttpMethod.Post, this.inferenceEndpoint))
    {
        INameValueCollection additionalHeaders = new RequestNameValueCollection();
        await this.cosmosAuthorization.AddInferenceAuthorizationHeaderAsync(
            additionalHeaders,
            this.inferenceEndpoint,
            HttpConstants.HttpMethods.Post,
            AuthorizationTokenType.AadToken);
        additionalHeaders.Add(HttpConstants.HttpHeaders.UserAgent, inferenceUserAgent);

        // Add all headers to the HTTP request.
        foreach (string key in additionalHeaders.AllKeys())
        {
            message.Headers.Add(key, additionalHeaders[key]);
        }

        message.Content = new StringContent(
            Newtonsoft.Json.JsonConvert.SerializeObject(body),
            Encoding.UTF8,
            RuntimeConstants.MediaTypes.Json);

        // Send the request and ensure success.
        HttpResponseMessage responseMessage = await this.httpClient.SendAsync(message, cancellationToken);
        try
        {
            responseMessage.EnsureSuccessStatusCode();
        }
        catch
        {
            // Nothing else will observe a failed response, so release it before propagating.
            responseMessage.Dispose();
            throw;
        }

        // NOTE(review): the response is handed to DeserializeSemanticRerankResultAsync, whose
        // implementation is not visible here. It is deliberately left undisposed on the success
        // path in case the result retains it; confirm ownership.
        return await SemanticRerankResult.DeserializeSemanticRerankResultAsync(responseMessage);
    }
}

/// <summary>
/// Applies the timeout and default request headers used for inference calls to the given HttpClient.
/// </summary>
/// <param name="httpClient">The HttpClient whose timeout and default headers are set.</param>
private void CreateClientHelper(HttpClient httpClient)
{
    // Two-minute ceiling for a single request.
    httpClient.Timeout = TimeSpan.FromMinutes(2);

    HttpRequestHeaders defaultHeaders = httpClient.DefaultRequestHeaders;

    CacheControlHeaderValue cacheControl = new CacheControlHeaderValue();
    cacheControl.NoCache = true;
    defaultHeaders.CacheControl = cacheControl;

    // Requested API version header, sent for version enforcement.
    defaultHeaders.Add(HttpConstants.HttpHeaders.Version, HttpConstants.Versions.CurrentVersion);

    // Responses are requested as JSON.
    defaultHeaders.Add(HttpConstants.HttpHeaders.Accept, RuntimeConstants.MediaTypes.Json);
}

/// <summary>
/// Builds the request body for a semantic rerank call.
/// </summary>
/// <param name="rerankContext">Stored under the "query" key.</param>
/// <param name="documents">Copied to an array and stored under the "documents" key.</param>
/// <param name="options">Optional extra entries appended to the body; may be null.</param>
/// <returns>A dictionary holding "query", "documents" and every entry of <paramref name="options"/>.</returns>
private Dictionary<string, object> AddSemanticRerankPayload(string rerankContext, IEnumerable<string> documents, IDictionary<string, object> options)
{
    Dictionary<string, object> requestBody = new Dictionary<string, object>();
    requestBody.Add("query", rerankContext);
    requestBody.Add("documents", documents.ToArray());

    if (options != null)
    {
        // Add (not the indexer) is used, so an option whose key is already present is rejected.
        foreach (KeyValuePair<string, object> extra in options)
        {
            requestBody.Add(extra.Key, extra.Value);
        }
    }

    return requestBody;
}

/// <summary>
/// Disposes managed resources used by the service. Only the first call has any effect.
/// </summary>
/// <param name="disposing">True when called from <see cref="Dispose()"/>; managed resources are released only in that case.</param>
protected void Dispose(bool disposing)
{
    if (this.disposedValue)
    {
        return;
    }

    if (disposing)
    {
        // Null-conditional so that a missing field cannot turn disposal into a NullReferenceException.
        this.httpClient?.Dispose();
        this.cosmosAuthorization?.Dispose();
        this.httpClient = null;
        this.cosmosAuthorization = null;
    }

    this.disposedValue = true;
}

/// <summary>
/// Releases the resources held by this service by calling <c>Dispose(true)</c>.
/// </summary>
public void Dispose() => this.Dispose(true);
}
}
46 changes: 46 additions & 0 deletions Microsoft.Azure.Cosmos/src/Inference/RerankScore.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos
{
/// <summary>
/// Represents the score assigned to a document after a reranking operation.
/// </summary>
#if PREVIEW
public
#else
internal
#endif

class RerankScore
{
    /// <summary>
    /// Creates a <see cref="RerankScore"/> from its three component values.
    /// </summary>
    /// <param name="document">Value exposed through <see cref="Document"/>.</param>
    /// <param name="score">Value exposed through <see cref="Score"/>.</param>
    /// <param name="index">Value exposed through <see cref="Index"/>.</param>
    public RerankScore(object document, double score, int index)
        => (this.Document, this.Score, this.Index) = (document, score, index);

    /// <summary>
    /// Gets the reranked document (its content or an identifier for it), as supplied to the constructor.
    /// </summary>
    public object Document { get; }

    /// <summary>
    /// Gets the score the document received from reranking.
    /// </summary>
    public double Score { get; }

    /// <summary>
    /// Gets the index (position) the document had before reranking.
    /// </summary>
    public int Index { get; }
}
}
Loading