Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
e9996a7
feat(cosmos): add semantic rerank API for Cosmos DB Inference Service
Apr 3, 2026
6205899
fix: add rerank to CSpell dictionary for cosmos API review
Apr 6, 2026
3372c72
fix: add named snippet for semanticRerank example
Apr 6, 2026
c364a51
Add semantic rerank integration test
Apr 6, 2026
b543c0b
fix: correct RerankScore.document type and update integration tests
Apr 8, 2026
d3d8848
fix: update integration tests with self-contained FTS+rerank e2e test
Apr 8, 2026
6cef892
fix: disable fakeTimers in integration config and simplify test 3
Apr 9, 2026
66f912a
Revert global fakeTimers override, use vi.useRealTimers() locally; st…
Apr 10, 2026
d0f5c75
Skip e2e integration test that requires pre-existing DB and container
Apr 10, 2026
d0bd299
Fix TSDoc escape errors in integration test JSDoc
Apr 10, 2026
8e4a686
Apply prettier formatting to integration tests
Apr 10, 2026
fa52444
Skip all semantic rerank integration tests
Apr 10, 2026
c9362d8
fix: address PR review comments for semantic rerank feature
Copilot Apr 10, 2026
0b39a3d
fix: use cached httpClient, ErrorResponse, remove unnecessary timer h…
Apr 12, 2026
4a9ca54
fix: update JSDoc snippet to use proper guard for rerankScores
Apr 13, 2026
327f057
refactor: remove inferenceEndpoint from CosmosClientOptions
Apr 13, 2026
26fc878
chore: remove accidentally committed .tshy build artifacts
Apr 13, 2026
783699b
fix: resolve no-shadow lint error in semanticRerank.spec.ts
Apr 13, 2026
3a833d7
feat: add documentType/targetPaths options and structured error parsing
Apr 14, 2026
ad6a614
refactor: rename rerankContext to context, convert SemanticRerankOpti…
Apr 17, 2026
ce691bb
refactor(cosmos): move inference constants, use StatusCodes, extract …
Apr 22, 2026
36c5605
refactor(cosmos): add MultipleChoices (300) to StatusCodes, use in In…
Apr 22, 2026
fed6fed
chore(cosmos): regenerate API review after constants and statusCodes …
Apr 22, 2026
45acf97
feat(cosmos): add inferenceEndpoint to CosmosClientOptions
Apr 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .vscode/cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,8 @@
"Parition",
"colls",
"pkranges",
"rerank",
"Rerank",
"sproc",
"sprocs",
"udfs",
Expand Down
28 changes: 28 additions & 0 deletions sdk/cosmosdb/cosmos/review/cosmos-node.api.md
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ export class ClientContext {
diagnosticNode: DiagnosticNodeInternal;
partitionKeyRangeId?: string;
}): Promise<Response_2<T & Resource>>;
semanticRerank(context: string, documents: string[], options?: SemanticRerankOptions): Promise<SemanticRerankResult>;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Remove

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

check if context can be renamed.

// (undocumented)
upsert<T, U = T>(input: {
body: T;
Expand Down Expand Up @@ -700,6 +701,11 @@ export const Constants: {
DefaultEncryptionCacheTimeToLiveInSeconds: number;
EncryptionCacheRefreshIntervalInMs: number;
RequestTimeoutForReadsInMs: number;
InferenceBasePath: string;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we have a separate class for InferenceConstants.

InferenceUserAgent: string;
InferenceDefaultScope: string;
InferenceDefaultTimeoutMs: number;
InferenceEndpointEnvVar: string;
};

// @public
Expand Down Expand Up @@ -731,6 +737,7 @@ export class Container {
readPartitionKeyRanges(feedOptions?: FeedOptions): QueryIterator<PartitionKeyRange>;
replace(body: ContainerDefinition, options?: RequestOptions): Promise<ContainerResponse>;
get scripts(): Scripts;
semanticRerank(context: string, documents: string[], options?: SemanticRerankOptions): Promise<SemanticRerankResult>;
get url(): string;
}

Expand Down Expand Up @@ -824,6 +831,7 @@ export interface CosmosClientOptions {
diagnosticLevel?: CosmosDbDiagnosticLevel;
endpoint?: string;
httpClient?: HttpClient;
inferenceEndpoint?: string;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove it for now?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Confirm it with Sajee

key?: string;
permissionFeed?: PermissionDefinition[];
resourceTokens?: {
Expand Down Expand Up @@ -2133,6 +2141,13 @@ export interface RequestOptions extends SharedOptions {
urlConnection?: string;
}

// @public
export interface RerankScore {
document: string | null;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

null or undefined

index: number;
score: number;
}

// @public (undocumented)
export interface Resource {
_etag: string;
Expand Down Expand Up @@ -2377,6 +2392,17 @@ export class Scripts {
get userDefinedFunctions(): UserDefinedFunctions;
}

// @public
export type SemanticRerankOptions = Record<string, unknown>;

// @public
export interface SemanticRerankResult {
headers: Record<string, string>;
latency: Record<string, unknown> | undefined;
rerankScores: RerankScore[];
tokenUsage: Record<string, unknown> | undefined;
}

// @public
export function setAuthorizationTokenHeaderUsingMasterKey(verb: HTTPMethod, resourceId: string, resourceType: ResourceType, headers: CosmosHeaders, masterKey: string): Promise<void>;

Expand Down Expand Up @@ -2462,6 +2488,8 @@ export interface StatusCodesType {
// (undocumented)
MethodNotAllowed: 405;
// (undocumented)
MultipleChoices: 300;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if internally handling it, then do not add it here

// (undocumented)
MultiStatus: 207;
// (undocumented)
NoContent: 204;
Expand Down
45 changes: 45 additions & 0 deletions sdk/cosmosdb/cosmos/src/ClientContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ import {
AAD_AUTH_PREFIX,
AAD_RESOURCE_NOT_FOUND_ERROR,
} from "./common/constants.js";
import { InferenceService } from "./inference/InferenceService.js";
import type { SemanticRerankOptions } from "./inference/SemanticRerankOptions.js";
import type { SemanticRerankResult } from "./inference/SemanticRerankResult.js";

const logger: AzureLogger = createClientLogger("ClientContext");

Expand All @@ -70,6 +73,7 @@ export class ClientContext {
public partitionKeyRangeCache: PartitionKeyRangeCache;
/** boolean flag to support operations with client-side encryption */
public enableEncryption: boolean = false;
private inferenceService: InferenceService | null = null;

public constructor(
private cosmosClientOptions: CosmosClientOptions,
Expand Down Expand Up @@ -1108,4 +1112,45 @@ export class ClientContext {
this.globalEndpointManager.lastKnownPPCBEnabled
);
}

/**
 * Reranks a list of documents by semantic relevance using the Cosmos DB Inference Service.
 *
 * The call is forwarded to the client's `InferenceService`, which is obtained through
 * `getOrCreateInferenceService()` and therefore only constructed on first use.
 *
 * The semantic reranking requests use a separate HTTP pipeline and do not use
 * the default SDK retry policies.
 *
 * @param context - The context (e.g. query string) to rank the documents against.
 * @param documents - The documents to be reranked.
 * @param options - Optional settings for the reranking request; passed through unchanged.
 * @returns The reranking results including scores, latency, and token usage.
 */
public async semanticRerank(
  context: string,
  documents: string[],
  options?: SemanticRerankOptions,
): Promise<SemanticRerankResult> {
  return this.getOrCreateInferenceService().semanticRerank(context, documents, options);
}

/**
* Gets or lazily creates the InferenceService instance.
* @internal
*/
private getOrCreateInferenceService(): InferenceService {
if (!this.inferenceService) {
this.inferenceService = new InferenceService(this.cosmosClientOptions);
}
return this.inferenceService;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use ??

}

/**
* Disposes the InferenceService if it was created.
* @internal
*/
public disposeInferenceService(): void {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Redundant

this.inferenceService = null;
}
}
1 change: 1 addition & 0 deletions sdk/cosmosdb/cosmos/src/CosmosClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ export class CosmosClient {
if (this.globalPartitionEndpointManager) {
this.globalPartitionEndpointManager.dispose();
}
this.clientContext.disposeInferenceService();
}

private async backgroundRefreshEndpointList(
Expand Down
6 changes: 6 additions & 0 deletions sdk/cosmosdb/cosmos/src/CosmosClientOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,10 @@ export interface CosmosClientOptions {

/** An optional parameter that represents the connection string. Your database connection string can be found in the Azure Portal. */
connectionString?: string;

/**
* The endpoint URL for the Cosmos DB Inference Service, used for features such as semantic reranking.
* If not provided, the SDK falls back to the `AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_ENDPOINT` environment variable.
*/
inferenceEndpoint?: string;
}
70 changes: 70 additions & 0 deletions sdk/cosmosdb/cosmos/src/client/Container/Container.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ import { MetadataLookUpType } from "../../CosmosDiagnostics.js";
import type { EncryptionSettingForProperty } from "../../encryption/index.js";
import { EncryptionProcessor } from "../../encryption/index.js";
import type { EncryptionManager } from "../../encryption/EncryptionManager.js";
import type { SemanticRerankOptions } from "../../inference/SemanticRerankOptions.js";
import type { SemanticRerankResult } from "../../inference/SemanticRerankResult.js";

/**
* Operations for reading, replacing, or deleting a specific, existing container by id.
Expand Down Expand Up @@ -691,6 +693,74 @@ export class Container {
}
}

/**
* Rerank a list of documents using semantic reranking via the Cosmos DB Inference Service.
* This method uses a semantic reranker to score and reorder the provided documents
* based on their relevance to the given context.
*
* The semantic reranking requests use a separate HTTP pipeline from the main Cosmos DB client
* and do not use the default SDK retry policies.
*
* To use this feature, you must:
* 1. Configure AAD authentication via `aadCredentials` in `CosmosClientOptions`
* 2. Provide the inference endpoint via `inferenceEndpoint` in `CosmosClientOptions`,
* or set the `AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_ENDPOINT` environment variable
*
* This method is a pass-through: the arguments are handed unchanged to
* `ClientContext.semanticRerank`, and its result is returned as-is.
*
* Note that the example below does not pass `inferenceEndpoint` to the client, so it
* relies on the environment variable from step 2 being set.
*
* @param context - The context (e.g. query string) to use for reranking the documents.
* @param documents - A list of documents (as JSON strings) to be reranked.
* @param options - Optional dictionary of settings for the reranking request.
* Known service options:
* - `return_documents` (boolean) — include reranked documents in the response.
* - `top_k` (number) — max number of top-ranked documents to return.
* - `batch_size` (number) — batch size for processing documents.
* - `sort` (boolean) — sort results by relevance score in descending order.
* - `document_type` (`"string"` | `"json"`) — type of documents being reranked.
* - `target_paths` (string) — comma-separated JSON paths (when document_type is `"json"`).
* - `abortSignal` (AbortSignal) — signal to cancel the request.
* Any additional keys are forwarded as-is to the inference service.
* @returns The reranking results including scored documents, latency, and token usage.
*
* @example Semantic reranking of query results
* ```ts snippet:ContainerSemanticRerank
* import { DefaultAzureCredential } from "@azure/identity";
* import { CosmosClient } from "@azure/cosmos";
*
* const endpoint = "https://your-account.documents.azure.com";
* const aadCredentials = new DefaultAzureCredential();
* const client = new CosmosClient({
* endpoint,
* aadCredentials,
* });
*
* const { database } = await client.databases.createIfNotExists({ id: "Test Database" });
* const { container } = await database.containers.createIfNotExists({ id: "Test Container" });
*
* const queryResults = ["doc1 JSON", "doc2 JSON", "doc3 JSON"];
* const result = await container.semanticRerank(
* "most economical with multiple adjustments",
* queryResults,
* { return_documents: true, top_k: 10, sort: true },
* );
* // Access the top-ranked document
* if (result.rerankScores.length > 0) {
* const topResult = result.rerankScores[0];
* const topScore = topResult.score;
* const topDocument = topResult.document;
* if (topDocument !== null) {
* console.log("Top-ranked document:", topDocument);
* }
* console.log("Top score:", topScore);
* }
* ```
*/
public async semanticRerank(
context: string,
documents: string[],
options?: SemanticRerankOptions,
): Promise<SemanticRerankResult> {
return this.clientContext.semanticRerank(context, documents, options);
}

/**
* @internal
*/
Expand Down
7 changes: 7 additions & 0 deletions sdk/cosmosdb/cosmos/src/common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,13 @@ export const Constants = {
EncryptionCacheRefreshIntervalInMs: 60000, // 1 minute

RequestTimeoutForReadsInMs: 2000, // 2 seconds

// Inference Service
InferenceBasePath: "/inference/semanticReranking",
Copy link
Copy Markdown
Member

@niteshvijay1995 niteshvijay1995 May 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move to InferenceConstants

InferenceUserAgent: "cosmos-inference-js",
InferenceDefaultScope: "https://dbinference.azure.com/.default",
InferenceDefaultTimeoutMs: 120_000, // 120 seconds
InferenceEndpointEnvVar: "AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_ENDPOINT",
};

export const AAD_DEFAULT_SCOPE = "https://cosmos.azure.com/.default";
Expand Down
6 changes: 6 additions & 0 deletions sdk/cosmosdb/cosmos/src/common/statusCodes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ export interface StatusCodesType {
Accepted: 202;
NoContent: 204;
MultiStatus: 207;

// Redirection
MultipleChoices: 300;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

avoid if possible

NotModified: 304;

// Client error
Expand Down Expand Up @@ -50,6 +53,9 @@ export const StatusCodes: StatusCodesType = {
Accepted: 202,
NoContent: 204,
MultiStatus: 207,

// Redirection
MultipleChoices: 300,
NotModified: 304,

// Client error
Expand Down
6 changes: 6 additions & 0 deletions sdk/cosmosdb/cosmos/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,9 @@ export {
type CosmosEncryptedNumber,
CosmosEncryptedNumberType,
} from "./encryption/index.js";

export type {
RerankScore,
SemanticRerankResult,
SemanticRerankOptions,
} from "./inference/index.js";
Loading
Loading