-
Notifications
You must be signed in to change notification settings - Fork 1.4k
feat(cosmos): add semantic rerank API #37981
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e9996a7
6205899
3372c72
c364a51
b543c0b
d3d8848
6cef892
66f912a
d0f5c75
d0bd299
8e4a686
fa52444
c9362d8
0b39a3d
4a9ca54
327f057
26fc878
783699b
3a833d7
ad6a614
ce691bb
36c5605
fed6fed
45acf97
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -320,6 +320,8 @@ | |
| "Parition", | ||
| "colls", | ||
| "pkranges", | ||
| "rerank", | ||
| "Rerank", | ||
| "sproc", | ||
| "sprocs", | ||
| "udfs", | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -317,6 +317,7 @@ export class ClientContext { | |
| diagnosticNode: DiagnosticNodeInternal; | ||
| partitionKeyRangeId?: string; | ||
| }): Promise<Response_2<T & Resource>>; | ||
| semanticRerank(context: string, documents: string[], options?: SemanticRerankOptions): Promise<SemanticRerankResult>; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Check whether the `context` parameter can be renamed. |
||
| // (undocumented) | ||
| upsert<T, U = T>(input: { | ||
| body: T; | ||
|
|
@@ -700,6 +701,11 @@ export const Constants: { | |
| DefaultEncryptionCacheTimeToLiveInSeconds: number; | ||
| EncryptionCacheRefreshIntervalInMs: number; | ||
| RequestTimeoutForReadsInMs: number; | ||
| InferenceBasePath: string; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we have a separate class for InferenceConstants? |
||
| InferenceUserAgent: string; | ||
| InferenceDefaultScope: string; | ||
| InferenceDefaultTimeoutMs: number; | ||
| InferenceEndpointEnvVar: string; | ||
| }; | ||
|
|
||
| // @public | ||
|
|
@@ -731,6 +737,7 @@ export class Container { | |
| readPartitionKeyRanges(feedOptions?: FeedOptions): QueryIterator<PartitionKeyRange>; | ||
| replace(body: ContainerDefinition, options?: RequestOptions): Promise<ContainerResponse>; | ||
| get scripts(): Scripts; | ||
| semanticRerank(context: string, documents: string[], options?: SemanticRerankOptions): Promise<SemanticRerankResult>; | ||
| get url(): string; | ||
| } | ||
|
|
||
|
|
@@ -824,6 +831,7 @@ export interface CosmosClientOptions { | |
| diagnosticLevel?: CosmosDbDiagnosticLevel; | ||
| endpoint?: string; | ||
| httpClient?: HttpClient; | ||
| inferenceEndpoint?: string; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove it for now?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Confirm it with Sajee |
||
| key?: string; | ||
| permissionFeed?: PermissionDefinition[]; | ||
| resourceTokens?: { | ||
|
|
@@ -2133,6 +2141,13 @@ export interface RequestOptions extends SharedOptions { | |
| urlConnection?: string; | ||
| } | ||
|
|
||
| // @public | ||
| export interface RerankScore { | ||
| document: string | null; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. null or undefined |
||
| index: number; | ||
| score: number; | ||
| } | ||
|
|
||
| // @public (undocumented) | ||
| export interface Resource { | ||
| _etag: string; | ||
|
|
@@ -2377,6 +2392,17 @@ export class Scripts { | |
| get userDefinedFunctions(): UserDefinedFunctions; | ||
| } | ||
|
|
||
| // @public | ||
| export type SemanticRerankOptions = Record<string, unknown>; | ||
|
|
||
| // @public | ||
| export interface SemanticRerankResult { | ||
| headers: Record<string, string>; | ||
| latency: Record<string, unknown> | undefined; | ||
| rerankScores: RerankScore[]; | ||
| tokenUsage: Record<string, unknown> | undefined; | ||
| } | ||
|
|
||
| // @public | ||
| export function setAuthorizationTokenHeaderUsingMasterKey(verb: HTTPMethod, resourceId: string, resourceType: ResourceType, headers: CosmosHeaders, masterKey: string): Promise<void>; | ||
|
|
||
|
|
@@ -2462,6 +2488,8 @@ export interface StatusCodesType { | |
| // (undocumented) | ||
| MethodNotAllowed: 405; | ||
| // (undocumented) | ||
| MultipleChoices: 300; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If this status code is only handled internally, do not add it here. |
||
| // (undocumented) | ||
| MultiStatus: 207; | ||
| // (undocumented) | ||
| NoContent: 204; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -51,6 +51,9 @@ import { | |
| AAD_AUTH_PREFIX, | ||
| AAD_RESOURCE_NOT_FOUND_ERROR, | ||
| } from "./common/constants.js"; | ||
| import { InferenceService } from "./inference/InferenceService.js"; | ||
| import type { SemanticRerankOptions } from "./inference/SemanticRerankOptions.js"; | ||
| import type { SemanticRerankResult } from "./inference/SemanticRerankResult.js"; | ||
|
|
||
| const logger: AzureLogger = createClientLogger("ClientContext"); | ||
|
|
||
|
|
@@ -70,6 +73,7 @@ export class ClientContext { | |
| public partitionKeyRangeCache: PartitionKeyRangeCache; | ||
| /** boolean flag to support operations with client-side encryption */ | ||
| public enableEncryption: boolean = false; | ||
| private inferenceService: InferenceService | null = null; | ||
|
|
||
| public constructor( | ||
| private cosmosClientOptions: CosmosClientOptions, | ||
|
|
@@ -1108,4 +1112,45 @@ export class ClientContext { | |
| this.globalEndpointManager.lastKnownPPCBEnabled | ||
| ); | ||
| } | ||
|
|
||
| /** | ||
| * Rerank a list of documents using semantic reranking via the Cosmos DB Inference Service. | ||
| * This method uses a semantic reranker to score and reorder the provided documents | ||
| * based on their relevance to the given reranking context. | ||
| * | ||
| * The semantic reranking requests use a separate HTTP pipeline and do not use | ||
| * the default SDK retry policies. | ||
| * | ||
| * @param context - The context (e.g. query string) to use for reranking. | ||
| * @param documents - The documents to be reranked. | ||
| * @param options - Optional settings for the reranking request. | ||
| * @returns The reranking results including scores, latency, and token usage. | ||
| */ | ||
| public async semanticRerank( | ||
| context: string, | ||
| documents: string[], | ||
| options?: SemanticRerankOptions, | ||
| ): Promise<SemanticRerankResult> { | ||
| const service = this.getOrCreateInferenceService(); | ||
| return service.semanticRerank(context, documents, options); | ||
| } | ||
|
|
||
| /** | ||
| * Gets or lazily creates the InferenceService instance. | ||
| * @internal | ||
| */ | ||
| private getOrCreateInferenceService(): InferenceService { | ||
| if (!this.inferenceService) { | ||
| this.inferenceService = new InferenceService(this.cosmosClientOptions); | ||
| } | ||
| return this.inferenceService; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use ?? |
||
| } | ||
|
|
||
| /** | ||
| * Disposes the InferenceService if it was created. | ||
| * @internal | ||
| */ | ||
| public disposeInferenceService(): void { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Redundant |
||
| this.inferenceService = null; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -304,6 +304,13 @@ export const Constants = { | |
| EncryptionCacheRefreshIntervalInMs: 60000, // 1 minute | ||
|
|
||
| RequestTimeoutForReadsInMs: 2000, // 2 seconds | ||
|
|
||
| // Inference Service | ||
| InferenceBasePath: "/inference/semanticReranking", | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. move to InferenceConstants |
||
| InferenceUserAgent: "cosmos-inference-js", | ||
| InferenceDefaultScope: "https://dbinference.azure.com/.default", | ||
| InferenceDefaultTimeoutMs: 120_000, // 120 seconds | ||
| InferenceEndpointEnvVar: "AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_ENDPOINT", | ||
| }; | ||
|
|
||
| export const AAD_DEFAULT_SCOPE = "https://cosmos.azure.com/.default"; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,9 @@ export interface StatusCodesType { | |
| Accepted: 202; | ||
| NoContent: 204; | ||
| MultiStatus: 207; | ||
|
|
||
| // Redirection | ||
| MultipleChoices: 300; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. avoid if possible |
||
| NotModified: 304; | ||
|
|
||
| // Client error | ||
|
|
@@ -50,6 +53,9 @@ export const StatusCodes: StatusCodesType = { | |
| Accepted: 202, | ||
| NoContent: 204, | ||
| MultiStatus: 207, | ||
|
|
||
| // Redirection | ||
| MultipleChoices: 300, | ||
| NotModified: 304, | ||
|
|
||
| // Client error | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Remove