-
Notifications
You must be signed in to change notification settings - Fork 539
EmbeddingGenerator: Adds ICosmosEmbeddingGenerator client-wide configuration (preview) #5838
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,69 @@ | ||
| //------------------------------------------------------------ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| //------------------------------------------------------------ | ||
|
|
||
| namespace Microsoft.Azure.Cosmos | ||
| { | ||
| using System; | ||
| using System.Collections.Generic; | ||
|
|
||
/// <summary>
/// Represents the outcome of an <see cref="ICosmosEmbeddingGenerator.GenerateEmbeddingsAsync"/> call:
/// the float32 vectors that were produced, together with optional diagnostic values
/// (token usage and latency) that the SDK surfaces through <c>CosmosDiagnostics</c>.
/// </summary>
#if PREVIEW
public
#else
internal
#endif
sealed class CosmosEmbeddingResult
{
    /// <summary>
    /// Creates a <see cref="CosmosEmbeddingResult"/> from the vectors produced by an embedding call.
    /// </summary>
    /// <param name="vectors">
    /// The float32 embedding vectors that were generated. There is one vector for each input
    /// string passed to the originating <see cref="ICosmosEmbeddingGenerator.GenerateEmbeddingsAsync"/>
    /// call, and the vectors appear in the same order as those inputs.
    /// </param>
    /// <param name="totalTokens">
    /// The total number of tokens the embedding service consumed to produce these vectors, if known.
    /// Supply <c>null</c> when the underlying service does not report token usage.
    /// </param>
    /// <param name="latency">
    /// The duration the implementation observed for the embedding service call, if measured
    /// (for example, the wall-clock time around the underlying HTTP request). The value is
    /// surfaced through <c>CosmosDiagnostics</c> for query-time observability. Supply
    /// <c>null</c> when the implementation does not measure latency.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="vectors"/> is <c>null</c>.
    /// </exception>
    public CosmosEmbeddingResult(
        IReadOnlyList<ReadOnlyMemory<float>> vectors,
        int? totalTokens = null,
        TimeSpan? latency = null)
    {
        // The vectors are the only mandatory part of the result; both diagnostic values may be absent.
        if (vectors == null)
        {
            throw new ArgumentNullException(nameof(vectors));
        }

        this.Vectors = vectors;
        this.Latency = latency;
        this.TotalTokens = totalTokens;
    }

    /// <summary>
    /// Gets the generated float32 embedding vectors. There is one vector per input string, ordered
    /// the same way as the inputs given to <see cref="ICosmosEmbeddingGenerator.GenerateEmbeddingsAsync"/>.
    /// </summary>
    public IReadOnlyList<ReadOnlyMemory<float>> Vectors { get; }

    /// <summary>
    /// Gets the total token count the embedding service consumed while generating
    /// <see cref="Vectors"/>. The value is <c>null</c> when the underlying service does not report it.
    /// </summary>
    public int? TotalTokens { get; }

    /// <summary>
    /// Gets the duration the implementation observed for the underlying embedding service call.
    /// The value is <c>null</c> when the implementation does not measure it. It is surfaced through
    /// <c>CosmosDiagnostics</c> for query-time observability.
    /// </summary>
    public TimeSpan? Latency { get; }
}
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
113 changes: 113 additions & 0 deletions
113
Microsoft.Azure.Cosmos/src/ICosmosEmbeddingGenerator.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,113 @@ | ||
| //------------------------------------------------------------ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| //------------------------------------------------------------ | ||
|
|
||
| namespace Microsoft.Azure.Cosmos | ||
| { | ||
| using System.Collections.Generic; | ||
| using System.Threading; | ||
| using System.Threading.Tasks; | ||
|
|
||
/// <summary>
/// Contract for turning input text strings supplied by the Azure Cosmos DB query pipeline
/// into float32 vector embeddings.
/// The SDK calls it whenever a query plan contains <c>GenerateEmbeddings(...)</c> literals
/// (for example <c>VectorDistance(GenerateEmbeddings("big brown cat"), c.embedding)</c>).
/// A client-wide default is configured via <c>CosmosClientOptions.EmbeddingGenerator</c> or
/// <c>CosmosClientBuilder.WithEmbeddingGenerator</c>. Implementations MUST be thread-safe and
/// own any caching, retries, and authentication needed to reach the underlying embedding service.
/// </summary>
/// <remarks>
/// <para><b>Preview surface.</b> The SDK call site that invokes this method ships in a
/// follow-up release. Assigning an instance through
/// <see cref="CosmosClientOptions.EmbeddingGenerator"/> or
/// <see cref="Fluent.CosmosClientBuilder.WithEmbeddingGenerator"/> has no runtime effect
/// today; the surface is included in this preview so customers can author and test
/// implementations against the contract before the resolver lands.</para>
///
/// <para><b>Lifecycle and disposal.</b> The generator instance belongs to the customer. The SDK
/// holds a reference for the lifetime of the configured <see cref="CosmosClient"/> (or the
/// <see cref="Container"/> reference it was bound to) but never disposes it. When the
/// implementation holds disposable resources (for example an <c>HttpClient</c> or an
/// <c>EmbeddingClient</c>), disposing them at application teardown is the customer's
/// responsibility.</para>
///
/// <para><b>Error semantics.</b> Transient failures from the underlying embedding service
/// (network errors, rate limiting, etc.) must be handled by the implementation through its
/// own retry policy; the SDK does not retry calls to this method. Any exception the
/// implementation throws is wrapped into a <see cref="CosmosException"/> and surfaced to the
/// originating SDK caller.</para>
///
/// <para><b>Cancellation.</b> Implementations should cooperatively honor the supplied
/// <see cref="CancellationToken"/> wherever feasible, typically by forwarding it to the
/// underlying HTTP call. Best-effort cancellation is acceptable; ignoring the token
/// entirely is discouraged because it defeats caller-side timeouts.</para>
///
/// <para><b>Idempotency and concurrency.</b> The SDK may call this method more than once
/// for the same inputs (for example during internal query retry) and may call it
/// concurrently from multiple threads. Implementations must be safe to call repeatedly
/// and from parallel callers, and must not assume per-call state. Each call typically
/// incurs cost at the underlying embedding service; implementations may cache responses
/// internally to avoid duplicate billing for identical inputs.</para>
/// </remarks>
#if PREVIEW
public
#else
internal
#endif
interface ICosmosEmbeddingGenerator
{
    /// <summary>
    /// Produces one embedding vector for every supplied input string.
    /// </summary>
    /// <param name="texts">
    /// The strings to embed. The implementation MUST preserve their order in the returned
    /// <see cref="CosmosEmbeddingResult.Vectors"/> (one vector per input, at the same index).
    /// The parameter is typed as <see cref="IReadOnlyList{T}"/> so implementations can size
    /// their outbound batch without re-enumeration and so the 1:1 ordered contract is
    /// encoded in the signature.
    /// </param>
    /// <param name="endpoint">
    /// The embedding service endpoint to call (for example the Azure OpenAI account endpoint).
    /// Taken from the container's <c>EmbeddingSource.Endpoint</c> when configured.
    /// </param>
    /// <param name="deploymentName">
    /// The name of the model deployment to invoke at <paramref name="endpoint"/>. Taken from
    /// the container's <c>EmbeddingSource.DeploymentName</c> when configured.
    /// </param>
    /// <param name="dimensions">
    /// The dimensionality the produced embeddings must have. For models that support
    /// dimensionality reduction (for example <c>text-embedding-3-small</c> /
    /// <c>text-embedding-3-large</c>), implementations MUST forward this value to the
    /// underlying service so the returned vectors have the expected length. Otherwise the
    /// service returns its default size, which may not match the container's
    /// <see cref="VectorEmbeddingPolicy"/>.
    /// </param>
    /// <param name="cancellationToken">
    /// A <see cref="CancellationToken"/> propagated from the originating SDK call
    /// (for example <c>FeedIterator.ReadNextAsync</c>). Implementations should honor cancellation.
    /// </param>
    /// <returns>
    /// A task that resolves to a <see cref="CosmosEmbeddingResult"/>. Its
    /// <see cref="CosmosEmbeddingResult.Vectors"/> holds one float32 vector per input,
    /// each of length <paramref name="dimensions"/>, ordered the same way as
    /// <paramref name="texts"/>.
    /// <para>
    /// Query-time vectors are sent to the Azure Cosmos DB gateway as float32 regardless of
    /// the container's stored <see cref="VectorDataType"/>. Implementations targeting
    /// containers configured for <see cref="VectorDataType.Uint8"/>,
    /// <see cref="VectorDataType.Int8"/>, or <see cref="VectorDataType.Float16"/> storage
    /// should still produce float32 vectors here; the Azure Cosmos DB service applies the
    /// configured quantization at write time. This contract covers all four
    /// <see cref="VectorDataType"/> storage configurations supported by the container's
    /// <see cref="VectorEmbeddingPolicy"/>.
    /// </para>
    /// </returns>
    Task<CosmosEmbeddingResult> GenerateEmbeddingsAsync(
        IReadOnlyList<string> texts,
        string endpoint,
        string deploymentName,
        int dimensions,
        CancellationToken cancellationToken = default);
}
| } | ||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.