Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Microsoft.Azure.Cosmos/src/CosmosClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,15 @@ internal CosmosClient(
/// <remarks>This property is read-only. Modifying any options after the client has been created has no effect on the existing client instance.</remarks>
public virtual CosmosClientOptions ClientOptions => this.ClientContext.ClientOptions;

#if PREVIEW
/// <summary>
/// Gets the client-wide <see cref="ICosmosEmbeddingGenerator"/>, or <c>null</c> if none was set.
/// Set via <see cref="CosmosClientOptions.EmbeddingGenerator"/> or
/// <see cref="Fluent.CosmosClientBuilder.WithEmbeddingGenerator"/>.
/// </summary>
/// <remarks>
/// The value is read from the <see cref="CosmosClientOptions"/> held by this client's context on
/// every access; this property does not keep its own copy.
/// </remarks>
public virtual ICosmosEmbeddingGenerator EmbeddingGenerator => this.ClientContext.ClientOptions.EmbeddingGenerator;
#endif

/// <summary>
/// The response factory used to create CosmosClient response types.
/// </summary>
Expand Down
12 changes: 12 additions & 0 deletions Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,18 @@ public ConnectionMode ConnectionMode
#endif
ReadConsistencyStrategy? ReadConsistencyStrategy { get; set; }

/// <summary>
/// Gets or sets the client-wide default <see cref="ICosmosEmbeddingGenerator"/> used to generate
/// query-time vector embeddings for hybrid and vector-search queries.
/// </summary>
/// <remarks>
/// The property carries <c>[JsonIgnore]</c>, so the generator instance is excluded when the options
/// object is serialized to JSON. It is part of the public surface only in PREVIEW builds and is
/// internal otherwise.
/// </remarks>
[JsonIgnore]
#if PREVIEW
public
#else
internal
#endif
ICosmosEmbeddingGenerator EmbeddingGenerator { get; set; }
Comment thread
ananth7592 marked this conversation as resolved.

/// <summary>
/// Sets the priority level for requests created using cosmos client.
/// </summary>
Expand Down
69 changes: 69 additions & 0 deletions Microsoft.Azure.Cosmos/src/CosmosEmbeddingResult.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos
{
using System;
using System.Collections.Generic;

/// <summary>
/// The result of a call to <see cref="ICosmosEmbeddingGenerator.GenerateEmbeddingsAsync"/>.
/// Carries the generated float32 vectors plus optional diagnostic fields (token usage,
/// latency) the SDK surfaces through <c>CosmosDiagnostics</c>.
/// </summary>
/// <remarks>
/// All properties are get-only and are assigned once by the constructor. The supplied vector
/// list is stored by reference; it is not copied.
/// </remarks>
#if PREVIEW
public
#else
internal
#endif
sealed class CosmosEmbeddingResult
{
    /// <summary>
    /// Initializes a new instance of <see cref="CosmosEmbeddingResult"/>.
    /// </summary>
    /// <param name="vectors">
    /// The generated float32 embedding vectors, one per input string supplied to the
    /// originating <see cref="ICosmosEmbeddingGenerator.GenerateEmbeddingsAsync"/> call,
    /// in the same order as the inputs.
    /// </param>
    /// <param name="totalTokens">
    /// Optional total token count consumed by the embedding service to produce these vectors.
    /// Pass <c>null</c> when the underlying service does not report token usage.
    /// </param>
    /// <param name="latency">
    /// Optional duration the implementation observed for the embedding service call (for
    /// example, the wall-clock time around the underlying HTTP request). Surfaced through
    /// <c>CosmosDiagnostics</c> for query-time observability. Pass <c>null</c> when the
    /// implementation does not measure latency.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// Thrown if <paramref name="vectors"/> is <c>null</c>.
    /// </exception>
    public CosmosEmbeddingResult(
        IReadOnlyList<ReadOnlyMemory<float>> vectors,
        int? totalTokens = null,
        TimeSpan? latency = null)
    {
        // The vectors are the only mandatory payload; both diagnostic fields may be absent.
        this.Vectors = vectors ?? throw new ArgumentNullException(nameof(vectors));
        this.TotalTokens = totalTokens;
        this.Latency = latency;
    }

    /// <summary>
    /// Gets the generated float32 embedding vectors, one per input string, in the same
    /// order as the inputs supplied to <see cref="ICosmosEmbeddingGenerator.GenerateEmbeddingsAsync"/>.
    /// </summary>
    public IReadOnlyList<ReadOnlyMemory<float>> Vectors { get; }

    /// <summary>
    /// Gets the total number of tokens the embedding service consumed to generate
    /// <see cref="Vectors"/>, or <c>null</c> when the underlying service does not report it.
    /// </summary>
    public int? TotalTokens { get; }

    /// <summary>
    /// Gets the duration the implementation observed for the underlying embedding service
    /// call, or <c>null</c> when the implementation does not measure it. Surfaced through
    /// <c>CosmosDiagnostics</c> for query-time observability.
    /// </summary>
    public TimeSpan? Latency { get; }
}
}
18 changes: 18 additions & 0 deletions Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -869,5 +869,23 @@ CosmosClientBuilder WithReadConsistencyStrategy(Cosmos.ReadConsistencyStrategy r
this.clientOptions.ReadConsistencyStrategy = readConsistencyStrategy;
return this;
}

/// <summary>
/// Registers the <see cref="ICosmosEmbeddingGenerator"/> that acts as the client-wide default for
/// generating query-time vector embeddings in hybrid and vector-search queries.
/// </summary>
/// <param name="embeddingGenerator">The generator to store on the client options; must not be <c>null</c>.</param>
/// <returns>This <see cref="CosmosClientBuilder"/> instance, so further builder calls can be chained.</returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="embeddingGenerator"/> is <c>null</c>.</exception>
#if PREVIEW
public
#else
internal
#endif
CosmosClientBuilder WithEmbeddingGenerator(ICosmosEmbeddingGenerator embeddingGenerator)
{
    // Reject null before touching the options so a failed call leaves them unchanged.
    if (embeddingGenerator == null)
    {
        throw new ArgumentNullException(nameof(embeddingGenerator));
    }

    this.clientOptions.EmbeddingGenerator = embeddingGenerator;
    return this;
}
}
}
113 changes: 113 additions & 0 deletions Microsoft.Azure.Cosmos/src/ICosmosEmbeddingGenerator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos
{
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;

/// <summary>
/// Defines a contract for generating float32 vector embeddings from input text strings
/// supplied by the Azure Cosmos DB query pipeline.
/// The contract is intended to be invoked by the SDK when a query plan contains
/// <c>GenerateEmbeddings(...)</c> literals
/// (for example <c>VectorDistance(GenerateEmbeddings("big brown cat"), c.embedding)</c>).
/// Set a client-wide default via <c>CosmosClientOptions.EmbeddingGenerator</c> or
/// <c>CosmosClientBuilder.WithEmbeddingGenerator</c>. Implementations MUST be thread-safe and are
/// responsible for any caching, retries, and authentication required to call the underlying
/// embedding service.
/// </summary>
/// <remarks>
/// <para><b>Preview surface.</b> The SDK call site that invokes this method is delivered
/// in a follow-up release. Setting an instance via
/// <see cref="CosmosClientOptions.EmbeddingGenerator"/> or
/// <see cref="Fluent.CosmosClientBuilder.WithEmbeddingGenerator"/> has no runtime effect
/// today; the surface is shipped in this preview so customers can author and test
/// implementations against the contract ahead of the resolver landing.</para>
///
/// <para><b>Lifecycle and disposal.</b> The customer owns the generator instance. The SDK
/// keeps a reference for the lifetime of the configured <see cref="CosmosClient"/> (or the
/// <see cref="Container"/> reference it was bound to) but never disposes it. If the
/// implementation holds disposable resources (for example an <c>HttpClient</c> or an
/// <c>EmbeddingClient</c>), the customer is responsible for disposing them when their
/// application tears down.</para>
///
/// <para><b>Error semantics.</b> Implementations are responsible for handling transient
/// failures from the underlying embedding service (network errors, rate limiting, etc.)
/// via their own retry policy. The SDK does not retry calls to this method. Any exception
/// thrown by the implementation is wrapped into a <see cref="CosmosException"/> and
/// surfaced to the originating SDK caller.</para>
///
/// <para><b>Cancellation.</b> Implementations should honor the supplied
/// <see cref="CancellationToken"/> cooperatively wherever feasible (typically by forwarding
/// it to the underlying HTTP call). Best-effort cancellation is acceptable; ignoring the
/// token entirely is discouraged because it defeats caller-side timeouts.</para>
///
/// <para><b>Idempotency and concurrency.</b> The SDK may invoke this method multiple times
/// for the same inputs (for example during internal query retry) and may invoke it
/// concurrently from multiple threads. Implementations must be safe to call repeatedly
/// and from parallel callers, and must not assume per-call state. Note that each call
/// typically incurs cost at the underlying embedding service; implementations may cache
/// responses internally if they want to avoid duplicate billing for identical inputs.</para>
/// </remarks>
#if PREVIEW
public
#else
internal
#endif
interface ICosmosEmbeddingGenerator
{
    /// <summary>
    /// Generates an embedding vector for each of the supplied input strings.
    /// </summary>
    /// <param name="texts">
    /// The input strings to embed, in the order the implementation MUST preserve in the
    /// returned <see cref="CosmosEmbeddingResult.Vectors"/> (one vector per input, same
    /// index). Typed as <see cref="IReadOnlyList{T}"/> so implementations can size their
    /// outbound batch without re-enumeration and so the 1:1 ordered contract is encoded
    /// in the signature.
    /// </param>
    /// <param name="endpoint">
    /// The embedding service endpoint to call (for example the Azure OpenAI account endpoint).
    /// Sourced from the container's <c>EmbeddingSource.Endpoint</c> when configured.
    /// </param>
    /// <param name="deploymentName">
    /// The model deployment name to invoke at <paramref name="endpoint"/>. Sourced from the
    /// container's <c>EmbeddingSource.DeploymentName</c> when configured.
    /// </param>
    /// <param name="dimensions">
    /// The vector dimensionality the produced embeddings must match. For models that support
    /// dimensionality reduction (for example <c>text-embedding-3-small</c> /
    /// <c>text-embedding-3-large</c>), implementations MUST forward this value to the
    /// underlying service so the returned vectors have the expected length; otherwise the
    /// service returns its default size, which may not match the container's
    /// <see cref="VectorEmbeddingPolicy"/>.
    /// </param>
    /// <param name="cancellationToken">
    /// A <see cref="CancellationToken"/> propagated from the originating SDK call
    /// (for example <c>FeedIterator.ReadNextAsync</c>). Implementations should honor cancellation.
    /// </param>
    /// <returns>
    /// A task that resolves to a <see cref="CosmosEmbeddingResult"/> whose
    /// <see cref="CosmosEmbeddingResult.Vectors"/> contains one float32 vector per input,
    /// each of length <paramref name="dimensions"/>, in the same order as
    /// <paramref name="texts"/>.
    /// <para>
    /// Query-time vectors are sent to the Azure Cosmos DB gateway as float32 regardless of
    /// the container's stored <see cref="VectorDataType"/>. Implementations targeting
    /// containers configured for <see cref="VectorDataType.Uint8"/>,
    /// <see cref="VectorDataType.Int8"/>, or <see cref="VectorDataType.Float16"/> storage
    /// should still produce float32 vectors here; the Azure Cosmos DB service applies the
    /// configured quantization at write time. This contract
    /// covers all four <see cref="VectorDataType"/> storage configurations supported by
    /// the container's <see cref="VectorEmbeddingPolicy"/>.
    /// </para>
    /// </returns>
    Task<CosmosEmbeddingResult> GenerateEmbeddingsAsync(
        IReadOnlyList<string> texts,
        string endpoint,
        string deploymentName,
        int dimensions,
        CancellationToken cancellationToken = default);
}
}
Loading
Loading