Commit 1989335

Sync updates from stainless branch: hardikjshah/dev (#24)
1 parent 8f251cb commit 1989335

File tree

21 files changed: +1828, -8 lines

src/index.ts

Lines changed: 57 additions & 2 deletions
@@ -32,10 +32,20 @@ import {
   Datasets,
   ListDatasetsResponse,
 } from './resources/datasets';
+import { EmbeddingCreateParams, Embeddings, EmbeddingsResponse } from './resources/embeddings';
+import {
+  DeleteFileResponse,
+  File,
+  FileContentResponse,
+  FileCreateParams,
+  FileListParams,
+  Files,
+  ListFilesResponse,
+} from './resources/files';
 import {
   ChatCompletionResponseStreamChunk,
   CompletionResponse,
-  EmbeddingsResponse,
+  EmbeddingsResponse as InferenceAPIEmbeddingsResponse,
   Inference,
   InferenceBatchChatCompletionParams,
   InferenceBatchChatCompletionResponse,
@@ -174,6 +184,17 @@ import {
   ToolRuntimeListToolsParams,
   ToolRuntimeListToolsResponse,
 } from './resources/tool-runtime/tool-runtime';
+import {
+  ListVectorStoresResponse,
+  VectorStore,
+  VectorStoreCreateParams,
+  VectorStoreDeleteResponse,
+  VectorStoreListParams,
+  VectorStoreSearchParams,
+  VectorStoreSearchResponse,
+  VectorStoreUpdateParams,
+  VectorStores,
+} from './resources/vector-stores/vector-stores';

 export interface ClientOptions {
   /**
@@ -291,10 +312,12 @@ export class LlamaStackClient extends Core.APIClient {
   eval: API.Eval = new API.Eval(this);
   inspect: API.Inspect = new API.Inspect(this);
   inference: API.Inference = new API.Inference(this);
+  embeddings: API.Embeddings = new API.Embeddings(this);
   chat: API.Chat = new API.Chat(this);
   completions: API.Completions = new API.Completions(this);
   vectorIo: API.VectorIo = new API.VectorIo(this);
   vectorDBs: API.VectorDBs = new API.VectorDBs(this);
+  vectorStores: API.VectorStores = new API.VectorStores(this);
   models: API.Models = new API.Models(this);
   postTraining: API.PostTraining = new API.PostTraining(this);
   providers: API.Providers = new API.Providers(this);
@@ -306,6 +329,7 @@ export class LlamaStackClient extends Core.APIClient {
   scoring: API.Scoring = new API.Scoring(this);
   scoringFunctions: API.ScoringFunctions = new API.ScoringFunctions(this);
   benchmarks: API.Benchmarks = new API.Benchmarks(this);
+  files: API.Files = new API.Files(this);

   protected override defaultQuery(): Core.DefaultQuery | undefined {
     return this._options.defaultQuery;
@@ -359,10 +383,12 @@ LlamaStackClient.Datasets = Datasets;
 LlamaStackClient.Eval = Eval;
 LlamaStackClient.Inspect = Inspect;
 LlamaStackClient.Inference = Inference;
+LlamaStackClient.Embeddings = Embeddings;
 LlamaStackClient.Chat = Chat;
 LlamaStackClient.Completions = Completions;
 LlamaStackClient.VectorIo = VectorIo;
 LlamaStackClient.VectorDBs = VectorDBs;
+LlamaStackClient.VectorStores = VectorStores;
 LlamaStackClient.Models = Models;
 LlamaStackClient.PostTraining = PostTraining;
 LlamaStackClient.Providers = Providers;
@@ -374,6 +400,7 @@ LlamaStackClient.Telemetry = Telemetry;
 LlamaStackClient.Scoring = Scoring;
 LlamaStackClient.ScoringFunctions = ScoringFunctions;
 LlamaStackClient.Benchmarks = Benchmarks;
+LlamaStackClient.Files = Files;
 export declare namespace LlamaStackClient {
   export type RequestOptions = Core.RequestOptions;

@@ -465,7 +492,7 @@ export declare namespace LlamaStackClient {
     Inference as Inference,
     type ChatCompletionResponseStreamChunk as ChatCompletionResponseStreamChunk,
     type CompletionResponse as CompletionResponse,
-    type EmbeddingsResponse as EmbeddingsResponse,
+    type InferenceAPIEmbeddingsResponse as EmbeddingsResponse,
     type TokenLogProbs as TokenLogProbs,
     type InferenceBatchChatCompletionResponse as InferenceBatchChatCompletionResponse,
     type InferenceBatchChatCompletionParams as InferenceBatchChatCompletionParams,
@@ -479,6 +506,12 @@ export declare namespace LlamaStackClient {
     type InferenceEmbeddingsParams as InferenceEmbeddingsParams,
   };

+  export {
+    Embeddings as Embeddings,
+    type EmbeddingsResponse as EmbeddingsResponse,
+    type EmbeddingCreateParams as EmbeddingCreateParams,
+  };
+
   export { Chat as Chat, type ChatCompletionChunk as ChatCompletionChunk };

   export {
@@ -505,6 +538,18 @@ export declare namespace LlamaStackClient {
     type VectorDBRegisterParams as VectorDBRegisterParams,
   };

+  export {
+    VectorStores as VectorStores,
+    type ListVectorStoresResponse as ListVectorStoresResponse,
+    type VectorStore as VectorStore,
+    type VectorStoreDeleteResponse as VectorStoreDeleteResponse,
+    type VectorStoreSearchResponse as VectorStoreSearchResponse,
+    type VectorStoreCreateParams as VectorStoreCreateParams,
+    type VectorStoreUpdateParams as VectorStoreUpdateParams,
+    type VectorStoreListParams as VectorStoreListParams,
+    type VectorStoreSearchParams as VectorStoreSearchParams,
+  };
+
   export {
     Models as Models,
     type ListModelsResponse as ListModelsResponse,
@@ -597,6 +642,16 @@ export declare namespace LlamaStackClient {
     type BenchmarkRegisterParams as BenchmarkRegisterParams,
   };

+  export {
+    Files as Files,
+    type DeleteFileResponse as DeleteFileResponse,
+    type File as File,
+    type ListFilesResponse as ListFilesResponse,
+    type FileContentResponse as FileContentResponse,
+    type FileCreateParams as FileCreateParams,
+    type FileListParams as FileListParams,
+  };
+
   export type AgentConfig = API.AgentConfig;
   export type BatchCompletion = API.BatchCompletion;
   export type ChatCompletionResponse = API.ChatCompletionResponse;
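
Two things are worth calling out in this diff. First, the inference-level EmbeddingsResponse is now imported under the alias InferenceAPIEmbeddingsResponse so that the new OpenAI-compatible EmbeddingsResponse from './resources/embeddings' can take the unqualified name; the Inference export block still re-exports the old type under its original public name, so existing imports keep working. Second, three new resources (embeddings, vectorStores, files) are wired onto the client.

A minimal usage sketch of the new surface follows. The embeddings.create() call matches the generated src/resources/embeddings.ts shown below; the vectorStores and files calls are inferred only from the exported *Params types in this diff, and the package name and model id are assumptions, not taken from this commit.

import LlamaStackClient from 'llama-stack-client'; // assumed package name

const client = new LlamaStackClient();

async function main() {
  // OpenAI-compatible embeddings (signature confirmed by embeddings.ts below).
  const res = await client.embeddings.create({
    model: 'all-MiniLM-L6-v2', // hypothetical embedding model id
    input: ['first document', 'second document'],
  });
  console.log(res.data.length, res.usage.total_tokens);

  // Vector stores and files: method shapes inferred from VectorStoreListParams
  // and FileListParams, so treat these two calls as a sketch.
  const stores = await client.vectorStores.list();
  const files = await client.files.list();
  console.log(stores, files);
}

main();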

src/resources/completions.ts

Lines changed: 5 additions & 0 deletions
@@ -223,6 +223,11 @@ export interface CompletionCreateParamsBase {
    */
   stream_options?: Record<string, boolean | number | string | Array<unknown> | unknown | null>;

+  /**
+   * (Optional) The suffix that should be appended to the completion.
+   */
+  suffix?: string;
+
   /**
    * (Optional) The temperature to use.
    */
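
The new suffix parameter is typically used for fill-in-the-middle style requests through the OpenAI-compatible completions endpoint: the model generates text intended to sit between the prompt and the suffix. A short sketch, reusing the client from the earlier example inside an async context; the create() method name is implied by CompletionCreateParamsBase, and the model id and prompt are placeholders:

const completion = await client.completions.create({
  model: 'meta-llama/Llama-3.1-8B-Instruct', // hypothetical model id
  prompt: 'function add(a: number, b: number) {',
  suffix: '}', // (Optional) appended after the generated completion
});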

src/resources/embeddings.ts

Lines changed: 116 additions & 0 deletions
@@ -0,0 +1,116 @@
+// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import { APIResource } from '../resource';
+import * as Core from '../core';
+
+export class Embeddings extends APIResource {
+  /**
+   * Generate OpenAI-compatible embeddings for the given input using the specified
+   * model.
+   */
+  create(body: EmbeddingCreateParams, options?: Core.RequestOptions): Core.APIPromise<EmbeddingsResponse> {
+    return this._client.post('/v1/openai/v1/embeddings', { body, ...options });
+  }
+}
+
+/**
+ * Response from an OpenAI-compatible embeddings request.
+ */
+export interface EmbeddingsResponse {
+  /**
+   * List of embedding data objects
+   */
+  data: Array<EmbeddingsResponse.Data>;
+
+  /**
+   * The model that was used to generate the embeddings
+   */
+  model: string;
+
+  /**
+   * The object type, which will be "list"
+   */
+  object: 'list';
+
+  /**
+   * Usage information
+   */
+  usage: EmbeddingsResponse.Usage;
+}
+
+export namespace EmbeddingsResponse {
+  /**
+   * A single embedding data object from an OpenAI-compatible embeddings response.
+   */
+  export interface Data {
+    /**
+     * The embedding vector as a list of floats (when encoding_format="float") or as a
+     * base64-encoded string (when encoding_format="base64")
+     */
+    embedding: Array<number> | string;
+
+    /**
+     * The index of the embedding in the input list
+     */
+    index: number;
+
+    /**
+     * The object type, which will be "embedding"
+     */
+    object: 'embedding';
+  }
+
+  /**
+   * Usage information
+   */
+  export interface Usage {
+    /**
+     * The number of tokens in the input
+     */
+    prompt_tokens: number;
+
+    /**
+     * The total number of tokens used
+     */
+    total_tokens: number;
+  }
+}
+
+export interface EmbeddingCreateParams {
+  /**
+   * Input text to embed, encoded as a string or array of strings. To embed multiple
+   * inputs in a single request, pass an array of strings.
+   */
+  input: string | Array<string>;
+
+  /**
+   * The identifier of the model to use. The model must be an embedding model
+   * registered with Llama Stack and available via the /models endpoint.
+   */
+  model: string;
+
+  /**
+   * (Optional) The number of dimensions the resulting output embeddings should have.
+   * Only supported in text-embedding-3 and later models.
+   */
+  dimensions?: number;
+
+  /**
+   * (Optional) The format to return the embeddings in. Can be either "float" or
+   * "base64". Defaults to "float".
+   */
+  encoding_format?: string;
+
+  /**
+   * (Optional) A unique identifier representing your end-user, which can help OpenAI
+   * to monitor and detect abuse.
+   */
+  user?: string;
+}
+
+export declare namespace Embeddings {
+  export {
+    type EmbeddingsResponse as EmbeddingsResponse,
+    type EmbeddingCreateParams as EmbeddingCreateParams,
+  };
+}
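
One consumer-side wrinkle in this schema: EmbeddingsResponse.Data.embedding is a union, Array<number> | string, depending on encoding_format. A small normalization helper, sketched for Node.js; the base64 branch assumes the server packs little-endian float32 values, which is the common OpenAI-compatible convention but is not stated in this diff:

function toFloats(embedding: Array<number> | string): number[] {
  if (Array.isArray(embedding)) return embedding; // encoding_format = "float"
  // encoding_format = "base64": copy into a fresh, aligned buffer before
  // viewing the bytes as float32.
  const bytes = new Uint8Array(Buffer.from(embedding, 'base64'));
  return Array.from(new Float32Array(bytes.buffer));
}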
