From 74e2976c1e67c245b74f40f5f5e3e125c7faebc8 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 11 Oct 2025 15:27:47 -0700 Subject: [PATCH 1/9] feat(api)!: support passing extra_body to embeddings and vector_stores APIs Applies the same pattern from #3777 to embeddings and vector_stores.create() endpoints. Breaking change: Method signatures now accept a single params object with Pydantic extra="allow" instead of individual parameters. Provider-specific params can be passed via extra_body and accessed through params.model_extra. Updated APIs: openai_embeddings(), openai_create_vector_store(), openai_create_vector_store_file_batch() --- docs/static/deprecated-llama-stack-spec.html | 49 ++++++++------ docs/static/deprecated-llama-stack-spec.yaml | 63 ++++++++++------- docs/static/llama-stack-spec.html | 49 ++++++++------ docs/static/llama-stack-spec.yaml | 63 ++++++++++------- docs/static/stainless-llama-stack-spec.html | 49 ++++++++------ docs/static/stainless-llama-stack-spec.yaml | 63 ++++++++++------- llama_stack/apis/inference/inference.py | 31 ++++++--- llama_stack/apis/vector_io/vector_io.py | 67 +++++++++++++------ llama_stack/core/routers/inference.py | 23 +++---- .../remote/inference/bedrock/bedrock.py | 7 +- .../remote/inference/cerebras/cerebras.py | 11 ++- .../inference/llama_openai_compat/llama.py | 7 +- .../remote/inference/nvidia/nvidia.py | 17 ++--- .../inference/passthrough/passthrough.py | 7 +- .../providers/remote/inference/tgi/tgi.py | 11 ++- .../remote/inference/together/together.py | 23 +++---- .../utils/inference/embedding_mixin.py | 15 ++--- .../utils/inference/litellm_openai_mixin.py | 15 ++--- .../providers/utils/inference/openai_mixin.py | 31 +++++---- .../utils/memory/openai_vector_store_mixin.py | 60 ++++++++--------- 20 files changed, 364 insertions(+), 297 deletions(-) diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index 8c4c80014c..a3638749d4 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -1662,7 +1662,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiEmbeddingsRequest" + "$ref": "#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody" } } }, @@ -2436,13 +2436,13 @@ "VectorIO" ], "summary": "Creates a vector store.", - "description": "Creates a vector store.", + "description": "Creates a vector store.\nGenerate an OpenAI-compatible vector store with the given parameters.", "parameters": [], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody" } } }, @@ -2622,7 +2622,7 @@ "VectorIO" ], "summary": "Create a vector store file batch.", - "description": "Create a vector store file batch.", + "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", "parameters": [ { "name": "vector_store_id", @@ -2638,7 +2638,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody" } } }, @@ -8174,7 +8174,7 @@ "title": "OpenAICompletionChoice", "description": "A choice from an OpenAI-compatible completion response." }, - "OpenaiEmbeddingsRequest": { + "OpenAIEmbeddingsRequestWithExtraBody": { "type": "object", "properties": { "model": { @@ -8197,6 +8197,7 @@ }, "encoding_format": { "type": "string", + "default": "float", "description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"." }, "dimensions": { @@ -8213,7 +8214,8 @@ "model", "input" ], - "title": "OpenaiEmbeddingsRequest" + "title": "OpenAIEmbeddingsRequestWithExtraBody", + "description": "Request parameters for OpenAI-compatible embeddings endpoint." }, "OpenAIEmbeddingData": { "type": "object", @@ -12061,19 +12063,19 @@ "title": "VectorStoreObject", "description": "OpenAI Vector Store object." }, - "OpenaiCreateVectorStoreRequest": { + "OpenAICreateVectorStoreRequestWithExtraBody": { "type": "object", "properties": { "name": { "type": "string", - "description": "A name for the vector store." + "description": "(Optional) A name for the vector store" }, "file_ids": { "type": "array", "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files." + "description": "List of file IDs to include in the vector store" }, "expires_after": { "type": "object", @@ -12099,7 +12101,7 @@ } ] }, - "description": "The expiration policy for a vector store." + "description": "(Optional) Expiration policy for the vector store" }, "chunking_strategy": { "type": "object", @@ -12125,7 +12127,7 @@ } ] }, - "description": "The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy." + "description": "(Optional) Strategy for splitting files into chunks" }, "metadata": { "type": "object", @@ -12151,23 +12153,25 @@ } ] }, - "description": "Set of 16 key-value pairs that can be attached to an object." + "description": "Set of key-value pairs that can be attached to the vector store" }, "embedding_model": { "type": "string", - "description": "The embedding model to use for this vector store." + "description": "(Optional) The embedding model to use for this vector store" }, "embedding_dimension": { "type": "integer", - "description": "The dimension of the embedding vectors (default: 384)." + "default": 384, + "description": "(Optional) The dimension of the embedding vectors (default: 384)" }, "provider_id": { "type": "string", - "description": "The ID of the provider to use for this vector store." + "description": "(Optional) The ID of the provider to use for this vector store" } }, "additionalProperties": false, - "title": "OpenaiCreateVectorStoreRequest" + "title": "OpenAICreateVectorStoreRequestWithExtraBody", + "description": "Request to create a vector store with extra_body support." }, "OpenaiUpdateVectorStoreRequest": { "type": "object", @@ -12337,7 +12341,7 @@ "title": "VectorStoreChunkingStrategyStaticConfig", "description": "Configuration for static chunking strategy." }, - "OpenaiCreateVectorStoreFileBatchRequest": { + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { "file_ids": { @@ -12345,7 +12349,7 @@ "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use." + "description": "A list of File IDs that the vector store should use" }, "attributes": { "type": "object", @@ -12371,18 +12375,19 @@ } ] }, - "description": "(Optional) Key-value attributes to store with the files." + "description": "(Optional) Key-value attributes to store with the files" }, "chunking_strategy": { "$ref": "#/components/schemas/VectorStoreChunkingStrategy", - "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto." + "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto" } }, "additionalProperties": false, "required": [ "file_ids" ], - "title": "OpenaiCreateVectorStoreFileBatchRequest" + "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "description": "Request to create a vector store file batch with extra_body support." }, "VectorStoreFileBatchObject": { "type": "object", diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index e8cc035da4..3cbcbc82b7 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -1203,7 +1203,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiEmbeddingsRequest' + $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true deprecated: true /v1/openai/v1/files: @@ -1792,13 +1792,16 @@ paths: tags: - VectorIO summary: Creates a vector store. - description: Creates a vector store. + description: >- + Creates a vector store. + + Generate an OpenAI-compatible vector store with the given parameters. parameters: [] requestBody: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' required: true deprecated: true /v1/openai/v1/vector_stores/{vector_store_id}: @@ -1924,7 +1927,11 @@ paths: tags: - VectorIO summary: Create a vector store file batch. - description: Create a vector store file batch. + description: >- + Create a vector store file batch. + + Generate an OpenAI-compatible vector store file batch for the given vector + store. parameters: - name: vector_store_id in: path @@ -1937,7 +1944,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' required: true deprecated: true /v1/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: @@ -6035,7 +6042,7 @@ components: title: OpenAICompletionChoice description: >- A choice from an OpenAI-compatible completion response. - OpenaiEmbeddingsRequest: + OpenAIEmbeddingsRequestWithExtraBody: type: object properties: model: @@ -6054,6 +6061,7 @@ components: multiple inputs in a single request, pass an array of strings. encoding_format: type: string + default: float description: >- (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". @@ -6071,7 +6079,9 @@ components: required: - model - input - title: OpenaiEmbeddingsRequest + title: OpenAIEmbeddingsRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible embeddings endpoint. OpenAIEmbeddingData: type: object properties: @@ -9147,19 +9157,18 @@ components: - metadata title: VectorStoreObject description: OpenAI Vector Store object. - OpenaiCreateVectorStoreRequest: + "OpenAICreateVectorStoreRequestWithExtraBody": type: object properties: name: type: string - description: A name for the vector store. + description: (Optional) A name for the vector store file_ids: type: array items: type: string description: >- - A list of File IDs that the vector store should use. Useful for tools - like `file_search` that can access files. + List of file IDs to include in the vector store expires_after: type: object additionalProperties: @@ -9171,7 +9180,7 @@ components: - type: array - type: object description: >- - The expiration policy for a vector store. + (Optional) Expiration policy for the vector store chunking_strategy: type: object additionalProperties: @@ -9183,8 +9192,7 @@ components: - type: array - type: object description: >- - The chunking strategy used to chunk the file(s). If not set, will use - the `auto` strategy. + (Optional) Strategy for splitting files into chunks metadata: type: object additionalProperties: @@ -9196,21 +9204,25 @@ components: - type: array - type: object description: >- - Set of 16 key-value pairs that can be attached to an object. + Set of key-value pairs that can be attached to the vector store embedding_model: type: string description: >- - The embedding model to use for this vector store. + (Optional) The embedding model to use for this vector store embedding_dimension: type: integer + default: 384 description: >- - The dimension of the embedding vectors (default: 384). + (Optional) The dimension of the embedding vectors (default: 384) provider_id: type: string description: >- - The ID of the provider to use for this vector store. + (Optional) The ID of the provider to use for this vector store additionalProperties: false - title: OpenaiCreateVectorStoreRequest + title: >- + OpenAICreateVectorStoreRequestWithExtraBody + description: >- + Request to create a vector store with extra_body support. OpenaiUpdateVectorStoreRequest: type: object properties: @@ -9331,7 +9343,7 @@ components: title: VectorStoreChunkingStrategyStaticConfig description: >- Configuration for static chunking strategy. - OpenaiCreateVectorStoreFileBatchRequest: + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: file_ids: @@ -9339,7 +9351,7 @@ components: items: type: string description: >- - A list of File IDs that the vector store should use. + A list of File IDs that the vector store should use attributes: type: object additionalProperties: @@ -9351,16 +9363,19 @@ components: - type: array - type: object description: >- - (Optional) Key-value attributes to store with the files. + (Optional) Key-value attributes to store with the files chunking_strategy: $ref: '#/components/schemas/VectorStoreChunkingStrategy' description: >- (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto. + auto additionalProperties: false required: - file_ids - title: OpenaiCreateVectorStoreFileBatchRequest + title: >- + OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: >- + Request to create a vector store file batch with extra_body support. VectorStoreFileBatchObject: type: object properties: diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 38ec9dd1cf..c1aa41abce 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -765,7 +765,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiEmbeddingsRequest" + "$ref": "#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody" } } }, @@ -3170,13 +3170,13 @@ "VectorIO" ], "summary": "Creates a vector store.", - "description": "Creates a vector store.", + "description": "Creates a vector store.\nGenerate an OpenAI-compatible vector store with the given parameters.", "parameters": [], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody" } } }, @@ -3356,7 +3356,7 @@ "VectorIO" ], "summary": "Create a vector store file batch.", - "description": "Create a vector store file batch.", + "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", "parameters": [ { "name": "vector_store_id", @@ -3372,7 +3372,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody" } } }, @@ -6324,7 +6324,7 @@ "title": "ConversationItemDeletedResource", "description": "Response for deleted conversation item." }, - "OpenaiEmbeddingsRequest": { + "OpenAIEmbeddingsRequestWithExtraBody": { "type": "object", "properties": { "model": { @@ -6347,6 +6347,7 @@ }, "encoding_format": { "type": "string", + "default": "float", "description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"." }, "dimensions": { @@ -6363,7 +6364,8 @@ "model", "input" ], - "title": "OpenaiEmbeddingsRequest" + "title": "OpenAIEmbeddingsRequestWithExtraBody", + "description": "Request parameters for OpenAI-compatible embeddings endpoint." }, "OpenAIEmbeddingData": { "type": "object", @@ -12587,19 +12589,19 @@ "title": "VectorStoreObject", "description": "OpenAI Vector Store object." }, - "OpenaiCreateVectorStoreRequest": { + "OpenAICreateVectorStoreRequestWithExtraBody": { "type": "object", "properties": { "name": { "type": "string", - "description": "A name for the vector store." + "description": "(Optional) A name for the vector store" }, "file_ids": { "type": "array", "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files." + "description": "List of file IDs to include in the vector store" }, "expires_after": { "type": "object", @@ -12625,7 +12627,7 @@ } ] }, - "description": "The expiration policy for a vector store." + "description": "(Optional) Expiration policy for the vector store" }, "chunking_strategy": { "type": "object", @@ -12651,7 +12653,7 @@ } ] }, - "description": "The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy." + "description": "(Optional) Strategy for splitting files into chunks" }, "metadata": { "type": "object", @@ -12677,23 +12679,25 @@ } ] }, - "description": "Set of 16 key-value pairs that can be attached to an object." + "description": "Set of key-value pairs that can be attached to the vector store" }, "embedding_model": { "type": "string", - "description": "The embedding model to use for this vector store." + "description": "(Optional) The embedding model to use for this vector store" }, "embedding_dimension": { "type": "integer", - "description": "The dimension of the embedding vectors (default: 384)." + "default": 384, + "description": "(Optional) The dimension of the embedding vectors (default: 384)" }, "provider_id": { "type": "string", - "description": "The ID of the provider to use for this vector store." + "description": "(Optional) The ID of the provider to use for this vector store" } }, "additionalProperties": false, - "title": "OpenaiCreateVectorStoreRequest" + "title": "OpenAICreateVectorStoreRequestWithExtraBody", + "description": "Request to create a vector store with extra_body support." }, "OpenaiUpdateVectorStoreRequest": { "type": "object", @@ -12863,7 +12867,7 @@ "title": "VectorStoreChunkingStrategyStaticConfig", "description": "Configuration for static chunking strategy." }, - "OpenaiCreateVectorStoreFileBatchRequest": { + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { "file_ids": { @@ -12871,7 +12875,7 @@ "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use." + "description": "A list of File IDs that the vector store should use" }, "attributes": { "type": "object", @@ -12897,18 +12901,19 @@ } ] }, - "description": "(Optional) Key-value attributes to store with the files." + "description": "(Optional) Key-value attributes to store with the files" }, "chunking_strategy": { "$ref": "#/components/schemas/VectorStoreChunkingStrategy", - "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto." + "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto" } }, "additionalProperties": false, "required": [ "file_ids" ], - "title": "OpenaiCreateVectorStoreFileBatchRequest" + "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "description": "Request to create a vector store file batch with extra_body support." }, "VectorStoreFileBatchObject": { "type": "object", diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index ad766d9d54..a66ceede83 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -617,7 +617,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiEmbeddingsRequest' + $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true deprecated: false /v1/files: @@ -2413,13 +2413,16 @@ paths: tags: - VectorIO summary: Creates a vector store. - description: Creates a vector store. + description: >- + Creates a vector store. + + Generate an OpenAI-compatible vector store with the given parameters. parameters: [] requestBody: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' required: true deprecated: false /v1/vector_stores/{vector_store_id}: @@ -2545,7 +2548,11 @@ paths: tags: - VectorIO summary: Create a vector store file batch. - description: Create a vector store file batch. + description: >- + Create a vector store file batch. + + Generate an OpenAI-compatible vector store file batch for the given vector + store. parameters: - name: vector_store_id in: path @@ -2558,7 +2565,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' required: true deprecated: false /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: @@ -4797,7 +4804,7 @@ components: - deleted title: ConversationItemDeletedResource description: Response for deleted conversation item. - OpenaiEmbeddingsRequest: + OpenAIEmbeddingsRequestWithExtraBody: type: object properties: model: @@ -4816,6 +4823,7 @@ components: multiple inputs in a single request, pass an array of strings. encoding_format: type: string + default: float description: >- (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". @@ -4833,7 +4841,9 @@ components: required: - model - input - title: OpenaiEmbeddingsRequest + title: OpenAIEmbeddingsRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible embeddings endpoint. OpenAIEmbeddingData: type: object properties: @@ -9612,19 +9622,18 @@ components: - metadata title: VectorStoreObject description: OpenAI Vector Store object. - OpenaiCreateVectorStoreRequest: + "OpenAICreateVectorStoreRequestWithExtraBody": type: object properties: name: type: string - description: A name for the vector store. + description: (Optional) A name for the vector store file_ids: type: array items: type: string description: >- - A list of File IDs that the vector store should use. Useful for tools - like `file_search` that can access files. + List of file IDs to include in the vector store expires_after: type: object additionalProperties: @@ -9636,7 +9645,7 @@ components: - type: array - type: object description: >- - The expiration policy for a vector store. + (Optional) Expiration policy for the vector store chunking_strategy: type: object additionalProperties: @@ -9648,8 +9657,7 @@ components: - type: array - type: object description: >- - The chunking strategy used to chunk the file(s). If not set, will use - the `auto` strategy. + (Optional) Strategy for splitting files into chunks metadata: type: object additionalProperties: @@ -9661,21 +9669,25 @@ components: - type: array - type: object description: >- - Set of 16 key-value pairs that can be attached to an object. + Set of key-value pairs that can be attached to the vector store embedding_model: type: string description: >- - The embedding model to use for this vector store. + (Optional) The embedding model to use for this vector store embedding_dimension: type: integer + default: 384 description: >- - The dimension of the embedding vectors (default: 384). + (Optional) The dimension of the embedding vectors (default: 384) provider_id: type: string description: >- - The ID of the provider to use for this vector store. + (Optional) The ID of the provider to use for this vector store additionalProperties: false - title: OpenaiCreateVectorStoreRequest + title: >- + OpenAICreateVectorStoreRequestWithExtraBody + description: >- + Request to create a vector store with extra_body support. OpenaiUpdateVectorStoreRequest: type: object properties: @@ -9796,7 +9808,7 @@ components: title: VectorStoreChunkingStrategyStaticConfig description: >- Configuration for static chunking strategy. - OpenaiCreateVectorStoreFileBatchRequest: + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: file_ids: @@ -9804,7 +9816,7 @@ components: items: type: string description: >- - A list of File IDs that the vector store should use. + A list of File IDs that the vector store should use attributes: type: object additionalProperties: @@ -9816,16 +9828,19 @@ components: - type: array - type: object description: >- - (Optional) Key-value attributes to store with the files. + (Optional) Key-value attributes to store with the files chunking_strategy: $ref: '#/components/schemas/VectorStoreChunkingStrategy' description: >- (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto. + auto additionalProperties: false required: - file_ids - title: OpenaiCreateVectorStoreFileBatchRequest + title: >- + OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: >- + Request to create a vector store file batch with extra_body support. VectorStoreFileBatchObject: type: object properties: diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 657705a1c5..717352b35b 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -765,7 +765,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiEmbeddingsRequest" + "$ref": "#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody" } } }, @@ -3170,13 +3170,13 @@ "VectorIO" ], "summary": "Creates a vector store.", - "description": "Creates a vector store.", + "description": "Creates a vector store.\nGenerate an OpenAI-compatible vector store with the given parameters.", "parameters": [], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody" } } }, @@ -3356,7 +3356,7 @@ "VectorIO" ], "summary": "Create a vector store file batch.", - "description": "Create a vector store file batch.", + "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", "parameters": [ { "name": "vector_store_id", @@ -3372,7 +3372,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest" + "$ref": "#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody" } } }, @@ -8333,7 +8333,7 @@ "title": "ConversationItemDeletedResource", "description": "Response for deleted conversation item." }, - "OpenaiEmbeddingsRequest": { + "OpenAIEmbeddingsRequestWithExtraBody": { "type": "object", "properties": { "model": { @@ -8356,6 +8356,7 @@ }, "encoding_format": { "type": "string", + "default": "float", "description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"." }, "dimensions": { @@ -8372,7 +8373,8 @@ "model", "input" ], - "title": "OpenaiEmbeddingsRequest" + "title": "OpenAIEmbeddingsRequestWithExtraBody", + "description": "Request parameters for OpenAI-compatible embeddings endpoint." }, "OpenAIEmbeddingData": { "type": "object", @@ -14596,19 +14598,19 @@ "title": "VectorStoreObject", "description": "OpenAI Vector Store object." }, - "OpenaiCreateVectorStoreRequest": { + "OpenAICreateVectorStoreRequestWithExtraBody": { "type": "object", "properties": { "name": { "type": "string", - "description": "A name for the vector store." + "description": "(Optional) A name for the vector store" }, "file_ids": { "type": "array", "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files." + "description": "List of file IDs to include in the vector store" }, "expires_after": { "type": "object", @@ -14634,7 +14636,7 @@ } ] }, - "description": "The expiration policy for a vector store." + "description": "(Optional) Expiration policy for the vector store" }, "chunking_strategy": { "type": "object", @@ -14660,7 +14662,7 @@ } ] }, - "description": "The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy." + "description": "(Optional) Strategy for splitting files into chunks" }, "metadata": { "type": "object", @@ -14686,23 +14688,25 @@ } ] }, - "description": "Set of 16 key-value pairs that can be attached to an object." + "description": "Set of key-value pairs that can be attached to the vector store" }, "embedding_model": { "type": "string", - "description": "The embedding model to use for this vector store." + "description": "(Optional) The embedding model to use for this vector store" }, "embedding_dimension": { "type": "integer", - "description": "The dimension of the embedding vectors (default: 384)." + "default": 384, + "description": "(Optional) The dimension of the embedding vectors (default: 384)" }, "provider_id": { "type": "string", - "description": "The ID of the provider to use for this vector store." + "description": "(Optional) The ID of the provider to use for this vector store" } }, "additionalProperties": false, - "title": "OpenaiCreateVectorStoreRequest" + "title": "OpenAICreateVectorStoreRequestWithExtraBody", + "description": "Request to create a vector store with extra_body support." }, "OpenaiUpdateVectorStoreRequest": { "type": "object", @@ -14872,7 +14876,7 @@ "title": "VectorStoreChunkingStrategyStaticConfig", "description": "Configuration for static chunking strategy." }, - "OpenaiCreateVectorStoreFileBatchRequest": { + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { "file_ids": { @@ -14880,7 +14884,7 @@ "items": { "type": "string" }, - "description": "A list of File IDs that the vector store should use." + "description": "A list of File IDs that the vector store should use" }, "attributes": { "type": "object", @@ -14906,18 +14910,19 @@ } ] }, - "description": "(Optional) Key-value attributes to store with the files." + "description": "(Optional) Key-value attributes to store with the files" }, "chunking_strategy": { "$ref": "#/components/schemas/VectorStoreChunkingStrategy", - "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto." + "description": "(Optional) The chunking strategy used to chunk the file(s). Defaults to auto" } }, "additionalProperties": false, "required": [ "file_ids" ], - "title": "OpenaiCreateVectorStoreFileBatchRequest" + "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "description": "Request to create a vector store file batch with extra_body support." }, "VectorStoreFileBatchObject": { "type": "object", diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index d5dcbe7d35..4a5f7c8e0d 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -620,7 +620,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiEmbeddingsRequest' + $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true deprecated: false /v1/files: @@ -2416,13 +2416,16 @@ paths: tags: - VectorIO summary: Creates a vector store. - description: Creates a vector store. + description: >- + Creates a vector store. + + Generate an OpenAI-compatible vector store with the given parameters. parameters: [] requestBody: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' required: true deprecated: false /v1/vector_stores/{vector_store_id}: @@ -2548,7 +2551,11 @@ paths: tags: - VectorIO summary: Create a vector store file batch. - description: Create a vector store file batch. + description: >- + Create a vector store file batch. + + Generate an OpenAI-compatible vector store file batch for the given vector + store. parameters: - name: vector_store_id in: path @@ -2561,7 +2568,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenaiCreateVectorStoreFileBatchRequest' + $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' required: true deprecated: false /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: @@ -6242,7 +6249,7 @@ components: - deleted title: ConversationItemDeletedResource description: Response for deleted conversation item. - OpenaiEmbeddingsRequest: + OpenAIEmbeddingsRequestWithExtraBody: type: object properties: model: @@ -6261,6 +6268,7 @@ components: multiple inputs in a single request, pass an array of strings. encoding_format: type: string + default: float description: >- (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". @@ -6278,7 +6286,9 @@ components: required: - model - input - title: OpenaiEmbeddingsRequest + title: OpenAIEmbeddingsRequestWithExtraBody + description: >- + Request parameters for OpenAI-compatible embeddings endpoint. OpenAIEmbeddingData: type: object properties: @@ -11057,19 +11067,18 @@ components: - metadata title: VectorStoreObject description: OpenAI Vector Store object. - OpenaiCreateVectorStoreRequest: + "OpenAICreateVectorStoreRequestWithExtraBody": type: object properties: name: type: string - description: A name for the vector store. + description: (Optional) A name for the vector store file_ids: type: array items: type: string description: >- - A list of File IDs that the vector store should use. Useful for tools - like `file_search` that can access files. + List of file IDs to include in the vector store expires_after: type: object additionalProperties: @@ -11081,7 +11090,7 @@ components: - type: array - type: object description: >- - The expiration policy for a vector store. + (Optional) Expiration policy for the vector store chunking_strategy: type: object additionalProperties: @@ -11093,8 +11102,7 @@ components: - type: array - type: object description: >- - The chunking strategy used to chunk the file(s). If not set, will use - the `auto` strategy. + (Optional) Strategy for splitting files into chunks metadata: type: object additionalProperties: @@ -11106,21 +11114,25 @@ components: - type: array - type: object description: >- - Set of 16 key-value pairs that can be attached to an object. + Set of key-value pairs that can be attached to the vector store embedding_model: type: string description: >- - The embedding model to use for this vector store. + (Optional) The embedding model to use for this vector store embedding_dimension: type: integer + default: 384 description: >- - The dimension of the embedding vectors (default: 384). + (Optional) The dimension of the embedding vectors (default: 384) provider_id: type: string description: >- - The ID of the provider to use for this vector store. + (Optional) The ID of the provider to use for this vector store additionalProperties: false - title: OpenaiCreateVectorStoreRequest + title: >- + OpenAICreateVectorStoreRequestWithExtraBody + description: >- + Request to create a vector store with extra_body support. OpenaiUpdateVectorStoreRequest: type: object properties: @@ -11241,7 +11253,7 @@ components: title: VectorStoreChunkingStrategyStaticConfig description: >- Configuration for static chunking strategy. - OpenaiCreateVectorStoreFileBatchRequest: + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: file_ids: @@ -11249,7 +11261,7 @@ components: items: type: string description: >- - A list of File IDs that the vector store should use. + A list of File IDs that the vector store should use attributes: type: object additionalProperties: @@ -11261,16 +11273,19 @@ components: - type: array - type: object description: >- - (Optional) Key-value attributes to store with the files. + (Optional) Key-value attributes to store with the files chunking_strategy: $ref: '#/components/schemas/VectorStoreChunkingStrategy' description: >- (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto. + auto additionalProperties: false required: - file_ids - title: OpenaiCreateVectorStoreFileBatchRequest + title: >- + OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: >- + Request to create a vector store file batch with extra_body support. VectorStoreFileBatchObject: type: object properties: diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 3c1aa1f63d..0272464703 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1140,6 +1140,25 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"): user: str | None = None +# extra_body can be accessed via .model_extra +@json_schema_type +class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"): + """Request parameters for OpenAI-compatible embeddings endpoint. + + :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint. + :param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings. + :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". + :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. + :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + """ + + model: str + input: str | list[str] + encoding_format: str | None = "float" + dimensions: int | None = None + user: str | None = None + + @runtime_checkable @trace_protocol class InferenceProvider(Protocol): @@ -1200,21 +1219,11 @@ async def openai_chat_completion( @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1) async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: Annotated[OpenAIEmbeddingsRequestWithExtraBody, Body(...)], ) -> OpenAIEmbeddingsResponse: """Create embeddings. Generate OpenAI-compatible embeddings for the given input using the specified model. - - :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint. - :param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings. - :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". - :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. - :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. :returns: An OpenAIEmbeddingsResponse containing the embeddings. """ ... diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index 238889099b..4a13f0ebf7 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -11,6 +11,7 @@ import uuid from typing import Annotated, Any, Literal, Protocol, runtime_checkable +from fastapi import Body from pydantic import BaseModel, Field from llama_stack.apis.inference import InterleavedContent @@ -466,6 +467,46 @@ class VectorStoreFilesListInBatchResponse(BaseModel): has_more: bool = False +# extra_body can be accessed via .model_extra +@json_schema_type +class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"): + """Request to create a vector store with extra_body support. + + :param name: (Optional) A name for the vector store + :param file_ids: List of file IDs to include in the vector store + :param expires_after: (Optional) Expiration policy for the vector store + :param chunking_strategy: (Optional) Strategy for splitting files into chunks + :param metadata: Set of key-value pairs that can be attached to the vector store + :param embedding_model: (Optional) The embedding model to use for this vector store + :param embedding_dimension: (Optional) The dimension of the embedding vectors (default: 384) + :param provider_id: (Optional) The ID of the provider to use for this vector store + """ + + name: str | None = None + file_ids: list[str] | None = None + expires_after: dict[str, Any] | None = None + chunking_strategy: dict[str, Any] | None = None + metadata: dict[str, Any] | None = None + embedding_model: str | None = None + embedding_dimension: int | None = 384 + provider_id: str | None = None + + +# extra_body can be accessed via .model_extra +@json_schema_type +class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="allow"): + """Request to create a vector store file batch with extra_body support. + + :param file_ids: A list of File IDs that the vector store should use + :param attributes: (Optional) Key-value attributes to store with the files + :param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto + """ + + file_ids: list[str] + attributes: dict[str, Any] | None = None + chunking_strategy: VectorStoreChunkingStrategy | None = None + + class VectorDBStore(Protocol): def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ... @@ -516,25 +557,11 @@ async def query_chunks( @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1) async def openai_create_vector_store( self, - name: str | None = None, - file_ids: list[str] | None = None, - expires_after: dict[str, Any] | None = None, - chunking_strategy: dict[str, Any] | None = None, - metadata: dict[str, Any] | None = None, - embedding_model: str | None = None, - embedding_dimension: int | None = 384, - provider_id: str | None = None, + params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)], ) -> VectorStoreObject: """Creates a vector store. - :param name: A name for the vector store. - :param file_ids: A list of File IDs that the vector store should use. Useful for tools like `file_search` that can access files. - :param expires_after: The expiration policy for a vector store. - :param chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy. - :param metadata: Set of 16 key-value pairs that can be attached to an object. - :param embedding_model: The embedding model to use for this vector store. - :param embedding_dimension: The dimension of the embedding vectors (default: 384). - :param provider_id: The ID of the provider to use for this vector store. + Generate an OpenAI-compatible vector store with the given parameters. :returns: A VectorStoreObject representing the created vector store. """ ... @@ -827,16 +854,12 @@ async def openai_delete_vector_store_file( async def openai_create_vector_store_file_batch( self, vector_store_id: str, - file_ids: list[str], - attributes: dict[str, Any] | None = None, - chunking_strategy: VectorStoreChunkingStrategy | None = None, + params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], ) -> VectorStoreFileBatchObject: """Create a vector store file batch. + Generate an OpenAI-compatible vector store file batch for the given vector store. :param vector_store_id: The ID of the vector store to create the file batch for. - :param file_ids: A list of File IDs that the vector store should use. - :param attributes: (Optional) Key-value attributes to store with the files. - :param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto. :returns: A VectorStoreFileBatchObject representing the created file batch. """ ... diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py index e16d08371b..b20ad44ca0 100644 --- a/llama_stack/core/routers/inference.py +++ b/llama_stack/core/routers/inference.py @@ -40,6 +40,7 @@ OpenAICompletion, OpenAICompletionRequestWithExtraBody, OpenAICompletionWithInputMessages, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIMessageParam, Order, @@ -279,26 +280,18 @@ async def openai_chat_completion( async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: Annotated[OpenAIEmbeddingsRequestWithExtraBody, Body(...)], ) -> OpenAIEmbeddingsResponse: logger.debug( - f"InferenceRouter.openai_embeddings: {model=}, input_type={type(input)}, {encoding_format=}, {dimensions=}", - ) - model_obj = await self._get_model(model, ModelType.embedding) - params = dict( - model=model_obj.identifier, - input=input, - encoding_format=encoding_format, - dimensions=dimensions, - user=user, + f"InferenceRouter.openai_embeddings: model={params.model}, input_type={type(params.input)}, encoding_format={params.encoding_format}, dimensions={params.dimensions}", ) + model_obj = await self._get_model(params.model, ModelType.embedding) + + # Update model to use resolved identifier + params.model = model_obj.identifier provider = await self.routing_table.get_provider_impl(model_obj.identifier) - return await provider.openai_embeddings(**params) + return await provider.openai_embeddings(params) async def list_chat_completions( self, diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py index 057ed758bf..d266f9e6f7 100644 --- a/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -14,6 +14,7 @@ Inference, OpenAIChatCompletionRequestWithExtraBody, OpenAICompletionRequestWithExtraBody, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) from llama_stack.apis.inference.inference import ( @@ -124,11 +125,7 @@ async def _get_params_for_chat_completion(self, request: ChatCompletionRequest) async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py index 0e24af0ee9..daf67616b2 100644 --- a/llama_stack/providers/remote/inference/cerebras/cerebras.py +++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py @@ -6,7 +6,10 @@ from urllib.parse import urljoin -from llama_stack.apis.inference import OpenAIEmbeddingsResponse +from llama_stack.apis.inference import ( + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, +) from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import CerebrasImplConfig @@ -20,10 +23,6 @@ def get_base_url(self) -> str: async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py index e5fb3c77fe..05d6e8cc84 100644 --- a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py +++ b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py @@ -7,6 +7,7 @@ from llama_stack.apis.inference.inference import ( OpenAICompletion, OpenAICompletionRequestWithExtraBody, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) from llama_stack.log import get_logger @@ -40,10 +41,6 @@ async def openai_completion( async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 9d8d1089a3..37864b0408 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -9,6 +9,7 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingData, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ) @@ -78,11 +79,7 @@ def get_base_url(self) -> str: async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: """ OpenAI-compatible embeddings for NVIDIA NIM. @@ -99,11 +96,11 @@ async def openai_embeddings( ) response = await self.client.embeddings.create( - model=await self._get_provider_model_id(model), - input=input, - encoding_format=encoding_format if encoding_format is not None else NOT_GIVEN, - dimensions=dimensions if dimensions is not None else NOT_GIVEN, - user=user if user is not None else NOT_GIVEN, + model=await self._get_provider_model_id(params.model), + input=params.input, + encoding_format=params.encoding_format if params.encoding_format is not None else NOT_GIVEN, + dimensions=params.dimensions if params.dimensions is not None else NOT_GIVEN, + user=params.user if params.user is not None else NOT_GIVEN, extra_body=extra_body, ) diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py index 11306095bf..4d4d4f41da 100644 --- a/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -16,6 +16,7 @@ OpenAIChatCompletionRequestWithExtraBody, OpenAICompletion, OpenAICompletionRequestWithExtraBody, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) from llama_stack.apis.models import Model @@ -69,11 +70,7 @@ def _get_client(self) -> AsyncLlamaStackClient: async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py index da3205a132..6ae7b25449 100644 --- a/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/llama_stack/providers/remote/inference/tgi/tgi.py @@ -10,7 +10,10 @@ from huggingface_hub import AsyncInferenceClient, HfApi from pydantic import SecretStr -from llama_stack.apis.inference import OpenAIEmbeddingsResponse +from llama_stack.apis.inference import ( + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, +) from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin @@ -40,11 +43,7 @@ async def list_provider_model_ids(self) -> Iterable[str]: async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index e29cccf04b..e31ebf7c54 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -11,6 +11,7 @@ from together.constants import BASE_URL from llama_stack.apis.inference import ( + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage @@ -62,11 +63,7 @@ async def list_provider_model_ids(self) -> Iterable[str]: async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: """ Together's OpenAI-compatible embeddings endpoint is not compatible with @@ -78,25 +75,27 @@ async def openai_embeddings( - does not support dimensions param, returns 400 Unrecognized request arguments supplied: dimensions """ # Together support ticket #13332 -> will not fix - if user is not None: + if params.user is not None: raise ValueError("Together's embeddings endpoint does not support user param.") # Together support ticket #13333 -> escalated - if dimensions is not None: + if params.dimensions is not None: raise ValueError("Together's embeddings endpoint does not support dimensions param.") response = await self.client.embeddings.create( - model=await self._get_provider_model_id(model), - input=input, - encoding_format=encoding_format, + model=await self._get_provider_model_id(params.model), + input=params.input, + encoding_format=params.encoding_format, ) - response.model = model # return the user the same model id they provided, avoid exposing the provider model id + response.model = ( + params.model + ) # return the user the same model id they provided, avoid exposing the provider model id # Together support ticket #13330 -> escalated # - togethercomputer/m2-bert-80M-32k-retrieval *does not* return usage information if not hasattr(response, "usage") or response.usage is None: logger.warning( - f"Together's embedding endpoint for {model} did not return usage information, substituting -1s." + f"Together's embedding endpoint for {params.model} did not return usage information, substituting -1s." ) response.usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1) diff --git a/llama_stack/providers/utils/inference/embedding_mixin.py b/llama_stack/providers/utils/inference/embedding_mixin.py index facc59f65b..3759434429 100644 --- a/llama_stack/providers/utils/inference/embedding_mixin.py +++ b/llama_stack/providers/utils/inference/embedding_mixin.py @@ -17,6 +17,7 @@ from llama_stack.apis.inference import ( ModelStore, OpenAIEmbeddingData, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ) @@ -32,26 +33,22 @@ class SentenceTransformerEmbeddingMixin: async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: # Convert input to list format if it's a single string - input_list = [input] if isinstance(input, str) else input + input_list = [params.input] if isinstance(params.input, str) else params.input if not input_list: raise ValueError("Empty list not supported") # Get the model and generate embeddings - model_obj = await self.model_store.get_model(model) + model_obj = await self.model_store.get_model(params.model) embedding_model = await self._load_sentence_transformer_model(model_obj.provider_resource_id) embeddings = await asyncio.to_thread(embedding_model.encode, input_list, show_progress_bar=False) # Convert embeddings to the requested format data = [] for i, embedding in enumerate(embeddings): - if encoding_format == "base64": + if params.encoding_format == "base64": # Convert float array to base64 string float_bytes = struct.pack(f"{len(embedding)}f", *embedding) embedding_value = base64.b64encode(float_bytes).decode("ascii") @@ -70,7 +67,7 @@ async def openai_embeddings( usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1) return OpenAIEmbeddingsResponse( data=data, - model=model, + model=params.model, usage=usage, ) diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index d1be1789a8..42b89f8970 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -20,6 +20,7 @@ OpenAICompletion, OpenAICompletionRequestWithExtraBody, OpenAIEmbeddingData, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ToolChoice, @@ -189,16 +190,12 @@ def get_api_key(self) -> str: async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: - model_obj = await self.model_store.get_model(model) + model_obj = await self.model_store.get_model(params.model) # Convert input to list if it's a string - input_list = [input] if isinstance(input, str) else input + input_list = [params.input] if isinstance(params.input, str) else params.input # Call litellm embedding function # litellm.drop_params = True @@ -207,11 +204,11 @@ async def openai_embeddings( input=input_list, api_key=self.get_api_key(), api_base=self.api_base, - dimensions=dimensions, + dimensions=params.dimensions, ) # Convert response to OpenAI format - data = b64_encode_openai_embeddings_response(response.data, encoding_format) + data = b64_encode_openai_embeddings_response(response.data, params.encoding_format) usage = OpenAIEmbeddingUsage( prompt_tokens=response["usage"]["prompt_tokens"], diff --git a/llama_stack/providers/utils/inference/openai_mixin.py b/llama_stack/providers/utils/inference/openai_mixin.py index 863ea161c5..11c0b6829c 100644 --- a/llama_stack/providers/utils/inference/openai_mixin.py +++ b/llama_stack/providers/utils/inference/openai_mixin.py @@ -21,6 +21,7 @@ OpenAICompletion, OpenAICompletionRequestWithExtraBody, OpenAIEmbeddingData, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, OpenAIMessageParam, @@ -316,23 +317,27 @@ async def _localize_image_url(m: OpenAIMessageParam) -> OpenAIMessageParam: async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: """ Direct OpenAI embeddings API call. """ + # Prepare request parameters + request_params = { + "model": await self._get_provider_model_id(params.model), + "input": params.input, + "encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN, + "dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN, + "user": params.user if params.user is not None else NOT_GIVEN, + } + + # Add extra_body if present + extra_body = params.model_extra + if extra_body: + request_params["extra_body"] = extra_body + # Call OpenAI embeddings API with properly typed parameters - response = await self.client.embeddings.create( - model=await self._get_provider_model_id(model), - input=input, - encoding_format=encoding_format if encoding_format is not None else NOT_GIVEN, - dimensions=dimensions if dimensions is not None else NOT_GIVEN, - user=user if user is not None else NOT_GIVEN, - ) + response = await self.client.embeddings.create(**request_params) data = [] for i, embedding_data in enumerate(response.data): @@ -350,7 +355,7 @@ async def openai_embeddings( return OpenAIEmbeddingsResponse( data=data, - model=model, + model=params.model, usage=usage, ) diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index ddfef9ba25..2d8d14512e 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -19,6 +19,8 @@ from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, + OpenAICreateVectorStoreFileBatchRequestWithExtraBody, + OpenAICreateVectorStoreRequestWithExtraBody, QueryChunksResponse, SearchRankingOptions, VectorStoreChunkingStrategy, @@ -340,39 +342,37 @@ async def query_chunks( async def openai_create_vector_store( self, - name: str | None = None, - file_ids: list[str] | None = None, - expires_after: dict[str, Any] | None = None, - chunking_strategy: dict[str, Any] | None = None, - metadata: dict[str, Any] | None = None, - embedding_model: str | None = None, - embedding_dimension: int | None = 384, - provider_id: str | None = None, - provider_vector_db_id: str | None = None, + params: OpenAICreateVectorStoreRequestWithExtraBody, ) -> VectorStoreObject: """Creates a vector store.""" created_at = int(time.time()) + + # Extract provider_vector_db_id from extra_body if present + provider_vector_db_id = None + if params.model_extra and "provider_vector_db_id" in params.model_extra: + provider_vector_db_id = params.model_extra["provider_vector_db_id"] + # Derive the canonical vector_db_id (allow override, else generate) vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") - if provider_id is None: + if params.provider_id is None: raise ValueError("Provider ID is required") - if embedding_model is None: + if params.embedding_model is None: raise ValueError("Embedding model is required") # Embedding dimension is required (defaulted to 384 if not provided) - if embedding_dimension is None: + if params.embedding_dimension is None: raise ValueError("Embedding dimension is required") # Register the VectorDB backing this vector store vector_db = VectorDB( identifier=vector_db_id, - embedding_dimension=embedding_dimension, - embedding_model=embedding_model, - provider_id=provider_id, + embedding_dimension=params.embedding_dimension, + embedding_model=params.embedding_model, + provider_id=params.provider_id, provider_resource_id=vector_db_id, - vector_db_name=name, + vector_db_name=params.name, ) await self.register_vector_db(vector_db) @@ -391,21 +391,21 @@ async def openai_create_vector_store( "id": vector_db_id, "object": "vector_store", "created_at": created_at, - "name": name, + "name": params.name, "usage_bytes": 0, "file_counts": file_counts.model_dump(), "status": status, - "expires_after": expires_after, + "expires_after": params.expires_after, "expires_at": None, "last_active_at": created_at, "file_ids": [], - "chunking_strategy": chunking_strategy, + "chunking_strategy": params.chunking_strategy, } # Add provider information to metadata if provided - metadata = metadata or {} - if provider_id: - metadata["provider_id"] = provider_id + metadata = params.metadata or {} + if params.provider_id: + metadata["provider_id"] = params.provider_id if provider_vector_db_id: metadata["provider_vector_db_id"] = provider_vector_db_id store_info["metadata"] = metadata @@ -417,7 +417,7 @@ async def openai_create_vector_store( self.openai_vector_stores[vector_db_id] = store_info # Now that our vector store is created, attach any files that were provided - file_ids = file_ids or [] + file_ids = params.file_ids or [] tasks = [self.openai_attach_file_to_vector_store(vector_db_id, file_id) for file_id in file_ids] await asyncio.gather(*tasks) @@ -976,15 +976,13 @@ async def openai_delete_vector_store_file( async def openai_create_vector_store_file_batch( self, vector_store_id: str, - file_ids: list[str], - attributes: dict[str, Any] | None = None, - chunking_strategy: VectorStoreChunkingStrategy | None = None, + params: OpenAICreateVectorStoreFileBatchRequestWithExtraBody, ) -> VectorStoreFileBatchObject: """Create a vector store file batch.""" if vector_store_id not in self.openai_vector_stores: raise VectorStoreNotFoundError(vector_store_id) - chunking_strategy = chunking_strategy or VectorStoreChunkingStrategyAuto() + chunking_strategy = params.chunking_strategy or VectorStoreChunkingStrategyAuto() created_at = int(time.time()) batch_id = generate_object_id("vector_store_file_batch", lambda: f"batch_{uuid.uuid4()}") @@ -996,8 +994,8 @@ async def openai_create_vector_store_file_batch( completed=0, cancelled=0, failed=0, - in_progress=len(file_ids), - total=len(file_ids), + in_progress=len(params.file_ids), + total=len(params.file_ids), ) # Create batch object immediately with in_progress status @@ -1011,8 +1009,8 @@ async def openai_create_vector_store_file_batch( batch_info = { **batch_object.model_dump(), - "file_ids": file_ids, - "attributes": attributes, + "file_ids": params.file_ids, + "attributes": params.attributes, "chunking_strategy": chunking_strategy.model_dump(), "expires_at": expires_at, } From 8fa91f98ef8cddb6ff8642ad168736253b27b96e Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 11 Oct 2025 17:02:23 -0700 Subject: [PATCH 2/9] fix(router): update VectorIORouter to use new params signature VectorIORouter was still using old individual parameter signature instead of the new params object. Updated both openai_create_vector_store and openai_create_vector_store_file_batch methods to match the API protocol. --- llama_stack/core/routers/vector_io.py | 58 +++++++++++++-------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index 0e3f9d8d9c..d2145f3b1c 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -6,12 +6,16 @@ import asyncio import uuid -from typing import Any +from typing import Annotated, Any + +from fastapi import Body from llama_stack.apis.common.content_types import InterleavedContent from llama_stack.apis.models import ModelType from llama_stack.apis.vector_io import ( Chunk, + OpenAICreateVectorStoreFileBatchRequestWithExtraBody, + OpenAICreateVectorStoreRequestWithExtraBody, QueryChunksResponse, SearchRankingOptions, VectorIO, @@ -120,18 +124,13 @@ async def query_chunks( # OpenAI Vector Stores API endpoints async def openai_create_vector_store( self, - name: str, - file_ids: list[str] | None = None, - expires_after: dict[str, Any] | None = None, - chunking_strategy: dict[str, Any] | None = None, - metadata: dict[str, Any] | None = None, - embedding_model: str | None = None, - embedding_dimension: int | None = None, - provider_id: str | None = None, + params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)], ) -> VectorStoreObject: - logger.debug(f"VectorIORouter.openai_create_vector_store: name={name}, provider_id={provider_id}") + logger.debug(f"VectorIORouter.openai_create_vector_store: name={params.name}, provider_id={params.provider_id}") # If no embedding model is provided, use the first available one + embedding_model = params.embedding_model + embedding_dimension = params.embedding_dimension if embedding_model is None: embedding_model_info = await self._get_first_embedding_model() if embedding_model_info is None: @@ -144,22 +143,23 @@ async def openai_create_vector_store( vector_db_id=vector_db_id, embedding_model=embedding_model, embedding_dimension=embedding_dimension, - provider_id=provider_id, + provider_id=params.provider_id, provider_vector_db_id=vector_db_id, - vector_db_name=name, + vector_db_name=params.name, ) provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier) - return await provider.openai_create_vector_store( - name=name, - file_ids=file_ids, - expires_after=expires_after, - chunking_strategy=chunking_strategy, - metadata=metadata, - embedding_model=embedding_model, - embedding_dimension=embedding_dimension, - provider_id=registered_vector_db.provider_id, - provider_vector_db_id=registered_vector_db.provider_resource_id, - ) + + # Update params with resolved values + params.embedding_model = embedding_model + params.embedding_dimension = embedding_dimension + params.provider_id = registered_vector_db.provider_id + + # Add provider_vector_db_id to extra_body if not already there + if params.model_extra is None: + params.model_extra = {} + params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id + + return await provider.openai_create_vector_store(params) async def openai_list_vector_stores( self, @@ -370,16 +370,14 @@ async def health(self) -> dict[str, HealthResponse]: async def openai_create_vector_store_file_batch( self, vector_store_id: str, - file_ids: list[str], - attributes: dict[str, Any] | None = None, - chunking_strategy: VectorStoreChunkingStrategy | None = None, + params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], ) -> VectorStoreFileBatchObject: - logger.debug(f"VectorIORouter.openai_create_vector_store_file_batch: {vector_store_id}, {len(file_ids)} files") + logger.debug( + f"VectorIORouter.openai_create_vector_store_file_batch: {vector_store_id}, {len(params.file_ids)} files" + ) return await self.routing_table.openai_create_vector_store_file_batch( vector_store_id=vector_store_id, - file_ids=file_ids, - attributes=attributes, - chunking_strategy=chunking_strategy, + params=params, ) async def openai_retrieve_vector_store_file_batch( From 3568ccdc819541054e2d2da02c627dcdc988c475 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 11 Oct 2025 17:21:43 -0700 Subject: [PATCH 3/9] fix: extract llama-stack params from model_extra, not as explicit fields --- llama_stack/apis/vector_io/vector_io.py | 6 ----- llama_stack/core/routers/vector_io.py | 23 +++++++--------- .../utils/memory/openai_vector_store_mixin.py | 26 ++++++++++--------- 3 files changed, 23 insertions(+), 32 deletions(-) diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index 4a13f0ebf7..3ced81bdd6 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -477,9 +477,6 @@ class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"): :param expires_after: (Optional) Expiration policy for the vector store :param chunking_strategy: (Optional) Strategy for splitting files into chunks :param metadata: Set of key-value pairs that can be attached to the vector store - :param embedding_model: (Optional) The embedding model to use for this vector store - :param embedding_dimension: (Optional) The dimension of the embedding vectors (default: 384) - :param provider_id: (Optional) The ID of the provider to use for this vector store """ name: str | None = None @@ -487,9 +484,6 @@ class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"): expires_after: dict[str, Any] | None = None chunking_strategy: dict[str, Any] | None = None metadata: dict[str, Any] | None = None - embedding_model: str | None = None - embedding_dimension: int | None = 384 - provider_id: str | None = None # extra_body can be accessed via .model_extra diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index d2145f3b1c..b779df1b7e 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -126,11 +126,15 @@ async def openai_create_vector_store( self, params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)], ) -> VectorStoreObject: - logger.debug(f"VectorIORouter.openai_create_vector_store: name={params.name}, provider_id={params.provider_id}") + # Extract llama-stack-specific parameters from extra_body + extra = params.model_extra or {} + embedding_model = extra.get("embedding_model") + embedding_dimension = extra.get("embedding_dimension", 384) + provider_id = extra.get("provider_id") + + logger.debug(f"VectorIORouter.openai_create_vector_store: name={params.name}, provider_id={provider_id}") # If no embedding model is provided, use the first available one - embedding_model = params.embedding_model - embedding_dimension = params.embedding_dimension if embedding_model is None: embedding_model_info = await self._get_first_embedding_model() if embedding_model_info is None: @@ -143,22 +147,13 @@ async def openai_create_vector_store( vector_db_id=vector_db_id, embedding_model=embedding_model, embedding_dimension=embedding_dimension, - provider_id=params.provider_id, + provider_id=provider_id, provider_vector_db_id=vector_db_id, vector_db_name=params.name, ) provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier) - # Update params with resolved values - params.embedding_model = embedding_model - params.embedding_dimension = embedding_dimension - params.provider_id = registered_vector_db.provider_id - - # Add provider_vector_db_id to extra_body if not already there - if params.model_extra is None: - params.model_extra = {} - params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id - + # Pass params as-is to provider - it will extract what it needs from model_extra return await provider.openai_create_vector_store(params) async def openai_list_vector_stores( diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 2d8d14512e..992a29664f 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -347,30 +347,32 @@ async def openai_create_vector_store( """Creates a vector store.""" created_at = int(time.time()) - # Extract provider_vector_db_id from extra_body if present - provider_vector_db_id = None - if params.model_extra and "provider_vector_db_id" in params.model_extra: - provider_vector_db_id = params.model_extra["provider_vector_db_id"] + # Extract llama-stack-specific parameters from extra_body + extra = params.model_extra or {} + provider_vector_db_id = extra.get("provider_vector_db_id") + embedding_model = extra.get("embedding_model") + embedding_dimension = extra.get("embedding_dimension", 384) + provider_id = extra.get("provider_id") # Derive the canonical vector_db_id (allow override, else generate) vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") - if params.provider_id is None: + if provider_id is None: raise ValueError("Provider ID is required") - if params.embedding_model is None: + if embedding_model is None: raise ValueError("Embedding model is required") # Embedding dimension is required (defaulted to 384 if not provided) - if params.embedding_dimension is None: + if embedding_dimension is None: raise ValueError("Embedding dimension is required") # Register the VectorDB backing this vector store vector_db = VectorDB( identifier=vector_db_id, - embedding_dimension=params.embedding_dimension, - embedding_model=params.embedding_model, - provider_id=params.provider_id, + embedding_dimension=embedding_dimension, + embedding_model=embedding_model, + provider_id=provider_id, provider_resource_id=vector_db_id, vector_db_name=params.name, ) @@ -404,8 +406,8 @@ async def openai_create_vector_store( # Add provider information to metadata if provided metadata = params.metadata or {} - if params.provider_id: - metadata["provider_id"] = params.provider_id + if provider_id: + metadata["provider_id"] = provider_id if provider_vector_db_id: metadata["provider_vector_db_id"] = provider_vector_db_id store_info["metadata"] = metadata From 58fcaa445e02d9f5c487fae9793438a852e4d3ed Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 11 Oct 2025 17:37:10 -0700 Subject: [PATCH 4/9] fix: update VectorIORouter to get provider before calling openai_* methods VectorDBsRoutingTable was removed in a165b8b5, so VectorIORouter needs to get the provider directly using routing_table.get_provider_impl() before calling provider methods, consistent with how insert_chunks() already works. --- llama_stack/core/routers/vector_io.py | 42 ++++++++++++------ .../test_vector_io_openai_vector_stores.py | 43 +++++++------------ 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index b779df1b7e..41d32b10cc 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -214,7 +214,8 @@ async def openai_retrieve_vector_store( vector_store_id: str, ) -> VectorStoreObject: logger.debug(f"VectorIORouter.openai_retrieve_vector_store: {vector_store_id}") - return await self.routing_table.openai_retrieve_vector_store(vector_store_id) + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store(vector_store_id) async def openai_update_vector_store( self, @@ -224,7 +225,8 @@ async def openai_update_vector_store( metadata: dict[str, Any] | None = None, ) -> VectorStoreObject: logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}") - return await self.routing_table.openai_update_vector_store( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_update_vector_store( vector_store_id=vector_store_id, name=name, expires_after=expires_after, @@ -236,7 +238,8 @@ async def openai_delete_vector_store( vector_store_id: str, ) -> VectorStoreDeleteResponse: logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}") - return await self.routing_table.openai_delete_vector_store(vector_store_id) + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_delete_vector_store(vector_store_id) async def openai_search_vector_store( self, @@ -249,7 +252,8 @@ async def openai_search_vector_store( search_mode: str | None = "vector", ) -> VectorStoreSearchResponsePage: logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}") - return await self.routing_table.openai_search_vector_store( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_search_vector_store( vector_store_id=vector_store_id, query=query, filters=filters, @@ -267,7 +271,8 @@ async def openai_attach_file_to_vector_store( chunking_strategy: VectorStoreChunkingStrategy | None = None, ) -> VectorStoreFileObject: logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}") - return await self.routing_table.openai_attach_file_to_vector_store( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_attach_file_to_vector_store( vector_store_id=vector_store_id, file_id=file_id, attributes=attributes, @@ -284,7 +289,8 @@ async def openai_list_files_in_vector_store( filter: VectorStoreFileStatus | None = None, ) -> list[VectorStoreFileObject]: logger.debug(f"VectorIORouter.openai_list_files_in_vector_store: {vector_store_id}") - return await self.routing_table.openai_list_files_in_vector_store( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_list_files_in_vector_store( vector_store_id=vector_store_id, limit=limit, order=order, @@ -299,7 +305,8 @@ async def openai_retrieve_vector_store_file( file_id: str, ) -> VectorStoreFileObject: logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file: {vector_store_id}, {file_id}") - return await self.routing_table.openai_retrieve_vector_store_file( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file( vector_store_id=vector_store_id, file_id=file_id, ) @@ -310,7 +317,8 @@ async def openai_retrieve_vector_store_file_contents( file_id: str, ) -> VectorStoreFileContentsResponse: logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}") - return await self.routing_table.openai_retrieve_vector_store_file_contents( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file_contents( vector_store_id=vector_store_id, file_id=file_id, ) @@ -322,7 +330,8 @@ async def openai_update_vector_store_file( attributes: dict[str, Any], ) -> VectorStoreFileObject: logger.debug(f"VectorIORouter.openai_update_vector_store_file: {vector_store_id}, {file_id}") - return await self.routing_table.openai_update_vector_store_file( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_update_vector_store_file( vector_store_id=vector_store_id, file_id=file_id, attributes=attributes, @@ -334,7 +343,8 @@ async def openai_delete_vector_store_file( file_id: str, ) -> VectorStoreFileDeleteResponse: logger.debug(f"VectorIORouter.openai_delete_vector_store_file: {vector_store_id}, {file_id}") - return await self.routing_table.openai_delete_vector_store_file( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_delete_vector_store_file( vector_store_id=vector_store_id, file_id=file_id, ) @@ -370,7 +380,8 @@ async def openai_create_vector_store_file_batch( logger.debug( f"VectorIORouter.openai_create_vector_store_file_batch: {vector_store_id}, {len(params.file_ids)} files" ) - return await self.routing_table.openai_create_vector_store_file_batch( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_create_vector_store_file_batch( vector_store_id=vector_store_id, params=params, ) @@ -381,7 +392,8 @@ async def openai_retrieve_vector_store_file_batch( vector_store_id: str, ) -> VectorStoreFileBatchObject: logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_batch: {batch_id}, {vector_store_id}") - return await self.routing_table.openai_retrieve_vector_store_file_batch( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file_batch( batch_id=batch_id, vector_store_id=vector_store_id, ) @@ -397,7 +409,8 @@ async def openai_list_files_in_vector_store_file_batch( order: str | None = "desc", ) -> VectorStoreFilesListInBatchResponse: logger.debug(f"VectorIORouter.openai_list_files_in_vector_store_file_batch: {batch_id}, {vector_store_id}") - return await self.routing_table.openai_list_files_in_vector_store_file_batch( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_list_files_in_vector_store_file_batch( batch_id=batch_id, vector_store_id=vector_store_id, after=after, @@ -413,7 +426,8 @@ async def openai_cancel_vector_store_file_batch( vector_store_id: str, ) -> VectorStoreFileBatchObject: logger.debug(f"VectorIORouter.openai_cancel_vector_store_file_batch: {batch_id}, {vector_store_id}") - return await self.routing_table.openai_cancel_vector_store_file_batch( + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_cancel_vector_store_file_batch( batch_id=batch_id, vector_store_id=vector_store_id, ) diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index ed0934224a..a5c491f533 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -15,6 +15,7 @@ from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, + OpenAICreateVectorStoreFileBatchRequestWithExtraBody, QueryChunksResponse, VectorStoreChunkingStrategyAuto, VectorStoreFileObject, @@ -326,8 +327,7 @@ async def test_create_vector_store_file_batch(vector_io_adapter): vector_io_adapter._process_file_batch_async = AsyncMock() batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=file_ids, + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) assert batch.vector_store_id == store_id @@ -354,8 +354,7 @@ async def test_retrieve_vector_store_file_batch(vector_io_adapter): # Create batch first created_batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=file_ids, + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # Retrieve batch @@ -388,8 +387,7 @@ async def test_cancel_vector_store_file_batch(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=file_ids, + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # Cancel batch @@ -434,8 +432,7 @@ async def test_list_files_in_vector_store_file_batch(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=file_ids, + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # List files @@ -454,8 +451,7 @@ async def test_file_batch_validation_errors(vector_io_adapter): # Test nonexistent vector store with pytest.raises(VectorStoreNotFoundError): await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id="nonexistent", - file_ids=["file_1"], + vector_store_id="nonexistent", params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"]) ) # Setup store for remaining tests @@ -472,8 +468,7 @@ async def test_file_batch_validation_errors(vector_io_adapter): # Test wrong vector store for batch vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=["file_1"], + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"]) ) # Create wrong_store so it exists but the batch doesn't belong to it @@ -520,8 +515,7 @@ async def test_file_batch_pagination(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=file_ids, + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # Test pagination with limit @@ -593,8 +587,7 @@ async def test_file_batch_status_filtering(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=file_ids, + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # Test filtering by completed status @@ -636,8 +629,7 @@ async def test_cancel_completed_batch_fails(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=file_ids, + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # Manually update status to completed @@ -671,8 +663,7 @@ async def test_file_batch_persistence_across_restarts(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=file_ids, + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) batch_id = batch.id @@ -727,8 +718,7 @@ async def test_cancelled_batch_persists_in_storage(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=file_ids, + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) batch_id = batch.id @@ -775,10 +765,10 @@ async def test_only_in_progress_batches_resumed(vector_io_adapter): # Create multiple batches batch1 = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, file_ids=["file_1"] + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"]) ) batch2 = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, file_ids=["file_2"] + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_2"]) ) # Complete one batch (should persist with completed status) @@ -791,7 +781,7 @@ async def test_only_in_progress_batches_resumed(vector_io_adapter): # Create a third batch that stays in progress batch3 = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, file_ids=["file_3"] + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_3"]) ) # Simulate restart - clear memory and reload from persistence @@ -952,8 +942,7 @@ async def mock_attach_file_with_delay(vector_store_id: str, file_id: str, **kwar file_ids = [f"file_{i}" for i in range(8)] # 8 files, but limit should be 5 batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, - file_ids=file_ids, + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # Give time for the semaphore logic to start processing files From bf59d26362b6dfa4606d44db9dd627a36aee1cfd Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 11 Oct 2025 21:52:30 -0700 Subject: [PATCH 5/9] fixes --- llama_stack/core/routers/vector_io.py | 11 ++++++++++- .../utils/memory/openai_vector_store_mixin.py | 7 ++++--- llama_stack/providers/utils/memory/vector_store.py | 14 ++++++++++---- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index 41d32b10cc..27e56fef2b 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -135,6 +135,8 @@ async def openai_create_vector_store( logger.debug(f"VectorIORouter.openai_create_vector_store: name={params.name}, provider_id={provider_id}") # If no embedding model is provided, use the first available one + # TODO: this branch will soon be deleted so you _must_ provide the embedding_model when + # creating a vector store if embedding_model is None: embedding_model_info = await self._get_first_embedding_model() if embedding_model_info is None: @@ -153,7 +155,14 @@ async def openai_create_vector_store( ) provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier) - # Pass params as-is to provider - it will extract what it needs from model_extra + # Update model_extra with registered values so provider uses the already-registered vector_db + if params.model_extra is None: + params.model_extra = {} + params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id + params.model_extra["provider_id"] = registered_vector_db.provider_id + params.model_extra["embedding_model"] = embedding_model + params.model_extra["embedding_dimension"] = embedding_dimension + return await provider.openai_create_vector_store(params) async def openai_list_vector_stores( diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 992a29664f..70bcbba320 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -10,8 +10,9 @@ import time import uuid from abc import ABC, abstractmethod -from typing import Any +from typing import Annotated, Any +from fastapi import Body from pydantic import TypeAdapter from llama_stack.apis.common.errors import VectorStoreNotFoundError @@ -342,7 +343,7 @@ async def query_chunks( async def openai_create_vector_store( self, - params: OpenAICreateVectorStoreRequestWithExtraBody, + params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)], ) -> VectorStoreObject: """Creates a vector store.""" created_at = int(time.time()) @@ -978,7 +979,7 @@ async def openai_delete_vector_store_file( async def openai_create_vector_store_file_batch( self, vector_store_id: str, - params: OpenAICreateVectorStoreFileBatchRequestWithExtraBody, + params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], ) -> VectorStoreFileBatchObject: """Create a vector store file batch.""" if vector_store_id not in self.openai_vector_stores: diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index c0534a8756..0375ecaaa4 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -21,6 +21,7 @@ URL, InterleavedContent, ) +from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody from llama_stack.apis.tools import RAGDocument from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse @@ -274,10 +275,11 @@ async def insert_chunks( _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension) if chunks_to_embed: - resp = await self.inference_api.openai_embeddings( - self.vector_db.embedding_model, - [c.content for c in chunks_to_embed], + params = OpenAIEmbeddingsRequestWithExtraBody( + model=self.vector_db.embedding_model, + input=[c.content for c in chunks_to_embed], ) + resp = await self.inference_api.openai_embeddings(params) for c, data in zip(chunks_to_embed, resp.data, strict=False): c.embedding = data.embedding @@ -316,7 +318,11 @@ async def query_chunks( if mode == "keyword": return await self.index.query_keyword(query_string, k, score_threshold) - embeddings_response = await self.inference_api.openai_embeddings(self.vector_db.embedding_model, [query_string]) + params = OpenAIEmbeddingsRequestWithExtraBody( + model=self.vector_db.embedding_model, + input=[query_string], + ) + embeddings_response = await self.inference_api.openai_embeddings(params) query_vector = np.array(embeddings_response.data[0].embedding, dtype=np.float32) if mode == "hybrid": return await self.index.query_hybrid( From e5a1cdf554df629efb391a50266949f1c86f599b Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 11 Oct 2025 22:05:20 -0700 Subject: [PATCH 6/9] more fixes --- docs/static/deprecated-llama-stack-spec.html | 30 ++++--------------- docs/static/deprecated-llama-stack-spec.yaml | 27 ++++------------- docs/static/llama-stack-spec.html | 30 ++++--------------- docs/static/llama-stack-spec.yaml | 27 ++++------------- docs/static/stainless-llama-stack-spec.html | 30 ++++--------------- docs/static/stainless-llama-stack-spec.yaml | 27 ++++------------- llama_stack/apis/vector_io/vector_io.py | 4 +-- llama_stack/core/routers/vector_io.py | 12 +++----- .../utils/memory/openai_vector_store_mixin.py | 2 +- 9 files changed, 43 insertions(+), 146 deletions(-) diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index a3638749d4..b833817491 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -2623,17 +2623,7 @@ ], "summary": "Create a vector store file batch.", "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", - "parameters": [ - { - "name": "vector_store_id", - "in": "path", - "description": "The ID of the vector store to create the file batch for.", - "required": true, - "schema": { - "type": "string" - } - } - ], + "parameters": [], "requestBody": { "content": { "application/json": { @@ -12154,19 +12144,6 @@ ] }, "description": "Set of key-value pairs that can be attached to the vector store" - }, - "embedding_model": { - "type": "string", - "description": "(Optional) The embedding model to use for this vector store" - }, - "embedding_dimension": { - "type": "integer", - "default": 384, - "description": "(Optional) The dimension of the embedding vectors (default: 384)" - }, - "provider_id": { - "type": "string", - "description": "(Optional) The ID of the provider to use for this vector store" } }, "additionalProperties": false, @@ -12344,6 +12321,10 @@ "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { + "vector_store_id": { + "type": "string", + "description": "The ID of the vector store to create the file batch for" + }, "file_ids": { "type": "array", "items": { @@ -12384,6 +12365,7 @@ }, "additionalProperties": false, "required": [ + "vector_store_id", "file_ids" ], "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 3cbcbc82b7..d163e32f02 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -1932,14 +1932,7 @@ paths: Generate an OpenAI-compatible vector store file batch for the given vector store. - parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to create the file batch for. - required: true - schema: - type: string + parameters: [] requestBody: content: application/json: @@ -9205,19 +9198,6 @@ components: - type: object description: >- Set of key-value pairs that can be attached to the vector store - embedding_model: - type: string - description: >- - (Optional) The embedding model to use for this vector store - embedding_dimension: - type: integer - default: 384 - description: >- - (Optional) The dimension of the embedding vectors (default: 384) - provider_id: - type: string - description: >- - (Optional) The ID of the provider to use for this vector store additionalProperties: false title: >- OpenAICreateVectorStoreRequestWithExtraBody @@ -9346,6 +9326,10 @@ components: "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: + vector_store_id: + type: string + description: >- + The ID of the vector store to create the file batch for file_ids: type: array items: @@ -9371,6 +9355,7 @@ components: auto additionalProperties: false required: + - vector_store_id - file_ids title: >- OpenAICreateVectorStoreFileBatchRequestWithExtraBody diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index c1aa41abce..2bf50e5c76 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -3357,17 +3357,7 @@ ], "summary": "Create a vector store file batch.", "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", - "parameters": [ - { - "name": "vector_store_id", - "in": "path", - "description": "The ID of the vector store to create the file batch for.", - "required": true, - "schema": { - "type": "string" - } - } - ], + "parameters": [], "requestBody": { "content": { "application/json": { @@ -12680,19 +12670,6 @@ ] }, "description": "Set of key-value pairs that can be attached to the vector store" - }, - "embedding_model": { - "type": "string", - "description": "(Optional) The embedding model to use for this vector store" - }, - "embedding_dimension": { - "type": "integer", - "default": 384, - "description": "(Optional) The dimension of the embedding vectors (default: 384)" - }, - "provider_id": { - "type": "string", - "description": "(Optional) The ID of the provider to use for this vector store" } }, "additionalProperties": false, @@ -12870,6 +12847,10 @@ "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { + "vector_store_id": { + "type": "string", + "description": "The ID of the vector store to create the file batch for" + }, "file_ids": { "type": "array", "items": { @@ -12910,6 +12891,7 @@ }, "additionalProperties": false, "required": [ + "vector_store_id", "file_ids" ], "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index a66ceede83..d221c2129f 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -2553,14 +2553,7 @@ paths: Generate an OpenAI-compatible vector store file batch for the given vector store. - parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to create the file batch for. - required: true - schema: - type: string + parameters: [] requestBody: content: application/json: @@ -9670,19 +9663,6 @@ components: - type: object description: >- Set of key-value pairs that can be attached to the vector store - embedding_model: - type: string - description: >- - (Optional) The embedding model to use for this vector store - embedding_dimension: - type: integer - default: 384 - description: >- - (Optional) The dimension of the embedding vectors (default: 384) - provider_id: - type: string - description: >- - (Optional) The ID of the provider to use for this vector store additionalProperties: false title: >- OpenAICreateVectorStoreRequestWithExtraBody @@ -9811,6 +9791,10 @@ components: "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: + vector_store_id: + type: string + description: >- + The ID of the vector store to create the file batch for file_ids: type: array items: @@ -9836,6 +9820,7 @@ components: auto additionalProperties: false required: + - vector_store_id - file_ids title: >- OpenAICreateVectorStoreFileBatchRequestWithExtraBody diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 717352b35b..60865ef0ba 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -3357,17 +3357,7 @@ ], "summary": "Create a vector store file batch.", "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", - "parameters": [ - { - "name": "vector_store_id", - "in": "path", - "description": "The ID of the vector store to create the file batch for.", - "required": true, - "schema": { - "type": "string" - } - } - ], + "parameters": [], "requestBody": { "content": { "application/json": { @@ -14689,19 +14679,6 @@ ] }, "description": "Set of key-value pairs that can be attached to the vector store" - }, - "embedding_model": { - "type": "string", - "description": "(Optional) The embedding model to use for this vector store" - }, - "embedding_dimension": { - "type": "integer", - "default": 384, - "description": "(Optional) The dimension of the embedding vectors (default: 384)" - }, - "provider_id": { - "type": "string", - "description": "(Optional) The ID of the provider to use for this vector store" } }, "additionalProperties": false, @@ -14879,6 +14856,10 @@ "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { + "vector_store_id": { + "type": "string", + "description": "The ID of the vector store to create the file batch for" + }, "file_ids": { "type": "array", "items": { @@ -14919,6 +14900,7 @@ }, "additionalProperties": false, "required": [ + "vector_store_id", "file_ids" ], "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 4a5f7c8e0d..3f9270b933 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -2556,14 +2556,7 @@ paths: Generate an OpenAI-compatible vector store file batch for the given vector store. - parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to create the file batch for. - required: true - schema: - type: string + parameters: [] requestBody: content: application/json: @@ -11115,19 +11108,6 @@ components: - type: object description: >- Set of key-value pairs that can be attached to the vector store - embedding_model: - type: string - description: >- - (Optional) The embedding model to use for this vector store - embedding_dimension: - type: integer - default: 384 - description: >- - (Optional) The dimension of the embedding vectors (default: 384) - provider_id: - type: string - description: >- - (Optional) The ID of the provider to use for this vector store additionalProperties: false title: >- OpenAICreateVectorStoreRequestWithExtraBody @@ -11256,6 +11236,10 @@ components: "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: + vector_store_id: + type: string + description: >- + The ID of the vector store to create the file batch for file_ids: type: array items: @@ -11281,6 +11265,7 @@ components: auto additionalProperties: false required: + - vector_store_id - file_ids title: >- OpenAICreateVectorStoreFileBatchRequestWithExtraBody diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index 3ced81bdd6..17e9dae700 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -491,11 +491,13 @@ class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"): class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="allow"): """Request to create a vector store file batch with extra_body support. + :param vector_store_id: The ID of the vector store to create the file batch for :param file_ids: A list of File IDs that the vector store should use :param attributes: (Optional) Key-value attributes to store with the files :param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto """ + vector_store_id: str file_ids: list[str] attributes: dict[str, Any] | None = None chunking_strategy: VectorStoreChunkingStrategy | None = None @@ -847,13 +849,11 @@ async def openai_delete_vector_store_file( ) async def openai_create_vector_store_file_batch( self, - vector_store_id: str, params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], ) -> VectorStoreFileBatchObject: """Create a vector store file batch. Generate an OpenAI-compatible vector store file batch for the given vector store. - :param vector_store_id: The ID of the vector store to create the file batch for. :returns: A VectorStoreFileBatchObject representing the created file batch. """ ... diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index 27e56fef2b..b8ec69bbe3 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -135,7 +135,7 @@ async def openai_create_vector_store( logger.debug(f"VectorIORouter.openai_create_vector_store: name={params.name}, provider_id={provider_id}") # If no embedding model is provided, use the first available one - # TODO: this branch will soon be deleted so you _must_ provide the embedding_model when + # TODO: this branch will soon be deleted so you _must_ provide the embedding_model when # creating a vector store if embedding_model is None: embedding_model_info = await self._get_first_embedding_model() @@ -383,17 +383,13 @@ async def health(self) -> dict[str, HealthResponse]: async def openai_create_vector_store_file_batch( self, - vector_store_id: str, params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], ) -> VectorStoreFileBatchObject: logger.debug( - f"VectorIORouter.openai_create_vector_store_file_batch: {vector_store_id}, {len(params.file_ids)} files" - ) - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_create_vector_store_file_batch( - vector_store_id=vector_store_id, - params=params, + f"VectorIORouter.openai_create_vector_store_file_batch: {params.vector_store_id}, {len(params.file_ids)} files" ) + provider = await self.routing_table.get_provider_impl(params.vector_store_id) + return await provider.openai_create_vector_store_file_batch(params) async def openai_retrieve_vector_store_file_batch( self, diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 70bcbba320..23330a3d02 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -978,10 +978,10 @@ async def openai_delete_vector_store_file( async def openai_create_vector_store_file_batch( self, - vector_store_id: str, params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], ) -> VectorStoreFileBatchObject: """Create a vector store file batch.""" + vector_store_id = params.vector_store_id if vector_store_id not in self.openai_vector_stores: raise VectorStoreNotFoundError(vector_store_id) From eaa91aa4086fb40666a1d4126fbc745446f9653e Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sun, 12 Oct 2025 17:51:02 -0700 Subject: [PATCH 7/9] pre-commit and unit test fixes --- .../test_vector_io_openai_vector_stores.py | 32 ++++++++++--------- tests/unit/rag/test_vector_store.py | 27 ++++++++++++---- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index a5c491f533..62d95f6cf2 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -327,7 +327,7 @@ async def test_create_vector_store_file_batch(vector_io_adapter): vector_io_adapter._process_file_batch_async = AsyncMock() batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) ) assert batch.vector_store_id == store_id @@ -354,7 +354,7 @@ async def test_retrieve_vector_store_file_batch(vector_io_adapter): # Create batch first created_batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) ) # Retrieve batch @@ -387,7 +387,7 @@ async def test_cancel_vector_store_file_batch(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) ) # Cancel batch @@ -432,7 +432,7 @@ async def test_list_files_in_vector_store_file_batch(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) ) # List files @@ -451,7 +451,9 @@ async def test_file_batch_validation_errors(vector_io_adapter): # Test nonexistent vector store with pytest.raises(VectorStoreNotFoundError): await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id="nonexistent", params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"]) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody( + vector_store_id="nonexistent", file_ids=["file_1"] + ), ) # Setup store for remaining tests @@ -468,7 +470,7 @@ async def test_file_batch_validation_errors(vector_io_adapter): # Test wrong vector store for batch vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"]) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=["file_1"]) ) # Create wrong_store so it exists but the batch doesn't belong to it @@ -515,7 +517,7 @@ async def test_file_batch_pagination(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) ) # Test pagination with limit @@ -587,7 +589,7 @@ async def test_file_batch_status_filtering(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) ) # Test filtering by completed status @@ -629,7 +631,7 @@ async def test_cancel_completed_batch_fails(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) ) # Manually update status to completed @@ -663,7 +665,7 @@ async def test_file_batch_persistence_across_restarts(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) ) batch_id = batch.id @@ -718,7 +720,7 @@ async def test_cancelled_batch_persists_in_storage(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) ) batch_id = batch.id @@ -765,10 +767,10 @@ async def test_only_in_progress_batches_resumed(vector_io_adapter): # Create multiple batches batch1 = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"]) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=["file_1"]) ) batch2 = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_2"]) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=["file_2"]) ) # Complete one batch (should persist with completed status) @@ -781,7 +783,7 @@ async def test_only_in_progress_batches_resumed(vector_io_adapter): # Create a third batch that stays in progress batch3 = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_3"]) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=["file_3"]) ) # Simulate restart - clear memory and reload from persistence @@ -942,7 +944,7 @@ async def mock_attach_file_with_delay(vector_store_id: str, file_id: str, **kwar file_ids = [f"file_{i}" for i in range(8)] # 8 files, but limit should be 5 batch = await vector_io_adapter.openai_create_vector_store_file_batch( - vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) ) # Give time for the semaphore logic to start processing files diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py index 8c017a5513..1e40c98e80 100644 --- a/tests/unit/rag/test_vector_store.py +++ b/tests/unit/rag/test_vector_store.py @@ -13,7 +13,10 @@ import numpy as np import pytest -from llama_stack.apis.inference.inference import OpenAIEmbeddingData +from llama_stack.apis.inference.inference import ( + OpenAIEmbeddingData, + OpenAIEmbeddingsRequestWithExtraBody, +) from llama_stack.apis.tools import RAGDocument from llama_stack.apis.vector_io import Chunk from llama_stack.providers.utils.memory.vector_store import ( @@ -226,9 +229,14 @@ async def test_insert_chunks_without_embeddings(self): await vector_db_with_index.insert_chunks(chunks) - mock_inference_api.openai_embeddings.assert_called_once_with( - "test-model without embeddings", ["Test 1", "Test 2"] - ) + # Verify openai_embeddings was called with correct params + mock_inference_api.openai_embeddings.assert_called_once() + call_args = mock_inference_api.openai_embeddings.call_args[0] + assert len(call_args) == 1 + params = call_args[0] + assert isinstance(params, OpenAIEmbeddingsRequestWithExtraBody) + assert params.model == "test-model without embeddings" + assert params.input == ["Test 1", "Test 2"] mock_index.add_chunks.assert_called_once() args = mock_index.add_chunks.call_args[0] assert args[0] == chunks @@ -321,9 +329,14 @@ async def test_insert_chunks_with_partially_precomputed_embeddings(self): await vector_db_with_index.insert_chunks(chunks) - mock_inference_api.openai_embeddings.assert_called_once_with( - "test-model with partial embeddings", ["Test 1", "Test 3"] - ) + # Verify openai_embeddings was called with correct params + mock_inference_api.openai_embeddings.assert_called_once() + call_args = mock_inference_api.openai_embeddings.call_args[0] + assert len(call_args) == 1 + params = call_args[0] + assert isinstance(params, OpenAIEmbeddingsRequestWithExtraBody) + assert params.model == "test-model with partial embeddings" + assert params.input == ["Test 1", "Test 3"] mock_index.add_chunks.assert_called_once() args = mock_index.add_chunks.call_args[0] assert len(args[0]) == 3 From e6428492e6bd4c5757a5f075fd4d48ad5e186e39 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sun, 12 Oct 2025 18:48:55 -0700 Subject: [PATCH 8/9] undo vector_store_id in params --- docs/static/deprecated-llama-stack-spec.html | 17 ++++++---- docs/static/deprecated-llama-stack-spec.yaml | 14 ++++---- docs/static/llama-stack-spec.html | 17 ++++++---- docs/static/llama-stack-spec.yaml | 14 ++++---- docs/static/stainless-llama-stack-spec.html | 17 ++++++---- docs/static/stainless-llama-stack-spec.yaml | 14 ++++---- llama_stack/apis/vector_io/vector_io.py | 4 +-- llama_stack/core/library_client.py | 14 ++++++++ llama_stack/core/routers/vector_io.py | 7 ++-- .../utils/memory/openai_vector_store_mixin.py | 2 +- .../test_vector_io_openai_vector_stores.py | 33 +++++++++---------- 11 files changed, 94 insertions(+), 59 deletions(-) diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index b833817491..46417522cc 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -2623,7 +2623,17 @@ ], "summary": "Create a vector store file batch.", "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", - "parameters": [], + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store to create the file batch for.", + "required": true, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -12321,10 +12331,6 @@ "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { - "vector_store_id": { - "type": "string", - "description": "The ID of the vector store to create the file batch for" - }, "file_ids": { "type": "array", "items": { @@ -12365,7 +12371,6 @@ }, "additionalProperties": false, "required": [ - "vector_store_id", "file_ids" ], "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index d163e32f02..ffdfd8bc7c 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -1932,7 +1932,14 @@ paths: Generate an OpenAI-compatible vector store file batch for the given vector store. - parameters: [] + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store to create the file batch for. + required: true + schema: + type: string requestBody: content: application/json: @@ -9326,10 +9333,6 @@ components: "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: - vector_store_id: - type: string - description: >- - The ID of the vector store to create the file batch for file_ids: type: array items: @@ -9355,7 +9358,6 @@ components: auto additionalProperties: false required: - - vector_store_id - file_ids title: >- OpenAICreateVectorStoreFileBatchRequestWithExtraBody diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index c3aa9fbc0f..24e88b5f6c 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -3357,7 +3357,17 @@ ], "summary": "Create a vector store file batch.", "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", - "parameters": [], + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store to create the file batch for.", + "required": true, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -12847,10 +12857,6 @@ "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { - "vector_store_id": { - "type": "string", - "description": "The ID of the vector store to create the file batch for" - }, "file_ids": { "type": "array", "items": { @@ -12891,7 +12897,6 @@ }, "additionalProperties": false, "required": [ - "vector_store_id", "file_ids" ], "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 076cb50223..ac16410793 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -2553,7 +2553,14 @@ paths: Generate an OpenAI-compatible vector store file batch for the given vector store. - parameters: [] + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store to create the file batch for. + required: true + schema: + type: string requestBody: content: application/json: @@ -9791,10 +9798,6 @@ components: "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: - vector_store_id: - type: string - description: >- - The ID of the vector store to create the file batch for file_ids: type: array items: @@ -9820,7 +9823,6 @@ components: auto additionalProperties: false required: - - vector_store_id - file_ids title: >- OpenAICreateVectorStoreFileBatchRequestWithExtraBody diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index a7854c17b0..4184f13796 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -3357,7 +3357,17 @@ ], "summary": "Create a vector store file batch.", "description": "Create a vector store file batch.\nGenerate an OpenAI-compatible vector store file batch for the given vector store.", - "parameters": [], + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store to create the file batch for.", + "required": true, + "schema": { + "type": "string" + } + } + ], "requestBody": { "content": { "application/json": { @@ -14856,10 +14866,6 @@ "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": { "type": "object", "properties": { - "vector_store_id": { - "type": "string", - "description": "The ID of the vector store to create the file batch for" - }, "file_ids": { "type": "array", "items": { @@ -14900,7 +14906,6 @@ }, "additionalProperties": false, "required": [ - "vector_store_id", "file_ids" ], "title": "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 6910c428fe..b01779abbe 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -2556,7 +2556,14 @@ paths: Generate an OpenAI-compatible vector store file batch for the given vector store. - parameters: [] + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store to create the file batch for. + required: true + schema: + type: string requestBody: content: application/json: @@ -11236,10 +11243,6 @@ components: "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": type: object properties: - vector_store_id: - type: string - description: >- - The ID of the vector store to create the file batch for file_ids: type: array items: @@ -11265,7 +11268,6 @@ components: auto additionalProperties: false required: - - vector_store_id - file_ids title: >- OpenAICreateVectorStoreFileBatchRequestWithExtraBody diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index 17e9dae700..3ced81bdd6 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -491,13 +491,11 @@ class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"): class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="allow"): """Request to create a vector store file batch with extra_body support. - :param vector_store_id: The ID of the vector store to create the file batch for :param file_ids: A list of File IDs that the vector store should use :param attributes: (Optional) Key-value attributes to store with the files :param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto """ - vector_store_id: str file_ids: list[str] attributes: dict[str, Any] | None = None chunking_strategy: VectorStoreChunkingStrategy | None = None @@ -849,11 +847,13 @@ async def openai_delete_vector_store_file( ) async def openai_create_vector_store_file_batch( self, + vector_store_id: str, params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], ) -> VectorStoreFileBatchObject: """Create a vector store file batch. Generate an OpenAI-compatible vector store file batch for the given vector store. + :param vector_store_id: The ID of the vector store to create the file batch for. :returns: A VectorStoreFileBatchObject representing the created file batch. """ ... diff --git a/llama_stack/core/library_client.py b/llama_stack/core/library_client.py index 5d45bd8ad3..4d33576ba4 100644 --- a/llama_stack/core/library_client.py +++ b/llama_stack/core/library_client.py @@ -513,6 +513,14 @@ def _convert_body(self, func: Any, body: dict | None = None, exclude_params: set # Strip NOT_GIVENs to use the defaults in signature body = {k: v for k, v in body.items() if v is not NOT_GIVEN} + # Check if there's an unwrapped body parameter among multiple parameters + # (e.g., path param + body param like: vector_store_id: str, params: Annotated[Model, Body(...)]) + unwrapped_body_param = None + for param in params_list: + if is_unwrapped_body_param(param.annotation): + unwrapped_body_param = param + break + # Convert parameters to Pydantic models where needed converted_body = {} for param_name, param in sig.parameters.items(): @@ -522,5 +530,11 @@ def _convert_body(self, func: Any, body: dict | None = None, exclude_params: set converted_body[param_name] = value else: converted_body[param_name] = convert_to_pydantic(param.annotation, value) + elif unwrapped_body_param and param.name == unwrapped_body_param.name: + # This is the unwrapped body param - construct it from remaining body keys + base_type = get_args(param.annotation)[0] + # Extract only the keys that aren't already used by other params + remaining_keys = {k: v for k, v in body.items() if k not in converted_body} + converted_body[param.name] = base_type(**remaining_keys) return converted_body diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index b8ec69bbe3..79789ef0a2 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -383,13 +383,14 @@ async def health(self) -> dict[str, HealthResponse]: async def openai_create_vector_store_file_batch( self, + vector_store_id: str, params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], ) -> VectorStoreFileBatchObject: logger.debug( - f"VectorIORouter.openai_create_vector_store_file_batch: {params.vector_store_id}, {len(params.file_ids)} files" + f"VectorIORouter.openai_create_vector_store_file_batch: {vector_store_id}, {len(params.file_ids)} files" ) - provider = await self.routing_table.get_provider_impl(params.vector_store_id) - return await provider.openai_create_vector_store_file_batch(params) + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_create_vector_store_file_batch(vector_store_id, params) async def openai_retrieve_vector_store_file_batch( self, diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 23330a3d02..70bcbba320 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -978,10 +978,10 @@ async def openai_delete_vector_store_file( async def openai_create_vector_store_file_batch( self, + vector_store_id: str, params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], ) -> VectorStoreFileBatchObject: """Create a vector store file batch.""" - vector_store_id = params.vector_store_id if vector_store_id not in self.openai_vector_stores: raise VectorStoreNotFoundError(vector_store_id) diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 62d95f6cf2..28b07beb84 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -327,7 +327,7 @@ async def test_create_vector_store_file_batch(vector_io_adapter): vector_io_adapter._process_file_batch_async = AsyncMock() batch = await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) assert batch.vector_store_id == store_id @@ -354,7 +354,7 @@ async def test_retrieve_vector_store_file_batch(vector_io_adapter): # Create batch first created_batch = await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # Retrieve batch @@ -387,7 +387,7 @@ async def test_cancel_vector_store_file_batch(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # Cancel batch @@ -432,7 +432,7 @@ async def test_list_files_in_vector_store_file_batch(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # List files @@ -451,9 +451,8 @@ async def test_file_batch_validation_errors(vector_io_adapter): # Test nonexistent vector store with pytest.raises(VectorStoreNotFoundError): await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody( - vector_store_id="nonexistent", file_ids=["file_1"] - ), + vector_store_id="nonexistent", + params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"]), ) # Setup store for remaining tests @@ -470,7 +469,7 @@ async def test_file_batch_validation_errors(vector_io_adapter): # Test wrong vector store for batch vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() batch = await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=["file_1"]) + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"]) ) # Create wrong_store so it exists but the batch doesn't belong to it @@ -517,7 +516,7 @@ async def test_file_batch_pagination(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # Test pagination with limit @@ -589,7 +588,7 @@ async def test_file_batch_status_filtering(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # Test filtering by completed status @@ -631,7 +630,7 @@ async def test_cancel_completed_batch_fails(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # Manually update status to completed @@ -665,7 +664,7 @@ async def test_file_batch_persistence_across_restarts(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) batch_id = batch.id @@ -720,7 +719,7 @@ async def test_cancelled_batch_persists_in_storage(vector_io_adapter): # Create batch batch = await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) batch_id = batch.id @@ -767,10 +766,10 @@ async def test_only_in_progress_batches_resumed(vector_io_adapter): # Create multiple batches batch1 = await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=["file_1"]) + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"]) ) batch2 = await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=["file_2"]) + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_2"]) ) # Complete one batch (should persist with completed status) @@ -783,7 +782,7 @@ async def test_only_in_progress_batches_resumed(vector_io_adapter): # Create a third batch that stays in progress batch3 = await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=["file_3"]) + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_3"]) ) # Simulate restart - clear memory and reload from persistence @@ -944,7 +943,7 @@ async def mock_attach_file_with_delay(vector_store_id: str, file_id: str, **kwar file_ids = [f"file_{i}" for i in range(8)] # 8 files, but limit should be 5 batch = await vector_io_adapter.openai_create_vector_store_file_batch( - params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids) + vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids) ) # Give time for the semaphore logic to start processing files From bbdde4ec124b5819ea1d20435119416e634595d2 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sun, 12 Oct 2025 18:56:29 -0700 Subject: [PATCH 9/9] fix batch embeddings --- llama_stack/providers/inline/batches/reference/batches.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/inline/batches/reference/batches.py b/llama_stack/providers/inline/batches/reference/batches.py index 102537dd7e..fa581ae1fe 100644 --- a/llama_stack/providers/inline/batches/reference/batches.py +++ b/llama_stack/providers/inline/batches/reference/batches.py @@ -25,6 +25,7 @@ OpenAIChatCompletionRequestWithExtraBody, OpenAICompletionRequestWithExtraBody, OpenAIDeveloperMessageParam, + OpenAIEmbeddingsRequestWithExtraBody, OpenAIMessageParam, OpenAISystemMessageParam, OpenAIToolMessageParam, @@ -640,7 +641,9 @@ async def _process_single_request(self, batch_id: str, request: BatchRequest) -> }, } else: # /v1/embeddings - embeddings_response = await self.inference_api.openai_embeddings(**request.body) + embeddings_response = await self.inference_api.openai_embeddings( + OpenAIEmbeddingsRequestWithExtraBody(**request.body) + ) assert hasattr(embeddings_response, "model_dump_json"), ( "Embeddings response must have model_dump_json method" )