diff --git a/core/providers/azure.go b/core/providers/azure.go
index e41015d3e2..c802eac029 100644
--- a/core/providers/azure.go
+++ b/core/providers/azure.go
@@ -31,32 +31,6 @@ type AzureTextResponse struct {
 	Usage             schemas.LLMUsage `json:"usage"`              // Token usage statistics
 }
 
-// AzureChatResponse represents the response structure from Azure's chat completion API.
-// It includes completion choices, model information, and usage statistics.
-type AzureChatResponse struct {
-	ID                string                          `json:"id"`                 // Unique identifier for the completion
-	Object            string                          `json:"object"`             // Type of completion (always "chat.completion")
-	Choices           []schemas.BifrostResponseChoice `json:"choices"`            // Array of completion choices
-	Model             string                          `json:"model"`              // Model used for the completion
-	Created           int                             `json:"created"`            // Unix timestamp of completion creation
-	SystemFingerprint *string                         `json:"system_fingerprint"` // System fingerprint for the request
-	Usage             schemas.LLMUsage                `json:"usage"`              // Token usage statistics
-}
-
-// AzureEmbeddingResponse represents the response structure from Azure's embedding API.
-type AzureEmbeddingResponse struct {
-	Object string `json:"object"`
-	Data   []struct {
-		Object    string      `json:"object"`
-		Embedding interface{} `json:"embedding"`
-		Index     int         `json:"index"`
-	} `json:"data"`
-	Model             string           `json:"model"`
-	Usage             schemas.LLMUsage `json:"usage"`
-	ID                string           `json:"id"`
-	SystemFingerprint *string          `json:"system_fingerprint"`
-}
-
 // AzureError represents the error response structure from Azure's API.
 // It includes error code and message information.
 type AzureError struct {
@@ -79,19 +53,19 @@ var azureTextCompletionResponsePool = sync.Pool{
 // azureChatResponsePool provides a pool for Azure chat response objects.
 var azureChatResponsePool = sync.Pool{
 	New: func() interface{} {
-		return &AzureChatResponse{}
+		return &schemas.BifrostResponse{}
 	},
 }
 
 // acquireAzureChatResponse gets an Azure chat response from the pool and resets it.
-func acquireAzureChatResponse() *AzureChatResponse {
-	resp := azureChatResponsePool.Get().(*AzureChatResponse)
-	*resp = AzureChatResponse{} // Reset the struct
+func acquireAzureChatResponse() *schemas.BifrostResponse {
+	resp := azureChatResponsePool.Get().(*schemas.BifrostResponse)
+	*resp = schemas.BifrostResponse{} // Reset the struct
 	return resp
 }
 
 // releaseAzureChatResponse returns an Azure chat response to the pool.
-func releaseAzureChatResponse(resp *AzureChatResponse) {
+func releaseAzureChatResponse(resp *schemas.BifrostResponse) {
 	if resp != nil {
 		azureChatResponsePool.Put(resp)
 	}
@@ -139,7 +113,7 @@ func NewAzureProvider(config *schemas.ProviderConfig, logger schemas.Logger) (*A
 
 	// Pre-warm response pools
 	for range config.ConcurrencyAndBufferSize.Concurrency {
-		azureChatResponsePool.Put(&AzureChatResponse{})
+		azureChatResponsePool.Put(&schemas.BifrostResponse{})
 		azureTextCompletionResponsePool.Put(&AzureTextResponse{})
 
 	}
@@ -342,39 +316,24 @@ func (provider *AzureProvider) ChatCompletion(ctx context.Context, model string,
 		return nil, bifrostErr
 	}
 
-	// Create final response
-	bifrostResponse := &schemas.BifrostResponse{
-		ID:                response.ID,
-		Choices:           response.Choices,
-		Model:             response.Model,
-		Created:           response.Created,
-		SystemFingerprint: response.SystemFingerprint,
-		Usage:             &response.Usage,
-		ExtraFields: schemas.BifrostResponseExtraFields{
-			Provider: schemas.Azure,
-		},
-	}
+	response.ExtraFields.Provider = schemas.Azure
 
 	// Set raw response if enabled
 	if provider.sendBackRawResponse {
-		bifrostResponse.ExtraFields.RawResponse = rawResponse
+		response.ExtraFields.RawResponse = rawResponse
 	}
 
 	if params != nil {
-		bifrostResponse.ExtraFields.Params = *params
+		response.ExtraFields.Params = *params
 	}
 
-	return bifrostResponse, nil
+	return response, nil
 }
 
 // Embedding generates embeddings for the given input text(s) using Azure OpenAI.
 // The input can be either a single string or a slice of strings for batch embedding.
 // Returns a BifrostResponse containing the embedding(s) and any error that occurred.
 func (provider *AzureProvider) Embedding(ctx context.Context, model string, key schemas.Key, input *schemas.EmbeddingInput, params *schemas.ModelParameters) (*schemas.BifrostResponse, *schemas.BifrostError) {
-	if len(input.Texts) == 0 {
-		return nil, newBifrostOperationError("no input text provided for embedding", nil, schemas.Azure)
-	}
-
 	// Prepare request body - Azure uses deployment-scoped URLs, so model is not needed in body
 	requestBody := map[string]interface{}{
 		"input": input.Texts,
@@ -399,61 +358,27 @@ func (provider *AzureProvider) Embedding(ctx context.Context, model string, key
 		return nil, err
 	}
 
-	// Parse response
-	var response AzureEmbeddingResponse
-	if err := sonic.Unmarshal(responseBody, &response); err != nil {
-		return nil, newBifrostOperationError(schemas.ErrProviderResponseUnmarshal, err, schemas.Azure)
-	}
+	// The response is returned to the caller, so allocate it fresh: a pooled one
+	// would be released on return and reset by the next acquire while still in use.
+	response := &schemas.BifrostResponse{}
 
-	bifrostResponse := &schemas.BifrostResponse{
-		ID:                response.ID,
-		Object:            response.Object,
-		Model:             response.Model,
-		Usage:             &response.Usage,
-		SystemFingerprint: response.SystemFingerprint,
-		ExtraFields: schemas.BifrostResponseExtraFields{
-			Provider:    schemas.Azure,
-			RawResponse: responseBody,
-		},
+	// Use enhanced response handler with pre-allocated response
+	rawResponse, bifrostErr := handleProviderResponse(responseBody, response, provider.sendBackRawResponse)
+	if bifrostErr != nil {
+		return nil, bifrostErr
 	}
 
-	// Extract embeddings from response data
-	if len(response.Data) > 0 {
-		embeddings := make([][]float32, len(response.Data))
-		for i, data := range response.Data {
-			switch v := data.Embedding.(type) {
-			case []float32:
-				embeddings[i] = v
-			case []float64:
-				// Direct conversion from []float64 to []float32
-				floatArray := make([]float32, len(v))
-				for j := range v {
-					floatArray[j] = float32(v[j])
-				}
-				embeddings[i] = floatArray
-			case []interface{}:
-				// Fallback: element-by-element conversion for []interface{}
-				floatArray := make([]float32, len(v))
-				for j := range v {
-					if num, ok := v[j].(float64); ok {
-						floatArray[j] = float32(num)
-					} else {
-						return nil, newBifrostOperationError(fmt.Sprintf("unsupported number type in embedding array: %T", v[j]), nil, schemas.Azure)
-					}
-				}
-				embeddings[i] = floatArray
-			default:
-				return nil, newBifrostOperationError(fmt.Sprintf("unsupported embedding type: %T", data.Embedding), nil, schemas.Azure)
-			}
-		}
-		bifrostResponse.Embedding = embeddings
-	}
+	response.ExtraFields.Provider = schemas.Azure
 
 	if params != nil {
-		bifrostResponse.ExtraFields.Params = *params
+		response.ExtraFields.Params = *params
 	}
 
-	return bifrostResponse, nil
+	if provider.sendBackRawResponse {
+		response.ExtraFields.RawResponse = rawResponse
+	}
+
+	return response, nil
 }
 
 // ChatCompletionStream performs a streaming chat completion request to Azure's OpenAI API.
diff --git a/core/providers/bedrock.go b/core/providers/bedrock.go
index 8e2f3a38e3..7dde654bf5 100644
--- a/core/providers/bedrock.go
+++ b/core/providers/bedrock.go
@@ -1130,14 +1130,6 @@ func (provider *BedrockProvider) Embedding(ctx context.Context, model string, ke
 
 // handleTitanEmbedding handles embedding requests for Amazon Titan models.
 func (provider *BedrockProvider) handleTitanEmbedding(ctx context.Context, model string, key string, input *schemas.EmbeddingInput, params *schemas.ModelParameters) (*schemas.BifrostResponse, *schemas.BifrostError) {
-	// Titan Text Embeddings V1/V2 - only supports single text input
-	if len(input.Texts) == 0 {
-		return nil, newConfigurationError("no input text provided for embedding", schemas.Bedrock)
-	}
-	if len(input.Texts) > 1 {
-		return nil, newConfigurationError("Amazon Titan embedding models support only single text input, received multiple texts", schemas.Bedrock)
-	}
-
 	requestBody := map[string]interface{}{
 		"inputText": input.Texts[0],
 	}
@@ -1171,8 +1163,17 @@ func (provider *BedrockProvider) handleTitanEmbedding(ctx context.Context, model
 	}
 
 	bifrostResponse := &schemas.BifrostResponse{
-		Embedding: [][]float32{titanResp.Embedding},
-		Model:     model,
+		Object: "list",
+		Data: []schemas.BifrostEmbedding{
+			{
+				Index:  0,
+				Object: "embedding",
+				Embedding: schemas.BifrostEmbeddingResponse{
+					Embedding2DArray: &[][]float32{titanResp.Embedding},
+				},
+			},
+		},
+		Model: model,
 		Usage: &schemas.LLMUsage{
 			PromptTokens: titanResp.InputTextTokenCount,
 			TotalTokens:  titanResp.InputTextTokenCount,
@@ -1192,10 +1193,6 @@ func (provider *BedrockProvider) handleTitanEmbedding(ctx context.Context, model
 
 // handleCohereEmbedding handles embedding requests for Cohere models on Bedrock.
 func (provider *BedrockProvider) handleCohereEmbedding(ctx context.Context, model string, key string, input *schemas.EmbeddingInput, params *schemas.ModelParameters) (*schemas.BifrostResponse, *schemas.BifrostError) {
-	if len(input.Texts) == 0 {
-		return nil, newConfigurationError("no input text provided for embedding", schemas.Bedrock)
-	}
-
 	requestBody := map[string]interface{}{
 		"texts":      input.Texts,
 		"input_type": "search_document",
@@ -1225,9 +1222,18 @@ func (provider *BedrockProvider) handleCohereEmbedding(ctx context.Context, mode
 	totalInputTokens := approximateTokenCount(input.Texts)
 
 	bifrostResponse := &schemas.BifrostResponse{
-		Embedding: cohereResp.Embeddings,
-		ID:        cohereResp.ID,
-		Model:     model,
+		Object: "list",
+		Data: []schemas.BifrostEmbedding{
+			{
+				Index:  0,
+				Object: "embedding",
+				Embedding: schemas.BifrostEmbeddingResponse{
+					Embedding2DArray: &cohereResp.Embeddings,
+				},
+			},
+		},
+		ID:    cohereResp.ID,
+		Model: model,
 		Usage: &schemas.LLMUsage{
 			PromptTokens: totalInputTokens,
 			TotalTokens:  totalInputTokens,
diff --git a/core/providers/cohere.go b/core/providers/cohere.go
index 0135cccc59..7ab86387b6 100644
--- a/core/providers/cohere.go
+++ b/core/providers/cohere.go
@@ -596,10 +596,6 @@ func convertChatHistory(history []struct {
 // Embedding generates embeddings for the given input text(s) using the Cohere API.
 // Supports Cohere's embedding models and returns a BifrostResponse containing the embedding(s).
 func (provider *CohereProvider) Embedding(ctx context.Context, model string, key schemas.Key, input *schemas.EmbeddingInput, params *schemas.ModelParameters) (*schemas.BifrostResponse, *schemas.BifrostError) {
-	if len(input.Texts) == 0 {
-		return nil, newConfigurationError("no input text provided for embedding", schemas.Cohere)
-	}
-
 	// Prepare request body with default values
 	requestBody := map[string]interface{}{
 		"texts":           input.Texts,
@@ -683,9 +679,18 @@ func (provider *CohereProvider) Embedding(ctx context.Context, model string, key
 
 	// Create BifrostResponse
 	bifrostResponse := &schemas.BifrostResponse{
-		ID:        cohereResp.ID,
-		Embedding: cohereResp.Embeddings.Float,
-		Model:     model,
+		ID:     cohereResp.ID,
+		Object: "list",
+		Data: []schemas.BifrostEmbedding{
+			{
+				Index:  0,
+				Object: "embedding",
+				Embedding: schemas.BifrostEmbeddingResponse{
+					Embedding2DArray: &cohereResp.Embeddings.Float,
+				},
+			},
+		},
+		Model: model,
 		Usage: &schemas.LLMUsage{
 			PromptTokens: totalInputTokens,
 			TotalTokens:  totalInputTokens,
diff --git a/core/providers/mistral.go b/core/providers/mistral.go
index a32c30879b..9910edb070 100644
--- a/core/providers/mistral.go
+++ b/core/providers/mistral.go
@@ -15,20 +15,6 @@ import (
 	"github.com/valyala/fasthttp"
 )
 
-// MistralEmbeddingResponse represents the response structure from Mistral's embedding API.
-type MistralEmbeddingResponse struct {
-	Object string `json:"object"`
-	Data   []struct {
-		Object    string    `json:"object"`
-		Embedding []float32 `json:"embedding"`
-		Index     int       `json:"index"`
-	} `json:"data"`
-	Model             string           `json:"model"`
-	Usage             schemas.LLMUsage `json:"usage"`
-	ID                string           `json:"id"`
-	SystemFingerprint *string          `json:"system_fingerprint"`
-}
-
 // mistralResponsePool provides a pool for Mistral response objects.
 var mistralResponsePool = sync.Pool{
 	New: func() interface{} {
@@ -183,10 +169,6 @@ func (provider *MistralProvider) ChatCompletion(ctx context.Context, model strin
 // Embedding generates embeddings for the given input text(s) using the Mistral API.
 // Supports Mistral's embedding models and returns a BifrostResponse containing the embedding(s).
 func (provider *MistralProvider) Embedding(ctx context.Context, model string, key schemas.Key, input *schemas.EmbeddingInput, params *schemas.ModelParameters) (*schemas.BifrostResponse, *schemas.BifrostError) {
-	if len(input.Texts) == 0 {
-		return nil, newConfigurationError("no input text provided for embedding", schemas.Mistral)
-	}
-
 	// Prepare request body with base parameters
 	requestBody := map[string]interface{}{
 		"model": model,
@@ -254,46 +236,29 @@ func (provider *MistralProvider) Embedding(ctx context.Context, model string, ke
 		return nil, bifrostErr
 	}
 
-	// Parse response using sonic.RawMessage to avoid double parsing
-	rawMessage := resp.Body()
+	responseBody := resp.Body()
 
-	// Parse into structured response
-	var mistralResp MistralEmbeddingResponse
-	if err := sonic.Unmarshal(rawMessage, &mistralResp); err != nil {
-		return nil, newBifrostOperationError("error parsing Mistral embedding response", err, schemas.Mistral)
-	}
+	// The response is returned to the caller, so allocate it fresh: a pooled one
+	// would be released on return and could be handed to another request while in use.
+	response := &schemas.BifrostResponse{}
 
-	// Parse raw response for consistent format
-	var rawResponse interface{}
-	if err := sonic.Unmarshal(rawMessage, &rawResponse); err != nil {
-		return nil, newBifrostOperationError("error parsing raw response for Mistral embedding", err, schemas.Mistral)
+	// Use enhanced response handler with pre-allocated response
+	rawResponse, bifrostErr := handleProviderResponse(responseBody, response, provider.sendBackRawResponse)
+	if bifrostErr != nil {
+		return nil, bifrostErr
 	}
 
-	// Convert data to embeddings array
-	var embeddings [][]float32
-	for _, data := range mistralResp.Data {
-		embeddings = append(embeddings, data.Embedding)
-	}
+	response.ExtraFields.Provider = schemas.Mistral
 
-	// Create BifrostResponse
-	bifrostResponse := &schemas.BifrostResponse{
-		ID:                mistralResp.ID,
-		Object:            mistralResp.Object,
-		Embedding:         embeddings,
-		Model:             mistralResp.Model,
-		Usage:             &mistralResp.Usage,
-		SystemFingerprint: mistralResp.SystemFingerprint,
-		ExtraFields: schemas.BifrostResponseExtraFields{
-			Provider:    schemas.Mistral,
-			RawResponse: rawResponse,
-		},
+	if params != nil {
+		response.ExtraFields.Params = *params
 	}
 
-	if params != nil {
-		bifrostResponse.ExtraFields.Params = *params
+	if provider.sendBackRawResponse {
+		response.ExtraFields.RawResponse = rawResponse
 	}
 
-	return bifrostResponse, nil
+	return response, nil
 }
 
 // ChatCompletionStream performs a streaming chat completion request to the Mistral API.
diff --git a/core/providers/openai.go b/core/providers/openai.go
index dd5cef215d..1714f62417 100644
--- a/core/providers/openai.go
+++ b/core/providers/openai.go
@@ -6,11 +6,8 @@ import (
 	"bufio"
 	"bytes"
 	"context"
-	"encoding/base64"
-	"encoding/binary"
 	"fmt"
 	"io"
-	"math"
 	"mime/multipart"
 	"net/http"
 	"strings"
@@ -22,24 +19,6 @@ import (
 	"github.com/valyala/fasthttp"
 )
 
-// OpenAIResponse represents the response structure from the OpenAI API.
-// It includes completion choices, model information, and usage statistics.
-type OpenAIResponse struct {
-	ID      string                          `json:"id"`      // Unique identifier for the completion
-	Object  string                          `json:"object"`  // Type of completion (text.completion, chat.completion, or embedding)
-	Choices []schemas.BifrostResponseChoice `json:"choices"` // Array of completion choices
-	Data    []struct {                      // Embedding data
-		Object    string `json:"object"`
-		Embedding any    `json:"embedding"`
-		Index     int    `json:"index"`
-	} `json:"data,omitempty"`
-	Model             string           `json:"model"`              // Model used for the completion
-	Created           int              `json:"created"`            // Unix timestamp of completion creation
-	ServiceTier       *string          `json:"service_tier"`       // Service tier used for the request
-	SystemFingerprint *string          `json:"system_fingerprint"` // System fingerprint for the request
-	Usage             schemas.LLMUsage `json:"usage"`              // Token usage statistics
-}
-
 // openAIResponsePool provides a pool for OpenAI response objects.
 var openAIResponsePool = sync.Pool{
 	New: func() interface{} {
@@ -241,11 +220,6 @@ func prepareOpenAIChatRequest(messages []schemas.BifrostMessage, params *schemas
 // The input can be either a single string or a slice of strings for batch embedding.
 // Returns a BifrostResponse containing the embedding(s) and any error that occurred.
 func (provider *OpenAIProvider) Embedding(ctx context.Context, model string, key schemas.Key, input *schemas.EmbeddingInput, params *schemas.ModelParameters) (*schemas.BifrostResponse, *schemas.BifrostError) {
-	// Validate input texts are not empty
-	if len(input.Texts) == 0 {
-		return nil, newBifrostOperationError("input texts cannot be empty", nil, schemas.OpenAI)
-	}
-
 	// Prepare request body with base parameters
 	requestBody := map[string]interface{}{
 		"model": model,
@@ -302,74 +276,29 @@ func (provider *OpenAIProvider) Embedding(ctx context.Context, model string, key
 		return nil, parseOpenAIError(resp)
 	}
 
-	// Parse response
-	var response OpenAIResponse
-	if err := sonic.Unmarshal(resp.Body(), &response); err != nil {
-		return nil, newBifrostOperationError(schemas.ErrProviderResponseUnmarshal, err, schemas.OpenAI)
-	}
+	responseBody := resp.Body()
 
-	// Create final response
-	bifrostResponse := &schemas.BifrostResponse{
-		ID:                response.ID,
-		Object:            response.Object,
-		Model:             response.Model,
-		Created:           response.Created,
-		Usage:             &response.Usage,
-		ServiceTier:       response.ServiceTier,
-		SystemFingerprint: response.SystemFingerprint,
-		ExtraFields: schemas.BifrostResponseExtraFields{
-			Provider: schemas.OpenAI,
-		},
-	}
+	// The response is returned to the caller, so allocate it fresh: a pooled one
+	// would be released on return and could be handed to another request while in use.
+	response := &schemas.BifrostResponse{}
 
-	// Extract embeddings from response data
-	if len(response.Data) > 0 {
-		embeddings := make([][]float32, len(response.Data))
-		for i, data := range response.Data {
-			switch v := data.Embedding.(type) {
-			case []float32:
-				embeddings[i] = v
-			case []interface{}:
-				// Convert []interface{} to []float32
-				floatArray := make([]float32, len(v))
-				for j := range v {
-					if num, ok := v[j].(float64); ok {
-						floatArray[j] = float32(num)
-					} else {
-						return nil, newBifrostOperationError(fmt.Sprintf("unsupported number type in embedding array: %T", v[j]), nil, schemas.OpenAI)
-					}
-				}
-				embeddings[i] = floatArray
-			case string:
-				// Decode base64 string into float32 array
-				decodedData, err := base64.StdEncoding.DecodeString(v)
-				if err != nil {
-					return nil, newBifrostOperationError("failed to decode base64 embedding", err, schemas.OpenAI)
-				}
+	// Use enhanced response handler with pre-allocated response
+	rawResponse, bifrostErr := handleProviderResponse(responseBody, response, provider.sendBackRawResponse)
+	if bifrostErr != nil {
+		return nil, bifrostErr
+	}
 
-				// Validate that decoded data length is divisible by 4 (size of float32)
-				const sizeOfFloat32 = 4
-				if len(decodedData)%sizeOfFloat32 != 0 {
-					return nil, newBifrostOperationError("malformed base64 embedding data: length not divisible by 4", nil, schemas.OpenAI)
-				}
+	response.ExtraFields.Provider = schemas.OpenAI
 
-				floats := make([]float32, len(decodedData)/sizeOfFloat32)
-				for i := 0; i < len(floats); i++ {
-					floats[i] = math.Float32frombits(binary.LittleEndian.Uint32(decodedData[i*4 : (i+1)*4]))
-				}
-				embeddings[i] = floats
-			default:
-				return nil, newBifrostOperationError(fmt.Sprintf("unsupported embedding type: %T", data.Embedding), nil, schemas.OpenAI)
-			}
-		}
-		bifrostResponse.Embedding = embeddings
+	if params != nil {
+		response.ExtraFields.Params = *params
 	}
 
-	if params != nil {
-		bifrostResponse.ExtraFields.Params = *params
+	if provider.sendBackRawResponse {
+		response.ExtraFields.RawResponse = rawResponse
 	}
 
-	return bifrostResponse, nil
+	return response, nil
 }
 
 // ChatCompletionStream handles streaming for OpenAI chat completions.
diff --git a/core/schemas/bifrost.go b/core/schemas/bifrost.go
index 00ff577bb8..ebbaa456a8 100644
--- a/core/schemas/bifrost.go
+++ b/core/schemas/bifrost.go
@@ -373,9 +373,9 @@ type ImageURLStruct struct {
 // BifrostResponse represents the complete result from any bifrost request.
 type BifrostResponse struct {
 	ID                string                     `json:"id,omitempty"`
-	Object            string                     `json:"object,omitempty"` // text.completion, chat.completion, or embedding
+	Object            string                     `json:"object,omitempty"` // text.completion, chat.completion, embedding, speech, transcribe
 	Choices           []BifrostResponseChoice    `json:"choices,omitempty"`
-	Embedding         [][]float32                `json:"data,omitempty"`       // Maps to "data" field in provider responses (e.g., OpenAI embedding format)
+	Data              []BifrostEmbedding         `json:"data,omitempty"`       // Maps to "data" field in provider responses (e.g., OpenAI embedding format)
 	Speech            *BifrostSpeech             `json:"speech,omitempty"`     // Maps to "speech" field in provider responses (e.g., OpenAI speech format)
 	Transcribe        *BifrostTranscribe         `json:"transcribe,omitempty"` // Maps to "transcribe" field in provider responses (e.g., OpenAI transcription format)
 	Model             string                     `json:"model,omitempty"`
@@ -490,6 +490,56 @@ type Annotation struct {
 	Citation Citation `json:"url_citation"`
 }
 
+type BifrostEmbedding struct {
+	Index     int                      `json:"index"`
+	Object    string                   `json:"object"`    // embedding
+	Embedding BifrostEmbeddingResponse `json:"embedding"` // can be []float32 or string
+}
+
+type BifrostEmbeddingResponse struct {
+	EmbeddingStr     *string
+	EmbeddingArray   *[]float32
+	Embedding2DArray *[][]float32
+}
+
+func (be BifrostEmbeddingResponse) MarshalJSON() ([]byte, error) {
+	if be.EmbeddingStr != nil {
+		return sonic.Marshal(be.EmbeddingStr)
+	}
+	if be.EmbeddingArray != nil {
+		return sonic.Marshal(be.EmbeddingArray)
+	}
+	if be.Embedding2DArray != nil {
+		return sonic.Marshal(be.Embedding2DArray)
+	}
+	return nil, fmt.Errorf("no embedding found")
+}
+
+func (be *BifrostEmbeddingResponse) UnmarshalJSON(data []byte) error {
+	// First, try to unmarshal as a direct string
+	var stringContent string
+	if err := sonic.Unmarshal(data, &stringContent); err == nil {
+		be.EmbeddingStr = &stringContent
+		return nil
+	}
+
+	// Try to unmarshal as a direct array of float32
+	var arrayContent []float32
+	if err := sonic.Unmarshal(data, &arrayContent); err == nil {
+		be.EmbeddingArray = &arrayContent
+		return nil
+	}
+
+	// Try to unmarshal as a direct 2D array of float32
+	var arrayContent2D [][]float32
+	if err := sonic.Unmarshal(data, &arrayContent2D); err == nil {
+		be.Embedding2DArray = &arrayContent2D
+		return nil
+	}
+
+	return fmt.Errorf("embedding field is neither a string nor an array of float32 nor a 2D array of float32")
+}
+
 // BifrostResponseChoice represents a choice in the completion result.
 // This struct can represent either a streaming or non-streaming response choice.
 // IMPORTANT: Only one of BifrostNonStreamResponseChoice or BifrostStreamResponseChoice
diff --git a/transports/bifrost-http/integrations/openai/types.go b/transports/bifrost-http/integrations/openai/types.go
index 7133920d23..356c509f3a 100644
--- a/transports/bifrost-http/integrations/openai/types.go
+++ b/transports/bifrost-http/integrations/openai/types.go
@@ -96,19 +96,12 @@ type OpenAIChatResponse struct {
 
 // OpenAIEmbeddingResponse represents an OpenAI embedding response
 type OpenAIEmbeddingResponse struct {
-	Object            string            `json:"object"`
-	Data              []OpenAIEmbedding `json:"data"`
-	Model             string            `json:"model"`
-	Usage             *schemas.LLMUsage `json:"usage,omitempty"`
-	ServiceTier       *string           `json:"service_tier,omitempty"`
-	SystemFingerprint *string           `json:"system_fingerprint,omitempty"`
-}
-
-// OpenAIEmbedding represents a single embedding in the response
-type OpenAIEmbedding struct {
-	Object    string    `json:"object"`
-	Embedding []float32 `json:"embedding"`
-	Index     int       `json:"index"`
+	Object            string                     `json:"object"`
+	Data              []schemas.BifrostEmbedding `json:"data"`
+	Model             string                     `json:"model"`
+	Usage             *schemas.LLMUsage          `json:"usage,omitempty"`
+	ServiceTier       *string                    `json:"service_tier,omitempty"`
+	SystemFingerprint *string                    `json:"system_fingerprint,omitempty"`
 }
 
 // OpenAIChatError represents an OpenAI chat completion error response
@@ -434,22 +427,13 @@ func DeriveOpenAITranscriptionFromBifrostResponse(bifrostResp *schemas.BifrostRe
 
 // DeriveOpenAIEmbeddingFromBifrostResponse converts a Bifrost embedding response to OpenAI format
 func DeriveOpenAIEmbeddingFromBifrostResponse(bifrostResp *schemas.BifrostResponse) *OpenAIEmbeddingResponse {
-	if bifrostResp == nil || bifrostResp.Embedding == nil {
+	if bifrostResp == nil || bifrostResp.Data == nil {
 		return nil
 	}
 
-	var embeddingData []OpenAIEmbedding
-	for i, embedding := range bifrostResp.Embedding {
-		embeddingData = append(embeddingData, OpenAIEmbedding{
-			Object:    "embedding",
-			Embedding: embedding,
-			Index:     i,
-		})
-	}
-
 	return &OpenAIEmbeddingResponse{
 		Object:            "list",
-		Data:              embeddingData,
+		Data:              bifrostResp.Data,
 		Model:             bifrostResp.Model,
 		Usage:             bifrostResp.Usage,
 		ServiceTier:       bifrostResp.ServiceTier,
diff --git a/transports/bifrost-http/plugins/logging/main.go b/transports/bifrost-http/plugins/logging/main.go
index 6329961c86..72d9aa196b 100644
--- a/transports/bifrost-http/plugins/logging/main.go
+++ b/transports/bifrost-http/plugins/logging/main.go
@@ -41,7 +41,7 @@ type UpdateLogData struct {
 	Status              string
 	TokenUsage          *schemas.LLMUsage
 	OutputMessage       *schemas.BifrostMessage
-	EmbeddingOutput     *[][]float32
+	EmbeddingOutput     *[]schemas.BifrostEmbedding
 	ToolCalls           *[]schemas.ToolCall
 	ErrorDetails        *schemas.BifrostError
 	Model               string                     // May be different from request
@@ -516,8 +516,8 @@ func (p *LoggerPlugin) PostHook(ctx *context.Context, result *schemas.BifrostRes
 				}
 			}
 
-			if result.Embedding != nil {
-				updateData.EmbeddingOutput = &result.Embedding
+			if result.Data != nil {
+				updateData.EmbeddingOutput = &result.Data
 			}
 
 			// Handle speech and transcription outputs for NON-streaming responses
diff --git a/transports/bifrost-http/plugins/logging/models.go b/transports/bifrost-http/plugins/logging/models.go
index 44b79a4713..09188b5f8f 100644
--- a/transports/bifrost-http/plugins/logging/models.go
+++ b/transports/bifrost-http/plugins/logging/models.go
@@ -45,7 +45,7 @@ type LogEntry struct {
 	// Virtual fields for JSON output - these will be populated when needed
 	InputHistoryParsed        []schemas.BifrostMessage    `gorm:"-" json:"input_history,omitempty"`
 	OutputMessageParsed       *schemas.BifrostMessage     `gorm:"-" json:"output_message,omitempty"`
-	EmbeddingOutputParsed     *[][]float32                `gorm:"-" json:"embedding_output,omitempty"`
+	EmbeddingOutputParsed     *[]schemas.BifrostEmbedding `gorm:"-" json:"embedding_output,omitempty"`
 	ParamsParsed              *schemas.ModelParameters    `gorm:"-" json:"params,omitempty"`
 	ToolsParsed               *[]schemas.Tool             `gorm:"-" json:"tools,omitempty"`
 	ToolCallsParsed           *[]schemas.ToolCall         `gorm:"-" json:"tool_calls,omitempty"`
diff --git a/transports/bifrost-http/plugins/telemetry/main.go b/transports/bifrost-http/plugins/telemetry/main.go
index 47c5281d3a..57d0575db9 100644
--- a/transports/bifrost-http/plugins/telemetry/main.go
+++ b/transports/bifrost-http/plugins/telemetry/main.go
@@ -59,6 +59,8 @@ func (p *PrometheusPlugin) PreHook(ctx *context.Context, req *schemas.BifrostReq
 		*ctx = context.WithValue(*ctx, methodKey, "chat")
 	} else if req.Input.TextCompletionInput != nil {
 		*ctx = context.WithValue(*ctx, methodKey, "text")
+	} else if req.Input.EmbeddingInput != nil {
+		*ctx = context.WithValue(*ctx, methodKey, "embedding")
 	} else if req.Input.SpeechInput != nil {
 		*ctx = context.WithValue(*ctx, methodKey, "speech")
 	} else if req.Input.TranscriptionInput != nil {
diff --git a/transports/go.mod b/transports/go.mod
index 815041707f..68c44f5301 100644
--- a/transports/go.mod
+++ b/transports/go.mod
@@ -16,6 +16,8 @@ require (
 	gorm.io/gorm v1.30.0
 )
 
+replace github.com/maximhq/bifrost/core => ../core
+
 require (
 	cloud.google.com/go v0.121.0 // indirect
 	cloud.google.com/go/auth v0.16.0 // indirect
@@ -35,11 +37,9 @@ require (
 	github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 // indirect
 	github.com/aws/smithy-go v1.22.3 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
-	github.com/bytedance/sonic v1.14.0 // indirect
 	github.com/bytedance/sonic/loader v0.3.0 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
 	github.com/cloudwego/base64x v0.1.5 // indirect
-	github.com/cloudwego/base64x v0.1.5 // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
 	github.com/go-logr/logr v1.4.2 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
@@ -52,7 +52,6 @@ require (
 	github.com/jinzhu/now v1.1.5 // indirect
 	github.com/klauspost/compress v1.18.0 // indirect
 	github.com/klauspost/cpuid/v2 v2.0.9 // indirect
-	github.com/klauspost/cpuid/v2 v2.0.9 // indirect
 	github.com/mark3labs/mcp-go v0.32.0 // indirect
 	github.com/mattn/go-sqlite3 v1.14.28 // indirect
 	github.com/maximhq/maxim-go v0.1.3 // indirect
@@ -63,7 +62,6 @@ require (
 	github.com/savsgio/gotils v0.0.0-20240704082632-aef3928b8a38 // indirect
 	github.com/spf13/cast v1.7.1 // indirect
 	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
-	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
 	github.com/valyala/bytebufferpool v1.0.0 // indirect
 	github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
 	go.opentelemetry.io/auto/sdk v1.1.0 // indirect
@@ -72,7 +70,6 @@ require (
 	go.opentelemetry.io/otel/metric v1.35.0 // indirect
 	go.opentelemetry.io/otel/trace v1.35.0 // indirect
 	golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect
-	golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect
 	golang.org/x/crypto v0.38.0 // indirect
 	golang.org/x/net v0.40.0 // indirect
 	golang.org/x/oauth2 v0.30.0 // indirect
@@ -82,4 +79,3 @@ require (
 	google.golang.org/grpc v1.72.0 // indirect
 	google.golang.org/protobuf v1.36.6 // indirect
 )
-
diff --git a/transports/go.sum b/transports/go.sum
index d811e40ccd..d8dbd56357 100644
--- a/transports/go.sum
+++ b/transports/go.sum
@@ -93,8 +93,6 @@ github.com/mark3labs/mcp-go v0.32.0 h1:fgwmbfL2gbd67obg57OfV2Dnrhs1HtSdlY/i5fn7M
 github.com/mark3labs/mcp-go v0.32.0/go.mod h1:rXqOudj/djTORU/ThxYx8fqEVj/5pvTuuebQ2RC7uk4=
 github.com/mattn/go-sqlite3 v1.14.28 h1:ThEiQrnbtumT+QMknw63Befp/ce/nUPgBPMlRFEum7A=
 github.com/mattn/go-sqlite3 v1.14.28/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
-github.com/maximhq/bifrost/core v1.1.13 h1:lTkoXL5OrvPD8rQtrVSaBHJN7jSj+PSawiO+buEo1Io=
-github.com/maximhq/bifrost/core v1.1.13/go.mod h1:Wa/BtJoHZ0+RXYomGeAL+wyBu6iD1h6vMiUHF5RTlkA=
 github.com/maximhq/bifrost/plugins/maxim v1.0.6 h1:m1tWjbmxW9Lz4mDhXclQhZdFt/TrRPbZwFcoWY9ZAEk=
 github.com/maximhq/bifrost/plugins/maxim v1.0.6/go.mod h1:+D/E498VB4JNTEzG4fYyFJf9WQaq/9FgYrmzl49mLNc=
 github.com/maximhq/maxim-go v0.1.3 h1:nVzdz3hEjZVxmWHARWIM+Yrn1Jp50qrsK4BA/sz2jj8=
diff --git a/ui/components/logs/log-detail-sheet.tsx b/ui/components/logs/log-detail-sheet.tsx
index 5101c1ea92..6884513910 100644
--- a/ui/components/logs/log-detail-sheet.tsx
+++ b/ui/components/logs/log-detail-sheet.tsx
@@ -239,7 +239,11 @@ export function LogDetailSheet({ log, open, onOpenChange }: LogDetailSheetProps)
                 <LogMessageView
                   message={{
                     role: 'assistant',
-                    content: JSON.stringify(log.embedding_output, null, 2),
+                    content: JSON.stringify(
+                      log.embedding_output.map((embedding) => embedding.embedding),
+                      null,
+                      2,
+                    ),
                   }}
                 />
               </>
diff --git a/ui/lib/types/logs.ts b/ui/lib/types/logs.ts
index 8fe3f7762c..9513279251 100644
--- a/ui/lib/types/logs.ts
+++ b/ui/lib/types/logs.ts
@@ -107,6 +107,12 @@ export interface BifrostMessage {
   thought?: string
 }
 
+export interface BifrostEmbedding {
+  index: number
+  object: string // the Go struct's comment gives "embedding" as the value
+  embedding: string | number[] | number[][] // mirrors the Go variants: string, []float32, [][]float32
+}
+
 // Tool related types
 export interface FunctionParameters {
   type: string
@@ -217,7 +223,7 @@ export interface LogEntry {
   model: string
   input_history: BifrostMessage[]
   output_message?: BifrostMessage
-  embedding_output?: number[][]
+  embedding_output?: BifrostEmbedding[]
   params?: ModelParameters
   speech_input?: SpeechInput
   transcription_input?: TranscriptionInput