diff --git a/core/providers/azure.go b/core/providers/azure.go index e41015d3e2..c802eac029 100644 --- a/core/providers/azure.go +++ b/core/providers/azure.go @@ -31,32 +31,6 @@ type AzureTextResponse struct { Usage schemas.LLMUsage `json:"usage"` // Token usage statistics } -// AzureChatResponse represents the response structure from Azure's chat completion API. -// It includes completion choices, model information, and usage statistics. -type AzureChatResponse struct { - ID string `json:"id"` // Unique identifier for the completion - Object string `json:"object"` // Type of completion (always "chat.completion") - Choices []schemas.BifrostResponseChoice `json:"choices"` // Array of completion choices - Model string `json:"model"` // Model used for the completion - Created int `json:"created"` // Unix timestamp of completion creation - SystemFingerprint *string `json:"system_fingerprint"` // System fingerprint for the request - Usage schemas.LLMUsage `json:"usage"` // Token usage statistics -} - -// AzureEmbeddingResponse represents the response structure from Azure's embedding API. -type AzureEmbeddingResponse struct { - Object string `json:"object"` - Data []struct { - Object string `json:"object"` - Embedding interface{} `json:"embedding"` - Index int `json:"index"` - } `json:"data"` - Model string `json:"model"` - Usage schemas.LLMUsage `json:"usage"` - ID string `json:"id"` - SystemFingerprint *string `json:"system_fingerprint"` -} - // AzureError represents the error response structure from Azure's API. // It includes error code and message information. type AzureError struct { @@ -79,19 +53,19 @@ var azureTextCompletionResponsePool = sync.Pool{ // azureChatResponsePool provides a pool for Azure chat response objects. var azureChatResponsePool = sync.Pool{ New: func() interface{} { - return &AzureChatResponse{} + return &schemas.BifrostResponse{} }, } // acquireAzureChatResponse gets an Azure chat response from the pool and resets it. -func acquireAzureChatResponse() *AzureChatResponse { - resp := azureChatResponsePool.Get().(*AzureChatResponse) - *resp = AzureChatResponse{} // Reset the struct +func acquireAzureChatResponse() *schemas.BifrostResponse { + resp := azureChatResponsePool.Get().(*schemas.BifrostResponse) + *resp = schemas.BifrostResponse{} // Reset the struct return resp } // releaseAzureChatResponse returns an Azure chat response to the pool. -func releaseAzureChatResponse(resp *AzureChatResponse) { +func releaseAzureChatResponse(resp *schemas.BifrostResponse) { if resp != nil { azureChatResponsePool.Put(resp) } @@ -139,7 +113,7 @@ func NewAzureProvider(config *schemas.ProviderConfig, logger schemas.Logger) (*A // Pre-warm response pools for range config.ConcurrencyAndBufferSize.Concurrency { - azureChatResponsePool.Put(&AzureChatResponse{}) + azureChatResponsePool.Put(&schemas.BifrostResponse{}) azureTextCompletionResponsePool.Put(&AzureTextResponse{}) } @@ -342,39 +316,24 @@ func (provider *AzureProvider) ChatCompletion(ctx context.Context, model string, return nil, bifrostErr } - // Create final response - bifrostResponse := &schemas.BifrostResponse{ - ID: response.ID, - Choices: response.Choices, - Model: response.Model, - Created: response.Created, - SystemFingerprint: response.SystemFingerprint, - Usage: &response.Usage, - ExtraFields: schemas.BifrostResponseExtraFields{ - Provider: schemas.Azure, - }, - } + response.ExtraFields.Provider = schemas.Azure // Set raw response if enabled if provider.sendBackRawResponse { - bifrostResponse.ExtraFields.RawResponse = rawResponse + response.ExtraFields.RawResponse = rawResponse } if params != nil { - bifrostResponse.ExtraFields.Params = *params + response.ExtraFields.Params = *params } - return bifrostResponse, nil + return response, nil } // Embedding generates embeddings for the given input text(s) using Azure OpenAI. // The input can be either a single string or a slice of strings for batch embedding. // Returns a BifrostResponse containing the embedding(s) and any error that occurred. func (provider *AzureProvider) Embedding(ctx context.Context, model string, key schemas.Key, input *schemas.EmbeddingInput, params *schemas.ModelParameters) (*schemas.BifrostResponse, *schemas.BifrostError) { - if len(input.Texts) == 0 { - return nil, newBifrostOperationError("no input text provided for embedding", nil, schemas.Azure) - } - // Prepare request body - Azure uses deployment-scoped URLs, so model is not needed in body requestBody := map[string]interface{}{ "input": input.Texts, @@ -399,61 +358,27 @@ func (provider *AzureProvider) Embedding(ctx context.Context, model string, key return nil, err } - // Parse response - var response AzureEmbeddingResponse - if err := sonic.Unmarshal(responseBody, &response); err != nil { - return nil, newBifrostOperationError(schemas.ErrProviderResponseUnmarshal, err, schemas.Azure) - } + // Pre-allocate response structs from pools + response := acquireAzureChatResponse() + defer releaseAzureChatResponse(response) - bifrostResponse := &schemas.BifrostResponse{ - ID: response.ID, - Object: response.Object, - Model: response.Model, - Usage: &response.Usage, - SystemFingerprint: response.SystemFingerprint, - ExtraFields: schemas.BifrostResponseExtraFields{ - Provider: schemas.Azure, - RawResponse: responseBody, - }, + // Use enhanced response handler with pre-allocated response + rawResponse, bifrostErr := handleProviderResponse(responseBody, response, provider.sendBackRawResponse) + if bifrostErr != nil { + return nil, bifrostErr } - // Extract embeddings from response data - if len(response.Data) > 0 { - embeddings := make([][]float32, len(response.Data)) - for i, data := range response.Data { - switch v := data.Embedding.(type) { - case []float32: - embeddings[i] = v - case []float64: - // Direct conversion from []float64 to []float32 - floatArray := make([]float32, len(v)) - for j := range v { - floatArray[j] = float32(v[j]) - } - embeddings[i] = floatArray - case []interface{}: - // Fallback: element-by-element conversion for []interface{} - floatArray := make([]float32, len(v)) - for j := range v { - if num, ok := v[j].(float64); ok { - floatArray[j] = float32(num) - } else { - return nil, newBifrostOperationError(fmt.Sprintf("unsupported number type in embedding array: %T", v[j]), nil, schemas.Azure) - } - } - embeddings[i] = floatArray - default: - return nil, newBifrostOperationError(fmt.Sprintf("unsupported embedding type: %T", data.Embedding), nil, schemas.Azure) - } - } - bifrostResponse.Embedding = embeddings - } + response.ExtraFields.Provider = schemas.Azure if params != nil { - bifrostResponse.ExtraFields.Params = *params + response.ExtraFields.Params = *params } - return bifrostResponse, nil + if provider.sendBackRawResponse { + response.ExtraFields.RawResponse = rawResponse + } + + return response, nil } // ChatCompletionStream performs a streaming chat completion request to Azure's OpenAI API. diff --git a/core/providers/bedrock.go b/core/providers/bedrock.go index 8e2f3a38e3..7dde654bf5 100644 --- a/core/providers/bedrock.go +++ b/core/providers/bedrock.go @@ -1130,14 +1130,6 @@ func (provider *BedrockProvider) Embedding(ctx context.Context, model string, ke // handleTitanEmbedding handles embedding requests for Amazon Titan models. func (provider *BedrockProvider) handleTitanEmbedding(ctx context.Context, model string, key string, input *schemas.EmbeddingInput, params *schemas.ModelParameters) (*schemas.BifrostResponse, *schemas.BifrostError) { - // Titan Text Embeddings V1/V2 - only supports single text input - if len(input.Texts) == 0 { - return nil, newConfigurationError("no input text provided for embedding", schemas.Bedrock) - } - if len(input.Texts) > 1 { - return nil, newConfigurationError("Amazon Titan embedding models support only single text input, received multiple texts", schemas.Bedrock) - } - requestBody := map[string]interface{}{ "inputText": input.Texts[0], } @@ -1171,8 +1163,17 @@ func (provider *BedrockProvider) handleTitanEmbedding(ctx context.Context, model } bifrostResponse := &schemas.BifrostResponse{ - Embedding: [][]float32{titanResp.Embedding}, - Model: model, + Object: "list", + Data: []schemas.BifrostEmbedding{ + { + Index: 0, + Object: "embedding", + Embedding: schemas.BifrostEmbeddingResponse{ + Embedding2DArray: &[][]float32{titanResp.Embedding}, + }, + }, + }, + Model: model, Usage: &schemas.LLMUsage{ PromptTokens: titanResp.InputTextTokenCount, TotalTokens: titanResp.InputTextTokenCount, @@ -1192,10 +1193,6 @@ func (provider *BedrockProvider) handleTitanEmbedding(ctx context.Context, model // handleCohereEmbedding handles embedding requests for Cohere models on Bedrock. func (provider *BedrockProvider) handleCohereEmbedding(ctx context.Context, model string, key string, input *schemas.EmbeddingInput, params *schemas.ModelParameters) (*schemas.BifrostResponse, *schemas.BifrostError) { - if len(input.Texts) == 0 { - return nil, newConfigurationError("no input text provided for embedding", schemas.Bedrock) - } - requestBody := map[string]interface{}{ "texts": input.Texts, "input_type": "search_document", @@ -1225,9 +1222,18 @@ func (provider *BedrockProvider) handleCohereEmbedding(ctx context.Context, mode totalInputTokens := approximateTokenCount(input.Texts) bifrostResponse := &schemas.BifrostResponse{ - Embedding: cohereResp.Embeddings, - ID: cohereResp.ID, - Model: model, + Object: "list", + Data: []schemas.BifrostEmbedding{ + { + Index: 0, + Object: "embedding", + Embedding: schemas.BifrostEmbeddingResponse{ + Embedding2DArray: &cohereResp.Embeddings, + }, + }, + }, + ID: cohereResp.ID, + Model: model, Usage: &schemas.LLMUsage{ PromptTokens: totalInputTokens, TotalTokens: totalInputTokens, diff --git a/core/providers/cohere.go b/core/providers/cohere.go index 0135cccc59..7ab86387b6 100644 --- a/core/providers/cohere.go +++ b/core/providers/cohere.go @@ -596,10 +596,6 @@ func convertChatHistory(history []struct { // Embedding generates embeddings for the given input text(s) using the Cohere API. // Supports Cohere's embedding models and returns a BifrostResponse containing the embedding(s). func (provider *CohereProvider) Embedding(ctx context.Context, model string, key schemas.Key, input *schemas.EmbeddingInput, params *schemas.ModelParameters) (*schemas.BifrostResponse, *schemas.BifrostError) { - if len(input.Texts) == 0 { - return nil, newConfigurationError("no input text provided for embedding", schemas.Cohere) - } - // Prepare request body with default values requestBody := map[string]interface{}{ "texts": input.Texts, @@ -683,9 +679,18 @@ func (provider *CohereProvider) Embedding(ctx context.Context, model string, key // Create BifrostResponse bifrostResponse := &schemas.BifrostResponse{ - ID: cohereResp.ID, - Embedding: cohereResp.Embeddings.Float, - Model: model, + ID: cohereResp.ID, + Object: "list", + Data: []schemas.BifrostEmbedding{ + { + Index: 0, + Object: "embedding", + Embedding: schemas.BifrostEmbeddingResponse{ + Embedding2DArray: &cohereResp.Embeddings.Float, + }, + }, + }, + Model: model, Usage: &schemas.LLMUsage{ PromptTokens: totalInputTokens, TotalTokens: totalInputTokens, diff --git a/core/providers/mistral.go b/core/providers/mistral.go index a32c30879b..9910edb070 100644 --- a/core/providers/mistral.go +++ b/core/providers/mistral.go @@ -15,20 +15,6 @@ import ( "github.com/valyala/fasthttp" ) -// MistralEmbeddingResponse represents the response structure from Mistral's embedding API. -type MistralEmbeddingResponse struct { - Object string `json:"object"` - Data []struct { - Object string `json:"object"` - Embedding []float32 `json:"embedding"` - Index int `json:"index"` - } `json:"data"` - Model string `json:"model"` - Usage schemas.LLMUsage `json:"usage"` - ID string `json:"id"` - SystemFingerprint *string `json:"system_fingerprint"` -} - // mistralResponsePool provides a pool for Mistral response objects. var mistralResponsePool = sync.Pool{ New: func() interface{} { @@ -183,10 +169,6 @@ func (provider *MistralProvider) ChatCompletion(ctx context.Context, model strin // Embedding generates embeddings for the given input text(s) using the Mistral API. // Supports Mistral's embedding models and returns a BifrostResponse containing the embedding(s). func (provider *MistralProvider) Embedding(ctx context.Context, model string, key schemas.Key, input *schemas.EmbeddingInput, params *schemas.ModelParameters) (*schemas.BifrostResponse, *schemas.BifrostError) { - if len(input.Texts) == 0 { - return nil, newConfigurationError("no input text provided for embedding", schemas.Mistral) - } - // Prepare request body with base parameters requestBody := map[string]interface{}{ "model": model, @@ -254,46 +236,29 @@ func (provider *MistralProvider) Embedding(ctx context.Context, model string, ke return nil, bifrostErr } - // Parse response using sonic.RawMessage to avoid double parsing - rawMessage := resp.Body() + responseBody := resp.Body() - // Parse into structured response - var mistralResp MistralEmbeddingResponse - if err := sonic.Unmarshal(rawMessage, &mistralResp); err != nil { - return nil, newBifrostOperationError("error parsing Mistral embedding response", err, schemas.Mistral) - } + // Pre-allocate response structs from pools + response := acquireMistralResponse() + defer releaseMistralResponse(response) - // Parse raw response for consistent format - var rawResponse interface{} - if err := sonic.Unmarshal(rawMessage, &rawResponse); err != nil { - return nil, newBifrostOperationError("error parsing raw response for Mistral embedding", err, schemas.Mistral) + // Use enhanced response handler with pre-allocated response + rawResponse, bifrostErr := handleProviderResponse(responseBody, response, provider.sendBackRawResponse) + if bifrostErr != nil { + return nil, bifrostErr } - // Convert data to embeddings array - var embeddings [][]float32 - for _, data := range mistralResp.Data { - embeddings = append(embeddings, data.Embedding) - } + response.ExtraFields.Provider = schemas.Mistral - // Create BifrostResponse - bifrostResponse := &schemas.BifrostResponse{ - ID: mistralResp.ID, - Object: mistralResp.Object, - Embedding: embeddings, - Model: mistralResp.Model, - Usage: &mistralResp.Usage, - SystemFingerprint: mistralResp.SystemFingerprint, - ExtraFields: schemas.BifrostResponseExtraFields{ - Provider: schemas.Mistral, - RawResponse: rawResponse, - }, + if params != nil { + response.ExtraFields.Params = *params } - if params != nil { - bifrostResponse.ExtraFields.Params = *params + if provider.sendBackRawResponse { + response.ExtraFields.RawResponse = rawResponse } - return bifrostResponse, nil + return response, nil } // ChatCompletionStream performs a streaming chat completion request to the Mistral API. diff --git a/core/providers/openai.go b/core/providers/openai.go index dd5cef215d..1714f62417 100644 --- a/core/providers/openai.go +++ b/core/providers/openai.go @@ -6,11 +6,8 @@ import ( "bufio" "bytes" "context" - "encoding/base64" - "encoding/binary" "fmt" "io" - "math" "mime/multipart" "net/http" "strings" @@ -22,24 +19,6 @@ import ( "github.com/valyala/fasthttp" ) -// OpenAIResponse represents the response structure from the OpenAI API. -// It includes completion choices, model information, and usage statistics. -type OpenAIResponse struct { - ID string `json:"id"` // Unique identifier for the completion - Object string `json:"object"` // Type of completion (text.completion, chat.completion, or embedding) - Choices []schemas.BifrostResponseChoice `json:"choices"` // Array of completion choices - Data []struct { // Embedding data - Object string `json:"object"` - Embedding any `json:"embedding"` - Index int `json:"index"` - } `json:"data,omitempty"` - Model string `json:"model"` // Model used for the completion - Created int `json:"created"` // Unix timestamp of completion creation - ServiceTier *string `json:"service_tier"` // Service tier used for the request - SystemFingerprint *string `json:"system_fingerprint"` // System fingerprint for the request - Usage schemas.LLMUsage `json:"usage"` // Token usage statistics -} - // openAIResponsePool provides a pool for OpenAI response objects. var openAIResponsePool = sync.Pool{ New: func() interface{} { @@ -241,11 +220,6 @@ func prepareOpenAIChatRequest(messages []schemas.BifrostMessage, params *schemas // The input can be either a single string or a slice of strings for batch embedding. // Returns a BifrostResponse containing the embedding(s) and any error that occurred. func (provider *OpenAIProvider) Embedding(ctx context.Context, model string, key schemas.Key, input *schemas.EmbeddingInput, params *schemas.ModelParameters) (*schemas.BifrostResponse, *schemas.BifrostError) { - // Validate input texts are not empty - if len(input.Texts) == 0 { - return nil, newBifrostOperationError("input texts cannot be empty", nil, schemas.OpenAI) - } - // Prepare request body with base parameters requestBody := map[string]interface{}{ "model": model, @@ -302,74 +276,29 @@ func (provider *OpenAIProvider) Embedding(ctx context.Context, model string, key return nil, parseOpenAIError(resp) } - // Parse response - var response OpenAIResponse - if err := sonic.Unmarshal(resp.Body(), &response); err != nil { - return nil, newBifrostOperationError(schemas.ErrProviderResponseUnmarshal, err, schemas.OpenAI) - } + responseBody := resp.Body() - // Create final response - bifrostResponse := &schemas.BifrostResponse{ - ID: response.ID, - Object: response.Object, - Model: response.Model, - Created: response.Created, - Usage: &response.Usage, - ServiceTier: response.ServiceTier, - SystemFingerprint: response.SystemFingerprint, - ExtraFields: schemas.BifrostResponseExtraFields{ - Provider: schemas.OpenAI, - }, - } + // Pre-allocate response structs from pools + response := acquireOpenAIResponse() + defer releaseOpenAIResponse(response) - // Extract embeddings from response data - if len(response.Data) > 0 { - embeddings := make([][]float32, len(response.Data)) - for i, data := range response.Data { - switch v := data.Embedding.(type) { - case []float32: - embeddings[i] = v - case []interface{}: - // Convert []interface{} to []float32 - floatArray := make([]float32, len(v)) - for j := range v { - if num, ok := v[j].(float64); ok { - floatArray[j] = float32(num) - } else { - return nil, newBifrostOperationError(fmt.Sprintf("unsupported number type in embedding array: %T", v[j]), nil, schemas.OpenAI) - } - } - embeddings[i] = floatArray - case string: - // Decode base64 string into float32 array - decodedData, err := base64.StdEncoding.DecodeString(v) - if err != nil { - return nil, newBifrostOperationError("failed to decode base64 embedding", err, schemas.OpenAI) - } + // Use enhanced response handler with pre-allocated response + rawResponse, bifrostErr := handleProviderResponse(responseBody, response, provider.sendBackRawResponse) + if bifrostErr != nil { + return nil, bifrostErr + } - // Validate that decoded data length is divisible by 4 (size of float32) - const sizeOfFloat32 = 4 - if len(decodedData)%sizeOfFloat32 != 0 { - return nil, newBifrostOperationError("malformed base64 embedding data: length not divisible by 4", nil, schemas.OpenAI) - } + response.ExtraFields.Provider = schemas.OpenAI - floats := make([]float32, len(decodedData)/sizeOfFloat32) - for i := 0; i < len(floats); i++ { - floats[i] = math.Float32frombits(binary.LittleEndian.Uint32(decodedData[i*4 : (i+1)*4])) - } - embeddings[i] = floats - default: - return nil, newBifrostOperationError(fmt.Sprintf("unsupported embedding type: %T", data.Embedding), nil, schemas.OpenAI) - } - } - bifrostResponse.Embedding = embeddings + if params != nil { + response.ExtraFields.Params = *params } - if params != nil { - bifrostResponse.ExtraFields.Params = *params + if provider.sendBackRawResponse { + response.ExtraFields.RawResponse = rawResponse } - return bifrostResponse, nil + return response, nil } // ChatCompletionStream handles streaming for OpenAI chat completions. diff --git a/core/schemas/bifrost.go b/core/schemas/bifrost.go index 00ff577bb8..ebbaa456a8 100644 --- a/core/schemas/bifrost.go +++ b/core/schemas/bifrost.go @@ -373,9 +373,9 @@ type ImageURLStruct struct { // BifrostResponse represents the complete result from any bifrost request. type BifrostResponse struct { ID string `json:"id,omitempty"` - Object string `json:"object,omitempty"` // text.completion, chat.completion, or embedding + Object string `json:"object,omitempty"` // text.completion, chat.completion, embedding, speech, transcribe Choices []BifrostResponseChoice `json:"choices,omitempty"` - Embedding [][]float32 `json:"data,omitempty"` // Maps to "data" field in provider responses (e.g., OpenAI embedding format) + Data []BifrostEmbedding `json:"data,omitempty"` // Maps to "data" field in provider responses (e.g., OpenAI embedding format) Speech *BifrostSpeech `json:"speech,omitempty"` // Maps to "speech" field in provider responses (e.g., OpenAI speech format) Transcribe *BifrostTranscribe `json:"transcribe,omitempty"` // Maps to "transcribe" field in provider responses (e.g., OpenAI transcription format) Model string `json:"model,omitempty"` @@ -490,6 +490,56 @@ type Annotation struct { Citation Citation `json:"url_citation"` } +type BifrostEmbedding struct { + Index int `json:"index"` + Object string `json:"object"` // embedding + Embedding BifrostEmbeddingResponse `json:"embedding"` // can be []float32 or string +} + +type BifrostEmbeddingResponse struct { + EmbeddingStr *string + EmbeddingArray *[]float32 + Embedding2DArray *[][]float32 +} + +func (be BifrostEmbeddingResponse) MarshalJSON() ([]byte, error) { + if be.EmbeddingStr != nil { + return sonic.Marshal(be.EmbeddingStr) + } + if be.EmbeddingArray != nil { + return sonic.Marshal(be.EmbeddingArray) + } + if be.Embedding2DArray != nil { + return sonic.Marshal(be.Embedding2DArray) + } + return nil, fmt.Errorf("no embedding found") +} + +func (be *BifrostEmbeddingResponse) UnmarshalJSON(data []byte) error { + // First, try to unmarshal as a direct string + var stringContent string + if err := sonic.Unmarshal(data, &stringContent); err == nil { + be.EmbeddingStr = &stringContent + return nil + } + + // Try to unmarshal as a direct array of float32 + var arrayContent []float32 + if err := sonic.Unmarshal(data, &arrayContent); err == nil { + be.EmbeddingArray = &arrayContent + return nil + } + + // Try to unmarshal as a direct 2D array of float32 + var arrayContent2D [][]float32 + if err := sonic.Unmarshal(data, &arrayContent2D); err == nil { + be.Embedding2DArray = &arrayContent2D + return nil + } + + return fmt.Errorf("embedding field is neither a string nor an array of float32 nor a 2D array of float32") +} + // BifrostResponseChoice represents a choice in the completion result. // This struct can represent either a streaming or non-streaming response choice. // IMPORTANT: Only one of BifrostNonStreamResponseChoice or BifrostStreamResponseChoice diff --git a/transports/bifrost-http/integrations/openai/types.go b/transports/bifrost-http/integrations/openai/types.go index 7133920d23..356c509f3a 100644 --- a/transports/bifrost-http/integrations/openai/types.go +++ b/transports/bifrost-http/integrations/openai/types.go @@ -96,19 +96,12 @@ type OpenAIChatResponse struct { // OpenAIEmbeddingResponse represents an OpenAI embedding response type OpenAIEmbeddingResponse struct { - Object string `json:"object"` - Data []OpenAIEmbedding `json:"data"` - Model string `json:"model"` - Usage *schemas.LLMUsage `json:"usage,omitempty"` - ServiceTier *string `json:"service_tier,omitempty"` - SystemFingerprint *string `json:"system_fingerprint,omitempty"` -} - -// OpenAIEmbedding represents a single embedding in the response -type OpenAIEmbedding struct { - Object string `json:"object"` - Embedding []float32 `json:"embedding"` - Index int `json:"index"` + Object string `json:"object"` + Data []schemas.BifrostEmbedding `json:"data"` + Model string `json:"model"` + Usage *schemas.LLMUsage `json:"usage,omitempty"` + ServiceTier *string `json:"service_tier,omitempty"` + SystemFingerprint *string `json:"system_fingerprint,omitempty"` } // OpenAIChatError represents an OpenAI chat completion error response @@ -434,22 +427,13 @@ func DeriveOpenAITranscriptionFromBifrostResponse(bifrostResp *schemas.BifrostRe // DeriveOpenAIEmbeddingFromBifrostResponse converts a Bifrost embedding response to OpenAI format func DeriveOpenAIEmbeddingFromBifrostResponse(bifrostResp *schemas.BifrostResponse) *OpenAIEmbeddingResponse { - if bifrostResp == nil || bifrostResp.Embedding == nil { + if bifrostResp == nil || bifrostResp.Data == nil { return nil } - var embeddingData []OpenAIEmbedding - for i, embedding := range bifrostResp.Embedding { - embeddingData = append(embeddingData, OpenAIEmbedding{ - Object: "embedding", - Embedding: embedding, - Index: i, - }) - } - return &OpenAIEmbeddingResponse{ Object: "list", - Data: embeddingData, + Data: bifrostResp.Data, Model: bifrostResp.Model, Usage: bifrostResp.Usage, ServiceTier: bifrostResp.ServiceTier, diff --git a/transports/bifrost-http/plugins/logging/main.go b/transports/bifrost-http/plugins/logging/main.go index 6329961c86..72d9aa196b 100644 --- a/transports/bifrost-http/plugins/logging/main.go +++ b/transports/bifrost-http/plugins/logging/main.go @@ -41,7 +41,7 @@ type UpdateLogData struct { Status string TokenUsage *schemas.LLMUsage OutputMessage *schemas.BifrostMessage - EmbeddingOutput *[][]float32 + EmbeddingOutput *[]schemas.BifrostEmbedding ToolCalls *[]schemas.ToolCall ErrorDetails *schemas.BifrostError Model string // May be different from request @@ -516,8 +516,8 @@ func (p *LoggerPlugin) PostHook(ctx *context.Context, result *schemas.BifrostRes } } - if result.Embedding != nil { - updateData.EmbeddingOutput = &result.Embedding + if result.Data != nil { + updateData.EmbeddingOutput = &result.Data } // Handle speech and transcription outputs for NON-streaming responses diff --git a/transports/bifrost-http/plugins/logging/models.go b/transports/bifrost-http/plugins/logging/models.go index 44b79a4713..09188b5f8f 100644 --- a/transports/bifrost-http/plugins/logging/models.go +++ b/transports/bifrost-http/plugins/logging/models.go @@ -45,7 +45,7 @@ type LogEntry struct { // Virtual fields for JSON output - these will be populated when needed InputHistoryParsed []schemas.BifrostMessage `gorm:"-" json:"input_history,omitempty"` OutputMessageParsed *schemas.BifrostMessage `gorm:"-" json:"output_message,omitempty"` - EmbeddingOutputParsed *[][]float32 `gorm:"-" json:"embedding_output,omitempty"` + EmbeddingOutputParsed *[]schemas.BifrostEmbedding `gorm:"-" json:"embedding_output,omitempty"` ParamsParsed *schemas.ModelParameters `gorm:"-" json:"params,omitempty"` ToolsParsed *[]schemas.Tool `gorm:"-" json:"tools,omitempty"` ToolCallsParsed *[]schemas.ToolCall `gorm:"-" json:"tool_calls,omitempty"` diff --git a/transports/bifrost-http/plugins/telemetry/main.go b/transports/bifrost-http/plugins/telemetry/main.go index 47c5281d3a..57d0575db9 100644 --- a/transports/bifrost-http/plugins/telemetry/main.go +++ b/transports/bifrost-http/plugins/telemetry/main.go @@ -59,6 +59,8 @@ func (p *PrometheusPlugin) PreHook(ctx *context.Context, req *schemas.BifrostReq *ctx = context.WithValue(*ctx, methodKey, "chat") } else if req.Input.TextCompletionInput != nil { *ctx = context.WithValue(*ctx, methodKey, "text") + } else if req.Input.EmbeddingInput != nil { + *ctx = context.WithValue(*ctx, methodKey, "embedding") } else if req.Input.SpeechInput != nil { *ctx = context.WithValue(*ctx, methodKey, "speech") } else if req.Input.TranscriptionInput != nil { diff --git a/transports/go.mod b/transports/go.mod index 815041707f..68c44f5301 100644 --- a/transports/go.mod +++ b/transports/go.mod @@ -16,6 +16,8 @@ require ( gorm.io/gorm v1.30.0 ) +replace github.com/maximhq/bifrost/core => ../core + require ( cloud.google.com/go v0.121.0 // indirect cloud.google.com/go/auth v0.16.0 // indirect @@ -35,11 +37,9 @@ require ( github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 // indirect github.com/aws/smithy-go v1.22.3 // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/bytedance/sonic v1.14.0 // indirect github.com/bytedance/sonic/loader v0.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cloudwego/base64x v0.1.5 // indirect - github.com/cloudwego/base64x v0.1.5 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -52,7 +52,6 @@ require ( github.com/jinzhu/now v1.1.5 // indirect github.com/klauspost/compress v1.18.0 // indirect github.com/klauspost/cpuid/v2 v2.0.9 // indirect - github.com/klauspost/cpuid/v2 v2.0.9 // indirect github.com/mark3labs/mcp-go v0.32.0 // indirect github.com/mattn/go-sqlite3 v1.14.28 // indirect github.com/maximhq/maxim-go v0.1.3 // indirect @@ -63,7 +62,6 @@ require ( github.com/savsgio/gotils v0.0.0-20240704082632-aef3928b8a38 // indirect github.com/spf13/cast v1.7.1 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect - github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/yosida95/uritemplate/v3 v3.0.2 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect @@ -72,7 +70,6 @@ require ( go.opentelemetry.io/otel/metric v1.35.0 // indirect go.opentelemetry.io/otel/trace v1.35.0 // indirect golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect - golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect golang.org/x/crypto v0.38.0 // indirect golang.org/x/net v0.40.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect @@ -82,4 +79,3 @@ require ( google.golang.org/grpc v1.72.0 // indirect google.golang.org/protobuf v1.36.6 // indirect ) - diff --git a/transports/go.sum b/transports/go.sum index d811e40ccd..d8dbd56357 100644 --- a/transports/go.sum +++ b/transports/go.sum @@ -93,8 +93,6 @@ github.com/mark3labs/mcp-go v0.32.0 h1:fgwmbfL2gbd67obg57OfV2Dnrhs1HtSdlY/i5fn7M github.com/mark3labs/mcp-go v0.32.0/go.mod h1:rXqOudj/djTORU/ThxYx8fqEVj/5pvTuuebQ2RC7uk4= github.com/mattn/go-sqlite3 v1.14.28 h1:ThEiQrnbtumT+QMknw63Befp/ce/nUPgBPMlRFEum7A= github.com/mattn/go-sqlite3 v1.14.28/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= -github.com/maximhq/bifrost/core v1.1.13 h1:lTkoXL5OrvPD8rQtrVSaBHJN7jSj+PSawiO+buEo1Io= -github.com/maximhq/bifrost/core v1.1.13/go.mod h1:Wa/BtJoHZ0+RXYomGeAL+wyBu6iD1h6vMiUHF5RTlkA= github.com/maximhq/bifrost/plugins/maxim v1.0.6 h1:m1tWjbmxW9Lz4mDhXclQhZdFt/TrRPbZwFcoWY9ZAEk= github.com/maximhq/bifrost/plugins/maxim v1.0.6/go.mod h1:+D/E498VB4JNTEzG4fYyFJf9WQaq/9FgYrmzl49mLNc= github.com/maximhq/maxim-go v0.1.3 h1:nVzdz3hEjZVxmWHARWIM+Yrn1Jp50qrsK4BA/sz2jj8= diff --git a/ui/components/logs/log-detail-sheet.tsx b/ui/components/logs/log-detail-sheet.tsx index 5101c1ea92..6884513910 100644 --- a/ui/components/logs/log-detail-sheet.tsx +++ b/ui/components/logs/log-detail-sheet.tsx @@ -239,7 +239,11 @@ export function LogDetailSheet({ log, open, onOpenChange }: LogDetailSheetProps) embedding.embedding), + null, + 2, + ), }} /> diff --git a/ui/lib/types/logs.ts b/ui/lib/types/logs.ts index 8fe3f7762c..9513279251 100644 --- a/ui/lib/types/logs.ts +++ b/ui/lib/types/logs.ts @@ -107,6 +107,12 @@ export interface BifrostMessage { thought?: string } +export interface BifrostEmbedding { + index: number + object: string + embedding: string | number[] | number[][] +} + // Tool related types export interface FunctionParameters { type: string @@ -217,7 +223,7 @@ export interface LogEntry { model: string input_history: BifrostMessage[] output_message?: BifrostMessage - embedding_output?: number[][] + embedding_output?: BifrostEmbedding[] params?: ModelParameters speech_input?: SpeechInput transcription_input?: TranscriptionInput