Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/quickstart/http-transport.md
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ curl -X POST http://localhost:8080/v1/chat/completions \
# Use Anthropic
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model": "anthropic/claude-3-sonnet-20240229", "messages": [{"role": "user", "content": "Hello from Anthropic!"}], "params":{"max_tokens": 100}}'
-d '{"model": "anthropic/claude-3-sonnet-20240229", "messages": [{"role": "user", "content": "Hello from Anthropic!"}], "max_tokens": 100}'
```
Comment thread
TejasGhatte marked this conversation as resolved.

### **🔄 Add Automatic Fallbacks**
Expand All @@ -258,7 +258,7 @@ curl -X POST http://localhost:8080/v1/chat/completions \
"model": "openai/gpt-4o-mini",
"messages": [{"role": "user", "content": "Hello!"}],
"fallbacks": ["anthropic/claude-3-sonnet-20240229"],
"params": {"max_tokens": 100}
"max_tokens": 100
}'
```

Expand Down
13 changes: 4 additions & 9 deletions docs/usage/http-transport/endpoints.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,8 @@ Chat conversation endpoint supporting all providers.
"content": "Hello, how are you?"
}
],
"params": {
"temperature": 0.7,
"max_tokens": 1000
},
"temperature": 0.7,
"max_tokens": 1000,
"fallbacks": ["anthropic/claude-3-sonnet-20240229"]
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
```
Expand Down Expand Up @@ -128,10 +126,8 @@ Text completion endpoint for simple text generation.
{
"model": "openai/gpt-4o-mini",
"text": "The future of AI is",
"params": {
"temperature": 0.8,
"max_tokens": 150
}
"temperature": 0.8,
"max_tokens": 150
}
```

Expand Down Expand Up @@ -422,7 +418,6 @@ bifrost_provider_errors_total{provider="openai",error_type="rate_limit"} 23
| Parameter | Type | Description | Example |
| ----------- | ------ | ----------------------- | ---------------------------------------- |
| `model` | string | Provider and model name | `"openai/gpt-4o-mini"` |
| `params` | object | Model parameters | `{"temperature": 0.7}` |
| `fallbacks` | array | Fallback model names | `["anthropic/claude-3-sonnet-20240229"]` |

### **Model Parameters**
Expand Down
90 changes: 51 additions & 39 deletions docs/usage/http-transport/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,32 +56,30 @@
"content": "What's the weather in San Francisco?"
}
],
"params": {
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA"
}
},
"required": ["location"]
}
}
}
],
"tool_choice": {
"tools": [
{
"type": "function",
"function": {
"name": "get_weather"
"name": "get_weather",
"description": "Get current weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA"
}
},
"required": ["location"]
}
}
}
],
"tool_choice": {
"type": "function",
"function": {
"name": "get_weather"
}
}
Comment thread
TejasGhatte marked this conversation as resolved.
}
},
Expand Down Expand Up @@ -123,10 +121,8 @@
]
}
],
"params": {
"max_tokens": 1000,
"temperature": 0.7
}
"max_tokens": 1000,
"temperature": 0.7
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
}
Expand Down Expand Up @@ -250,10 +246,8 @@
"value": {
"model": "anthropic/claude-2.1",
"text": "The future of artificial intelligence is",
"params": {
"max_tokens": 100,
"temperature": 0.7
}
"max_tokens": 100,
"temperature": 0.7
}
},
"with_stop_sequences": {
Expand All @@ -262,11 +256,9 @@
"provider": "anthropic",
"model": "claude-2.1",
"text": "Write a short story about a robot:",
"params": {
"max_tokens": 200,
"temperature": 0.8,
"stop_sequences": ["\n\n", "THE END"]
}
"max_tokens": 200,
"temperature": 0.8,
"stop_sequences": ["\n\n", "THE END"]
}
}
}
Expand Down Expand Up @@ -1737,8 +1729,11 @@
"description": "Array of chat messages",
"minItems": 1
},
"params": {
"$ref": "#/components/schemas/ModelParameters"
"max_tokens": {
"type": "integer",
"minimum": 1,
"description": "Maximum number of tokens to generate",
"example": 1000
},
"fallbacks": {
"type": "array",
Expand All @@ -1764,9 +1759,26 @@
"description": "Text prompt for completion",
"example": "The benefits of artificial intelligence include"
},
"params": {
"$ref": "#/components/schemas/ModelParameters"
"max_tokens": {
"type": "integer",
"minimum": 1,
"description": "Maximum number of tokens to generate",
"example": 1000
},
Comment thread
coderabbitai[bot] marked this conversation as resolved.
"temperature": {
"type": "number",
"minimum": 0.0,
"maximum": 2.0,
"description": "Controls randomness in the output",
"example": 0.7
},
"stop_sequences": {
"type": "array",
"items": {
"type": "string"
},
"description": "Sequences that stop generation"
},
"fallbacks": {
"type": "array",
"items": {
Expand Down
114 changes: 110 additions & 4 deletions transports/bifrost-http/handlers/completions.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"strconv"
"strings"

"github.com/bytedance/sonic"
"github.com/fasthttp/router"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
Expand All @@ -35,12 +36,38 @@ func NewCompletionHandler(client *bifrost.Bifrost, logger schemas.Logger) *Compl
}
}

// completionRequestKnownFields enumerates every JSON key that maps to a
// declared field on CompletionRequest. UnmarshalJSON consults this set:
// any key NOT listed here is treated as a provider-specific extra
// parameter and collected into ExtraParams as-is.
// NOTE(review): the legacy "params" key is absent, so a client still
// sending {"params": {...}} will have that whole object forwarded inside
// ExtraParams rather than ignored — confirm this is the intended
// backward-compatibility behavior.
var completionRequestKnownFields = map[string]bool{
"model": true,
"messages": true,
"text": true,
"fallbacks": true,
"stream": true,
"input": true,
"voice": true,
"instructions": true,
"response_format": true,
"stream_format": true,
"tool_choice": true,
"tools": true,
"temperature": true,
"top_p": true,
"top_k": true,
"max_tokens": true,
"stop_sequences": true,
"presence_penalty": true,
"frequency_penalty": true,
"parallel_tool_calls": true,
"encoding_format": true,
"dimensions": true,
"user": true,
}

// CompletionRequest represents a request for either text or chat completion
type CompletionRequest struct {
Model string `json:"model"` // Model to use in "provider/model" format
Messages []schemas.BifrostMessage `json:"messages"` // Chat messages (for chat completion)
Text string `json:"text"` // Text input (for text completion)
Params *schemas.ModelParameters `json:"params"` // Additional model parameters
Fallbacks []string `json:"fallbacks"` // Fallback providers and models in "provider/model" format
Stream *bool `json:"stream"` // Whether to stream the response

Expand All @@ -50,6 +77,85 @@ type CompletionRequest struct {
Instructions string `json:"instructions"`
ResponseFormat string `json:"response_format"`
StreamFormat *string `json:"stream_format,omitempty"`

ToolChoice *schemas.ToolChoice `json:"tool_choice,omitempty"` // Whether to call a tool
Tools *[]schemas.Tool `json:"tools,omitempty"` // Tools to use
Temperature *float64 `json:"temperature,omitempty"` // Controls randomness in the output
TopP *float64 `json:"top_p,omitempty"` // Controls diversity via nucleus sampling
TopK *int `json:"top_k,omitempty"` // Controls diversity via top-k sampling
MaxTokens *int `json:"max_tokens,omitempty"` // Maximum number of tokens to generate
StopSequences *[]string `json:"stop_sequences,omitempty"` // Sequences that stop generation
PresencePenalty *float64 `json:"presence_penalty,omitempty"` // Penalizes repeated tokens
FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` // Penalizes frequent tokens
ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` // Enables parallel tool calls
EncodingFormat *string `json:"encoding_format,omitempty"` // Format for embedding output (e.g., "float", "base64")
Dimensions *int `json:"dimensions,omitempty"` // Number of dimensions for embedding output
User *string `json:"user,omitempty"` // User identifier for tracking
// Dynamic parameters that can be provider-specific, they are directly
// added to the request as is.
ExtraParams map[string]interface{} `json:"-"`
}

// UnmarshalJSON decodes a CompletionRequest, routing any JSON keys that
// are not declared in completionRequestKnownFields into ExtraParams so
// they can be forwarded to the provider untouched.
func (cr *CompletionRequest) UnmarshalJSON(data []byte) error {
	// Decode the typed fields through an alias so this method is not
	// re-entered recursively.
	type plain CompletionRequest
	if err := sonic.Unmarshal(data, (*plain)(cr)); err != nil {
		return err
	}

	// Decode a second time into a raw map to discover the unknown keys.
	var fields map[string]json.RawMessage
	if err := sonic.Unmarshal(data, &fields); err != nil {
		return err
	}

	if cr.ExtraParams == nil {
		cr.ExtraParams = make(map[string]interface{})
	}

	for name, raw := range fields {
		if completionRequestKnownFields[name] {
			continue
		}
		var val interface{}
		if err := sonic.Unmarshal(raw, &val); err != nil {
			// Best effort: an undecodable extra field is dropped.
			continue
		}
		cr.ExtraParams[name] = val
	}

	return nil
}

// GetModelParameters assembles a schemas.ModelParameters from the
// request's flattened top-level fields, copying across any
// provider-specific extras captured by UnmarshalJSON.
func (cr *CompletionRequest) GetModelParameters() *schemas.ModelParameters {
	params := &schemas.ModelParameters{
		// Pre-size for the extras we are about to copy; len of a nil
		// map is 0, so this is safe when no extras were supplied.
		ExtraParams:       make(map[string]interface{}, len(cr.ExtraParams)),
		ToolChoice:        cr.ToolChoice,
		Tools:             cr.Tools,
		Temperature:       cr.Temperature,
		TopP:              cr.TopP,
		TopK:              cr.TopK,
		MaxTokens:         cr.MaxTokens,
		StopSequences:     cr.StopSequences,
		PresencePenalty:   cr.PresencePenalty,
		FrequencyPenalty:  cr.FrequencyPenalty,
		ParallelToolCalls: cr.ParallelToolCalls,
		EncodingFormat:    cr.EncodingFormat,
		Dimensions:        cr.Dimensions,
		User:              cr.User,
	}

	// Ranging over a nil map is a no-op, so no nil guard is needed.
	for k, v := range cr.ExtraParams {
		params.ExtraParams[k] = v
	}

	return params
}

type CompletionType string
Expand Down Expand Up @@ -290,7 +396,7 @@ func (h *CompletionHandler) TranscriptionCompletion(ctx *fasthttp.RequestCtx) {
// It handles request parsing, validation, and response formatting
func (h *CompletionHandler) handleRequest(ctx *fasthttp.RequestCtx, completionType CompletionType) {
var req CompletionRequest
if err := json.Unmarshal(ctx.PostBody(), &req); err != nil {
if err := sonic.Unmarshal(ctx.PostBody(), &req); err != nil {
SendError(ctx, fasthttp.StatusBadRequest, fmt.Sprintf("Invalid request format: %v", err), h.logger)
return
}
Expand Down Expand Up @@ -326,7 +432,7 @@ func (h *CompletionHandler) handleRequest(ctx *fasthttp.RequestCtx, completionTy
bifrostReq := &schemas.BifrostRequest{
Model: modelName,
Provider: schemas.ModelProvider(provider),
Params: req.Params,
Params: req.GetModelParameters(),
Fallbacks: fallbacks,
}

Expand Down Expand Up @@ -458,7 +564,7 @@ func (h *CompletionHandler) handleStreamingResponse(ctx *fasthttp.RequestCtx, ge
}

// Convert response to JSON
responseJSON, err := json.Marshal(data)
responseJSON, err := sonic.Marshal(data)
if err != nil {
h.logger.Warn(fmt.Sprintf("Failed to marshal streaming response: %v", err))
continue
Expand Down
6 changes: 6 additions & 0 deletions transports/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/maximhq/bifrost/transports
go 1.24.1

require (
github.com/bytedance/sonic v1.14.0
github.com/fasthttp/router v1.5.4
Comment thread
Pratham-Mishra04 marked this conversation as resolved.
github.com/fasthttp/websocket v1.5.12
github.com/google/uuid v1.6.0
Expand Down Expand Up @@ -37,6 +38,7 @@ require (
github.com/bytedance/sonic/loader v0.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cloudwego/base64x v0.1.5 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
Expand All @@ -47,6 +49,7 @@ require (
github.com/gorilla/websocket v1.5.3 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
github.com/mark3labs/mcp-go v0.32.0 // indirect
github.com/maximhq/maxim-go v0.1.3 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
Expand All @@ -56,6 +59,7 @@ require (
github.com/savsgio/gotils v0.0.0-20240704082632-aef3928b8a38 // indirect
github.com/spf13/cast v1.7.1 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
Expand All @@ -64,6 +68,7 @@ require (
go.opentelemetry.io/otel/metric v1.35.0 // indirect
go.opentelemetry.io/otel/trace v1.35.0 // indirect
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect
golang.org/x/crypto v0.38.0 // indirect
golang.org/x/net v0.40.0 // indirect
golang.org/x/oauth2 v0.30.0 // indirect
Expand All @@ -73,3 +78,4 @@ require (
google.golang.org/grpc v1.72.0 // indirect
google.golang.org/protobuf v1.36.6 // indirect
)