Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/quickstart/http-transport.md
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ curl -X POST http://localhost:8080/v1/chat/completions \
# Use Anthropic
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model": "anthropic/claude-3-sonnet-20240229", "messages": [{"role": "user", "content": "Hello from Anthropic!"}], "params":{"max_tokens": 100}}'
-d '{"model": "anthropic/claude-3-sonnet-20240229", "messages": [{"role": "user", "content": "Hello from Anthropic!"}], "max_tokens": 100}'
```
Comment thread
TejasGhatte marked this conversation as resolved.

### **🔄 Add Automatic Fallbacks**
Expand All @@ -258,7 +258,7 @@ curl -X POST http://localhost:8080/v1/chat/completions \
"model": "openai/gpt-4o-mini",
"messages": [{"role": "user", "content": "Hello!"}],
"fallbacks": ["anthropic/claude-3-sonnet-20240229"],
"params": {"max_tokens": 100}
"max_tokens": 100
}'
```

Expand Down
13 changes: 4 additions & 9 deletions docs/usage/http-transport/endpoints.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,8 @@ Chat conversation endpoint supporting all providers.
"content": "Hello, how are you?"
}
],
"params": {
"temperature": 0.7,
"max_tokens": 1000
},
"temperature": 0.7,
"max_tokens": 1000,
"fallbacks": ["anthropic/claude-3-sonnet-20240229"]
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
```
Expand Down Expand Up @@ -128,10 +126,8 @@ Text completion endpoint for simple text generation.
{
"model": "openai/gpt-4o-mini",
"text": "The future of AI is",
"params": {
"temperature": 0.8,
"max_tokens": 150
}
"temperature": 0.8,
"max_tokens": 150
}
```

Expand Down Expand Up @@ -422,7 +418,6 @@ bifrost_provider_errors_total{provider="openai",error_type="rate_limit"} 23
| Parameter | Type | Description | Example |
| ----------- | ------ | ----------------------- | ---------------------------------------- |
| `model` | string | Provider and model name | `"openai/gpt-4o-mini"` |
| `params` | object | Model parameters | `{"temperature": 0.7}` |
| `fallbacks` | array | Fallback model names | `["anthropic/claude-3-sonnet-20240229"]` |

### **Model Parameters**
Expand Down
90 changes: 51 additions & 39 deletions docs/usage/http-transport/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,32 +56,30 @@
"content": "What's the weather in San Francisco?"
}
],
"params": {
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA"
}
},
"required": ["location"]
}
}
}
],
"tool_choice": {
"tools": [
{
"type": "function",
"function": {
"name": "get_weather"
"name": "get_weather",
"description": "Get current weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA"
}
},
"required": ["location"]
}
}
}
],
"tool_choice": {
"type": "function",
"function": {
"name": "get_weather"
}
}
Comment thread
TejasGhatte marked this conversation as resolved.
}
},
Expand Down Expand Up @@ -123,10 +121,8 @@
]
}
],
"params": {
"max_tokens": 1000,
"temperature": 0.7
}
"max_tokens": 1000,
"temperature": 0.7
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
}
Expand Down Expand Up @@ -250,10 +246,8 @@
"value": {
"model": "anthropic/claude-2.1",
"text": "The future of artificial intelligence is",
"params": {
"max_tokens": 100,
"temperature": 0.7
}
"max_tokens": 100,
"temperature": 0.7
}
},
"with_stop_sequences": {
Expand All @@ -262,11 +256,9 @@
"provider": "anthropic",
"model": "claude-2.1",
"text": "Write a short story about a robot:",
"params": {
"max_tokens": 200,
"temperature": 0.8,
"stop_sequences": ["\n\n", "THE END"]
}
"max_tokens": 200,
"temperature": 0.8,
"stop_sequences": ["\n\n", "THE END"]
}
}
}
Expand Down Expand Up @@ -1737,8 +1729,11 @@
"description": "Array of chat messages",
"minItems": 1
},
"params": {
"$ref": "#/components/schemas/ModelParameters"
"max_tokens": {
"type": "integer",
"minimum": 1,
"description": "Maximum number of tokens to generate",
"example": 1000
},
"fallbacks": {
"type": "array",
Expand All @@ -1764,9 +1759,26 @@
"description": "Text prompt for completion",
"example": "The benefits of artificial intelligence include"
},
"params": {
"$ref": "#/components/schemas/ModelParameters"
"max_tokens": {
"type": "integer",
"minimum": 1,
"description": "Maximum number of tokens to generate",
"example": 1000
},
Comment thread
coderabbitai[bot] marked this conversation as resolved.
"temperature": {
"type": "number",
"minimum": 0.0,
"maximum": 2.0,
"description": "Controls randomness in the output",
"example": 0.7
},
"stop_sequences": {
"type": "array",
"items": {
"type": "string"
},
"description": "Sequences that stop generation"
},
"fallbacks": {
"type": "array",
"items": {
Expand Down
114 changes: 110 additions & 4 deletions transports/bifrost-http/handlers/completions.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"strconv"
"strings"

"github.com/bytedance/sonic"
"github.com/fasthttp/router"
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
Expand All @@ -35,12 +36,38 @@ func NewCompletionHandler(client *bifrost.Bifrost, logger schemas.Logger) *Compl
}
}

// completionRequestKnownFields enumerates every JSON key that maps to a
// declared field on CompletionRequest. UnmarshalJSON consults this set:
// any key NOT listed here is treated as a provider-specific extra
// parameter and collected into ExtraParams as-is.
// NOTE(review): the legacy "params" key is absent, so a client still
// sending {"params": {...}} will have that whole object forwarded inside
// ExtraParams rather than ignored — confirm this is the intended
// backward-compatibility behavior.
var completionRequestKnownFields = map[string]bool{
"model": true,
"messages": true,
"text": true,
"fallbacks": true,
"stream": true,
"input": true,
"voice": true,
"instructions": true,
"response_format": true,
"stream_format": true,
"tool_choice": true,
"tools": true,
"temperature": true,
"top_p": true,
"top_k": true,
"max_tokens": true,
"stop_sequences": true,
"presence_penalty": true,
"frequency_penalty": true,
"parallel_tool_calls": true,
"encoding_format": true,
"dimensions": true,
"user": true,
}

// CompletionRequest represents a request for either text or chat completion
type CompletionRequest struct {
Model string `json:"model"` // Model to use in "provider/model" format
Messages []schemas.BifrostMessage `json:"messages"` // Chat messages (for chat completion)
Text string `json:"text"` // Text input (for text completion)
Params *schemas.ModelParameters `json:"params"` // Additional model parameters
Fallbacks []string `json:"fallbacks"` // Fallback providers and models in "provider/model" format
Stream *bool `json:"stream"` // Whether to stream the response

Expand All @@ -50,6 +77,85 @@ type CompletionRequest struct {
Instructions string `json:"instructions"`
ResponseFormat string `json:"response_format"`
StreamFormat *string `json:"stream_format,omitempty"`

ToolChoice *schemas.ToolChoice `json:"tool_choice,omitempty"` // Whether to call a tool
Tools *[]schemas.Tool `json:"tools,omitempty"` // Tools to use
Temperature *float64 `json:"temperature,omitempty"` // Controls randomness in the output
TopP *float64 `json:"top_p,omitempty"` // Controls diversity via nucleus sampling
TopK *int `json:"top_k,omitempty"` // Controls diversity via top-k sampling
MaxTokens *int `json:"max_tokens,omitempty"` // Maximum number of tokens to generate
StopSequences *[]string `json:"stop_sequences,omitempty"` // Sequences that stop generation
PresencePenalty *float64 `json:"presence_penalty,omitempty"` // Penalizes repeated tokens
FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` // Penalizes frequent tokens
ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` // Enables parallel tool calls
EncodingFormat *string `json:"encoding_format,omitempty"` // Format for embedding output (e.g., "float", "base64")
Dimensions *int `json:"dimensions,omitempty"` // Number of dimensions for embedding output
User *string `json:"user,omitempty"` // User identifier for tracking
// Dynamic parameters that can be provider-specific, they are directly
// added to the request as is.
ExtraParams map[string]interface{} `json:"-"`
}

// UnmarshalJSON decodes a CompletionRequest, routing any JSON keys that
// are not declared in completionRequestKnownFields into ExtraParams so
// they can be forwarded to the provider untouched.
func (cr *CompletionRequest) UnmarshalJSON(data []byte) error {
	// Decode the typed fields through an alias so this method is not
	// re-entered recursively.
	type plain CompletionRequest
	if err := sonic.Unmarshal(data, (*plain)(cr)); err != nil {
		return err
	}

	// Decode a second time into a raw map to discover the unknown keys.
	var fields map[string]json.RawMessage
	if err := sonic.Unmarshal(data, &fields); err != nil {
		return err
	}

	if cr.ExtraParams == nil {
		cr.ExtraParams = make(map[string]interface{})
	}

	for name, raw := range fields {
		if completionRequestKnownFields[name] {
			continue
		}
		var val interface{}
		if err := sonic.Unmarshal(raw, &val); err != nil {
			// Best effort: an undecodable extra field is dropped.
			continue
		}
		cr.ExtraParams[name] = val
	}

	return nil
}

// GetModelParameters assembles a schemas.ModelParameters from the
// request's flattened top-level fields, copying across any
// provider-specific extras captured by UnmarshalJSON.
func (cr *CompletionRequest) GetModelParameters() *schemas.ModelParameters {
	params := &schemas.ModelParameters{
		// Pre-size for the extras we are about to copy; len of a nil
		// map is 0, so this is safe when no extras were supplied.
		ExtraParams:       make(map[string]interface{}, len(cr.ExtraParams)),
		ToolChoice:        cr.ToolChoice,
		Tools:             cr.Tools,
		Temperature:       cr.Temperature,
		TopP:              cr.TopP,
		TopK:              cr.TopK,
		MaxTokens:         cr.MaxTokens,
		StopSequences:     cr.StopSequences,
		PresencePenalty:   cr.PresencePenalty,
		FrequencyPenalty:  cr.FrequencyPenalty,
		ParallelToolCalls: cr.ParallelToolCalls,
		EncodingFormat:    cr.EncodingFormat,
		Dimensions:        cr.Dimensions,
		User:              cr.User,
	}

	// Ranging over a nil map is a no-op, so no nil guard is needed.
	for k, v := range cr.ExtraParams {
		params.ExtraParams[k] = v
	}

	return params
}

type CompletionType string
Expand Down Expand Up @@ -290,7 +396,7 @@ func (h *CompletionHandler) TranscriptionCompletion(ctx *fasthttp.RequestCtx) {
// It handles request parsing, validation, and response formatting
func (h *CompletionHandler) handleRequest(ctx *fasthttp.RequestCtx, completionType CompletionType) {
var req CompletionRequest
if err := json.Unmarshal(ctx.PostBody(), &req); err != nil {
if err := sonic.Unmarshal(ctx.PostBody(), &req); err != nil {
SendError(ctx, fasthttp.StatusBadRequest, fmt.Sprintf("Invalid request format: %v", err), h.logger)
return
}
Expand Down Expand Up @@ -326,7 +432,7 @@ func (h *CompletionHandler) handleRequest(ctx *fasthttp.RequestCtx, completionTy
bifrostReq := &schemas.BifrostRequest{
Model: modelName,
Provider: schemas.ModelProvider(provider),
Params: req.Params,
Params: req.GetModelParameters(),
Fallbacks: fallbacks,
}

Expand Down Expand Up @@ -458,7 +564,7 @@ func (h *CompletionHandler) handleStreamingResponse(ctx *fasthttp.RequestCtx, ge
}

// Convert response to JSON
responseJSON, err := json.Marshal(data)
responseJSON, err := sonic.Marshal(data)
if err != nil {
h.logger.Warn(fmt.Sprintf("Failed to marshal streaming response: %v", err))
continue
Expand Down
6 changes: 6 additions & 0 deletions transports/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/maximhq/bifrost/transports
go 1.24.1

require (
github.com/bytedance/sonic v1.14.0
github.com/fasthttp/router v1.5.4
Comment thread
Pratham-Mishra04 marked this conversation as resolved.
github.com/fasthttp/websocket v1.5.12
github.com/google/uuid v1.6.0
Expand Down Expand Up @@ -37,6 +38,7 @@ require (
github.com/bytedance/sonic/loader v0.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cloudwego/base64x v0.1.5 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
Expand All @@ -47,6 +49,7 @@ require (
github.com/gorilla/websocket v1.5.3 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
github.com/mark3labs/mcp-go v0.32.0 // indirect
github.com/maximhq/maxim-go v0.1.3 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
Expand All @@ -56,6 +59,7 @@ require (
github.com/savsgio/gotils v0.0.0-20240704082632-aef3928b8a38 // indirect
github.com/spf13/cast v1.7.1 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
Expand All @@ -64,6 +68,7 @@ require (
go.opentelemetry.io/otel/metric v1.35.0 // indirect
go.opentelemetry.io/otel/trace v1.35.0 // indirect
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect
golang.org/x/crypto v0.38.0 // indirect
golang.org/x/net v0.40.0 // indirect
golang.org/x/oauth2 v0.30.0 // indirect
Expand All @@ -73,3 +78,4 @@ require (
google.golang.org/grpc v1.72.0 // indirect
google.golang.org/protobuf v1.36.6 // indirect
)