Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/skills/docs-writer/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ grep -n 'func.*create\|func.*update\|func.*delete\|func.*get' transports/bifrost
| `plugins.go` | `/api/plugins` | CRUD plugins |
| `config.go` | `/api/config` | GET/PUT config |
| `config.go` | `/api/proxy-config` | GET/PUT proxy config |
| `cache.go` | `/api/cache/clear/{requestId}` | DELETE cache |
| `cache.go` | `/api/cache/clear/{cacheId}` | DELETE cache |
| `session.go` | `/api/session/*` | Login/logout/auth check |
| `oauth2.go` | `/api/oauth/*` | OAuth callback/status |

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ transports/schema/config.schema.json
*.db
*.db-shm
*.db-wal
transports/bifrost-http/v1.5.x

# Test reports
test-reports
Expand Down
10 changes: 7 additions & 3 deletions core/schemas/bifrost.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ const (
BifrostContextKeyTargetUserID BifrostContextKey = "target_user_id"
BifrostContextKeyIsAzureUserAgent BifrostContextKey = "bifrost-is-azure-user-agent" // bool (set by bifrost - DO NOT SET THIS MANUALLY)) - whether the request is an Azure user agent (only used in gateway)
BifrostContextKeyVideoOutputRequested BifrostContextKey = "bifrost-video-output-requested"
BifrostContextKeyValidateKeys BifrostContextKey = "bifrost-validate-keys" // bool (triggers additional key validation during provider add/update)
BifrostContextKeyValidateKeys BifrostContextKey = "bifrost-validate-keys" // bool (triggers additional key validation during provider add/update)
BifrostContextKeyProviderResponseHeaders BifrostContextKey = "bifrost-provider-response-headers" // map[string]string (set by provider handlers for response header forwarding)
BifrostContextKeyMCPAddedTools BifrostContextKey = "bifrost-mcp-added-tools" // []string (set by bifrost - DO NOT SET THIS MANUALLY)) - list of tools added to the request by MCP, all the tool are in the format "clientName-toolName"
BifrostContextKeyLargePayloadMode BifrostContextKey = "bifrost-large-payload-mode" // bool (set by bifrost - DO NOT SET THIS MANUALLY)) indicates large payload streaming mode is active
Expand All @@ -287,7 +287,7 @@ BifrostContextKeyValidateKeys BifrostContextKey = "bifros
BifrostContextKeySessionID BifrostContextKey = "bifrost-session-id" // string session ID for the request (session stickiness)
BifrostContextKeySessionTTL BifrostContextKey = "bifrost-session-ttl" // time.Duration session TTL for the request (session stickiness)
BifrostContextKeyMCPExtraHeaders BifrostContextKey = "bifrost-mcp-extra-headers" // map[string][]string (these headers are forwarded only to the MCP while tool execution if they are in the allowlist of the MCP client)
BifrostContextKeyMCPLogID BifrostContextKey = "bifrost-mcp-log-id" // string (unique UUID for each MCP tool log entry - set per goroutine by agent executor - DO NOT SET THIS MANUALLY)
BifrostContextKeyMCPLogID BifrostContextKey = "bifrost-mcp-log-id" // string (unique UUID for each MCP tool log entry - set per goroutine by agent executor - DO NOT SET THIS MANUALLY)
BifrostContextKeyCompatConvertTextToChat BifrostContextKey = "bifrost-compat-convert-text-to-chat" // bool (per-request override from x-bf-compat header)
BifrostContextKeyCompatConvertChatToResponses BifrostContextKey = "bifrost-compat-convert-chat-to-responses" // bool (per-request override from x-bf-compat header)
BifrostContextKeyCompatShouldDropParams BifrostContextKey = "bifrost-compat-should-drop-params" // bool (per-request override from x-bf-compat header)
Expand All @@ -296,7 +296,7 @@ BifrostContextKeyValidateKeys BifrostContextKey = "bifros
BifrostContextKeyDimensions BifrostContextKey = "bifrost-dimensions" // map[string]string (set by HTTP transport from x-bf-dim-* headers) BifrostContextKeyDimensions holds per-request key/value dimensions supplied via x-bf-dim-<key> request headers. These dimensions are forwarded to internal logs (as metadata)
BifrostContextKeySkipModelCatalogProviderSelection BifrostContextKey = "bifrost-skip-model-catalog-provider-selection" // bool (set by bifrost - DO NOT SET THIS MANUALLY)) - skip model catalog provider selection
IsAPIKeyAuthContextKey BifrostContextKey = "is_api_key_auth"
IsLocalAdminContextKey BifrostContextKey = "is_local_admin" // bool (set by auth middleware when password-based auth succeeds - local admin user bypasses RBAC)
IsLocalAdminContextKey BifrostContextKey = "is_local_admin" // bool (set by auth middleware when password-based auth succeeds - local admin user bypasses RBAC)
)

const (
Expand Down Expand Up @@ -1242,6 +1242,10 @@ type BifrostCacheDebug struct {
// Semantic cache only (only when cache is hit)
Threshold *float64 `json:"threshold,omitempty"`
Similarity *float64 `json:"similarity,omitempty"`

// CacheHitLatency is the time in milliseconds spent serving the cache hit
// (lookup + response build). Only set when CacheHit is true.
CacheHitLatency *int64 `json:"cache_hit_latency,omitempty"`
}

const (
Expand Down
35 changes: 35 additions & 0 deletions core/schemas/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,41 @@ func (bc *BifrostContext) WithValue(key any, value any) *BifrostContext {
return bc
}

// Root returns the underlying root BifrostContext. For root contexts this is
// the receiver itself; for plugin-scoped contexts it is the underlying root
// that scoped Value/SetValue calls delegate to.
//
// PLUGIN AUTHORS: capture Root() synchronously inside Pre/PostLLMHook (or
// any other hook) when you need to write to the context from a goroutine
// that outlives the hook. The plugin-scoped *BifrostContext passed into your
// hook is reclaimed by an internal sync.Pool the moment the hook returns —
// any later SetValue/Value call on it lands in detached storage that nobody
// downstream can read (and can leak into a future pool reuse). The root,
// in contrast, lives for the entire request, so a pointer captured here is
// safe to use for the lifetime of the request even after your hook returns.
//
// Example:
//
// func (p *Plugin) PreLLMHook(ctx *schemas.BifrostContext, req ...) (...) {
// rootCtx := ctx.Root() // capture before the scope is released
// go func() {
// // ... long-running work that produces stream chunks ...
// rootCtx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true)
// }()
// return req, &schemas.LLMPluginShortCircuit{Stream: ch}, nil
// }
func (bc *BifrostContext) Root() *BifrostContext {
	// Follow valueDelegate links until we reach a context that has none.
	// Scoped contexts may be stacked on top of each other, so a single hop
	// is not enough: it could hand back an intermediate scope instead of
	// the context at the end of the chain.
	// A nil receiver falls straight through and yields nil.
	current := bc
	for {
		if current == nil || current.valueDelegate == nil {
			return current
		}
		current = current.valueDelegate
	}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// BlockRestrictedWrites turns on blocking of restricted writes for this context; it returns nothing.
func (bc *BifrostContext) BlockRestrictedWrites() {
bc.blockRestrictedWrites.Store(true)
Expand Down
34 changes: 34 additions & 0 deletions core/schemas/context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,3 +329,37 @@ func TestPluginLog_PoolReuse(t *testing.T) {
t.Errorf("expected 100 logs from pool reuse, got %d", len(logs))
}
}

// TestRoot_UnwrapsChainedValueDelegates verifies Root() walks the entire
// delegate chain. A naive single-step unwrap would return an intermediate
// pooled scope, which loses the async-safety guarantee as soon as that
// intermediate scope is recycled.
func TestRoot_UnwrapsChainedValueDelegates(t *testing.T) {
	requestRoot := NewBifrostContext(context.Background(), NoDeadline)

	outerName, innerName := "outer", "inner"

	// First level: a regular plugin scope created from the request root.
	firstScope := requestRoot.WithPluginScope(&outerName)

	// Second level: hand-assembled scope that delegates to the first scope
	// rather than to the root, mimicking a plugin that derives a scope from
	// a ctx that is itself already scoped.
	secondScope := &BifrostContext{
		parent:        firstScope.parent,
		done:          firstScope.done,
		pluginScope:   &innerName,
		valueDelegate: firstScope,
	}

	// Root() must resolve through both levels to the request root, and the
	// context it returns must itself have no further delegate.
	switch resolved := secondScope.Root(); {
	case resolved != requestRoot:
		t.Fatalf("Root() did not walk the chain to the request root: got %p, want %p", resolved, requestRoot)
	case resolved.valueDelegate != nil:
		t.Fatalf("Root() returned a context with a non-nil valueDelegate: %+v", resolved)
	}

	// Baseline: a context that has no delegate is its own root.
	if requestRoot.Root() != requestRoot {
		t.Fatal("Root() on a non-scoped context should return the receiver")
	}
}
28 changes: 17 additions & 11 deletions docs/features/semantic-caching.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,9 @@ bifrostConfig := schemas.BifrostConfig{
**Cache Settings**:
- **TTL (seconds)**: How long cached responses are kept (default: 300 s).
- **Similarity Threshold**: Cosine similarity cutoff for a cache hit (0–1, default: 0.8).
- **Dimension**: Vector dimension matching your embedding model (e.g. 1536 for `text-embedding-3-small`).
- **Dimension**: Vector size produced by the embedding model — must match the model exactly. Common values: `1536` for OpenAI `text-embedding-3-small`, `3072` for `text-embedding-3-large`, `768` for many Cohere/Voyage models. Use `1` only in direct-only mode (no provider).

> **Heads up**: a vector store namespace can only hold vectors of *one* dimension. Whenever you change the embedding **provider**, **model**, or **dimension**, make sure the new dimension still matches what the model produces — otherwise writes to the existing namespace will fail and reads will silently miss. The namespace is **not** recreated automatically; either point `vector_store_namespace` at a fresh name or drop the existing class/index in your vector store before saving.

**Conversation Settings**:
- **Conversation History Threshold**: Skip caching when the conversation has more than this many messages (default: 3).
Expand Down Expand Up @@ -612,6 +614,7 @@ Example HTTP Response:
"extra_fields": {
"cache_debug": {
"cache_hit": false,
"cache_id": "550e8500-e29b-41d4-a725-446655440001",
"provider_used": "openai",
"model_used": "gpt-4o-mini",
"input_tokens": 20
Expand All @@ -620,22 +623,21 @@ Example HTTP Response:
}
```


These variables allow you to detect cached responses and get the cache entry ID needed for clearing specific entries.
`cache_debug` is populated on both hits and misses. `cache_id` is the storage ID of the entry — use it to invalidate the entry later. The embedding-related fields (`provider_used`, `model_used`, `input_tokens`) are only present when semantic search actually ran.

### Clear Specific Cache Entry

Use the request ID from cached responses to clear specific entries:
Use the `cache_id` from `cache_debug` to clear a specific entry:

<Tabs group="cache-clear">

<Tab title="Go SDK">

```go
// Clear specific entry by request ID
err := plugin.ClearCacheForRequestID("550e8400-e29b-41d4-a716-446655440000")
// Clear specific entry by cache ID (read from response.ExtraFields.CacheDebug.CacheID)
err := plugin.ClearCacheForCacheID("550e8500-e29b-41d4-a725-446655440001")

// Clear all entries for a cache key
// Clear all entries for a cache key
err := plugin.ClearCacheForKey("support-session-456")
```

Expand All @@ -644,8 +646,8 @@ err := plugin.ClearCacheForKey("support-session-456")
<Tab title="HTTP API">

```bash
# Clear specific cached entry by request ID
curl -X DELETE http://localhost:8080/api/cache/clear/550e8400-e29b-41d4-a716-446655440000
# Clear specific cached entry by cache ID
curl -X DELETE http://localhost:8080/api/cache/clear/550e8500-e29b-41d4-a725-446655440001

# Clear all entries for a cache key
curl -X DELETE http://localhost:8080/api/cache/clear-by-key/support-session-456
Expand All @@ -665,7 +667,7 @@ The semantic cache automatically handles cleanup to prevent storage bloat:
- **Namespace Isolation**: Each Bifrost instance uses isolated vector store namespaces to prevent conflicts

**Manual Cleanup Options:**
- Clear specific entries by request ID (see examples above)
- Clear specific entries by cache ID (see examples above)
- Clear all entries for a cache key
- Restart Bifrost to clear all cache data

Expand All @@ -674,7 +676,11 @@ The semantic cache namespace and all its cache entries are deleted when Bifrost
</Warning>

<Warning>
**Dimension Changes**: If you update the `dimension` config, the existing namespace will contain data with mixed dimensions, causing retrieval issues. To avoid this, either use a different `vector_store_namespace` or set `cleanup_on_shutdown: true` before restarting.
**Dimension / Provider / Model Changes**: A vector store namespace can only hold vectors of **one** dimension. If you change `dimension` (or switch to an embedding `provider`/`model` that produces a different vector size), the existing namespace is **not** recreated automatically — `CreateNamespace` is a no-op when the class/collection already exists. Subsequent writes will fail (vector-size mismatch) and reads will silently miss. Before saving the change, either:

- point `vector_store_namespace` at a fresh name, or
- drop the existing class/index in your vector store, or
- set `cleanup_on_shutdown: true` and restart so the old namespace is removed first.
</Warning>

---
Expand Down
65 changes: 65 additions & 0 deletions docs/migration-guides/v1.5.0.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,67 @@ Single-key, pinned (`x-bf-key-id` / `x-bf-key-name`), and session-sticky request

---

## Breaking Change 13: Semantic Cache Clear API is Now Cache-ID Based

The semantic cache "clear by request ID" API has been removed. Storage IDs in the cache are deterministic UUIDv5 hashes derived from the request payload (so the same prompt across many requests maps to a single cache entry), which made the previous request-ID-based delete unable to match anything written by the direct-search path.

The replacement is keyed on the cache entry's storage ID, which is now stamped on every response in `extra_fields.cache_debug.cache_id` — on cache hits **and** cache misses. Hold onto that ID from the response if you ever need to invalidate the entry.

### REST API

| Before (v1.4.x) | After (v1.5.0) |
|---|---|
| `DELETE /api/cache/clear/{requestId}` | `DELETE /api/cache/clear/{cacheId}` |

The path parameter name and meaning both changed. The cache key endpoint (`DELETE /api/cache/clear-by-key/{cacheKey}`) is unchanged.

**Before:**
```bash
curl -X DELETE localhost:8080/api/cache/clear/req-aaa-bbb-ccc
```

**After:**
```bash
# Read the cache ID from a prior response
CACHE_ID=$(curl ... | jq -r '.extra_fields.cache_debug.cache_id')

curl -X DELETE localhost:8080/api/cache/clear/$CACHE_ID
```

### Go SDK

The `ClearCacheForRequestID` method on `*semanticcache.Plugin` has been removed and replaced by `ClearCacheForCacheID`.

**Before:**
```go
err := plugin.ClearCacheForRequestID(requestID)
```

**After:**
```go
// On hit or miss, the storage ID is exposed via CacheDebug.CacheID
cacheID := response.ExtraFields.CacheDebug.CacheID
if cacheID != nil {
err := plugin.ClearCacheForCacheID(*cacheID)
}
```

### Why the rename

A single cache entry is reused across many request IDs (that is the point of caching). A request-ID-based delete only ever made sense for the original writer of the entry, and even that broke once direct search switched to deterministic storage IDs. The cache ID is the only stable handle that works for both writers and readers, so the API now reflects that.

### CacheDebug on misses

`extra_fields.cache_debug` is now populated on cache misses too — previously it was only emitted when semantic search ran. The new fields on a miss:

- `cache_hit: false`
- `cache_id`: the storage ID where the entry was written (use this with `ClearCacheForCacheID`)
- `provider_used` / `model_used` / `input_tokens`: only present when semantic search actually ran (i.e. embedding model was invoked)

If you parse `cache_debug` and assume it is either absent or has `cache_hit: true`, update your consumer to handle the `cache_hit: false` shape.

---

## Opting Out: `version: 1` Compatibility Mode

If you are not ready to adopt the new deny-by-default semantics, you can add a single field to `config.json` to restore v1.4.x behavior for all allow-list fields loaded from that file:
Expand Down Expand Up @@ -611,6 +672,10 @@ Replace `.Model` with `.RequestedModel` (and optionally `.ResolvedModel`) on any
<Step title="Handle empty selected_key_id on terminal retry failures">
If your code reads `selected_key_id` / `selected_key_name` from the request context or log entries to attribute failed requests, add a null/empty check and fall back to `attempt_trail` for the full per-attempt key history.
</Step>

<Step title="Switch semantic cache invalidation to cache IDs">
Replace `DELETE /api/cache/clear/{requestId}` with `DELETE /api/cache/clear/{cacheId}`, and replace `plugin.ClearCacheForRequestID(...)` with `plugin.ClearCacheForCacheID(...)`. Read the cache ID from `extra_fields.cache_debug.cache_id` on the response (now populated on misses too).
</Step>
</Steps>

---
Expand Down
12 changes: 6 additions & 6 deletions docs/openapi/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -41769,20 +41769,20 @@
}
}
},
"/api/cache/clear/{requestId}": {
"/api/cache/clear/{cacheId}": {
"delete": {
"operationId": "clearCacheByRequestId",
"summary": "Clear cache by request ID",
"description": "Clears cache entries associated with a specific request ID.",
"operationId": "clearCacheByCacheId",
"summary": "Clear cache entry by cache ID",
"description": "Deletes a single cache entry by its storage ID. Read the cache ID from\n`extra_fields.cache_debug.cache_id` on a prior response — it is populated\non both cache hits and cache misses.\n",
"tags": [
"Cache"
],
"parameters": [
{
"name": "requestId",
"name": "cacheId",
"in": "path",
"required": true,
"description": "Request ID to clear cache for",
"description": "Storage ID of the cache entry to delete",
"schema": {
"type": "string"
}
Expand Down
4 changes: 2 additions & 2 deletions docs/openapi/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -786,8 +786,8 @@ paths:
$ref: './paths/management/prompts.yaml#/sessions-commit'

# Cache
/api/cache/clear/{requestId}:
$ref: './paths/management/cache.yaml#/clear-by-request-id'
/api/cache/clear/{cacheId}:
$ref: './paths/management/cache.yaml#/clear-by-cache-id'
/api/cache/clear-by-key/{cacheKey}:
$ref: './paths/management/cache.yaml#/clear-by-cache-key'

Expand Down
15 changes: 9 additions & 6 deletions docs/openapi/paths/management/cache.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
clear-by-request-id:
clear-by-cache-id:
delete:
operationId: clearCacheByRequestId
summary: Clear cache by request ID
description: Clears cache entries associated with a specific request ID.
operationId: clearCacheByCacheId
summary: Clear cache entry by cache ID
description: |
Deletes a single cache entry by its storage ID. Read the cache ID from
`extra_fields.cache_debug.cache_id` on a prior response — it is populated
on both cache hits and cache misses.
tags:
- Cache
parameters:
- name: requestId
- name: cacheId
in: path
required: true
description: Request ID to clear cache for
description: Storage ID of the cache entry to delete
schema:
type: string
responses:
Expand Down
3 changes: 2 additions & 1 deletion framework/logstore/matviews.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,8 @@ func canUseMatViewFilters(f SearchFilters) bool {
f.MinLatency == nil && f.MaxLatency == nil &&
f.MinTokens == nil && f.MaxTokens == nil &&
f.MinCost == nil && f.MaxCost == nil &&
!f.MissingCostOnly
!f.MissingCostOnly &&
len(f.CacheHitTypes) == 0
}

// canUseMatView checks both that materialized views are ready (created and
Expand Down
Loading
Loading