maximhq · Pratham-Mishra04 · Apr 30, 2026
@@ -176,7 +176,7 @@ grep -n 'func.*create\|func.*update\|func.*delete\|func.*get' transports/bifrost
 | `plugins.go` | `/api/plugins` | CRUD plugins |
 | `config.go` | `/api/config` | GET/PUT config |
 | `config.go` | `/api/proxy-config` | GET/PUT proxy config |
-| `cache.go` | `/api/cache/clear/{requestId}` | DELETE cache |
+| `cache.go` | `/api/cache/clear/{cacheId}` | DELETE cache |
 | `session.go` | `/api/session/*` | Login/logout/auth check |
 | `oauth2.go` | `/api/oauth/*` | OAuth callback/status |
 

diff --git a/.gitignore b/.gitignore
@@ -45,6 +45,7 @@ transports/schema/config.schema.json
 *.db
 *.db-shm
 *.db-wal
+transports/bifrost-http/v1.5.x
 
 # Test reports
 test-reports

diff --git a/core/schemas/bifrost.go b/core/schemas/bifrost.go
@@ -263,7 +263,7 @@ const (
 	BifrostContextKeyTargetUserID                        BifrostContextKey = "target_user_id"
 	BifrostContextKeyIsAzureUserAgent                    BifrostContextKey = "bifrost-is-azure-user-agent" // bool (set by bifrost - DO NOT SET THIS MANUALLY)) - whether the request is an Azure user agent (only used in gateway)
 	BifrostContextKeyVideoOutputRequested                BifrostContextKey = "bifrost-video-output-requested"
-BifrostContextKeyValidateKeys                        BifrostContextKey = "bifrost-validate-keys"                         // bool (triggers additional key validation during provider add/update)
+	BifrostContextKeyValidateKeys                        BifrostContextKey = "bifrost-validate-keys"                         // bool (triggers additional key validation during provider add/update)
 	BifrostContextKeyProviderResponseHeaders             BifrostContextKey = "bifrost-provider-response-headers"             // map[string]string (set by provider handlers for response header forwarding)
 	BifrostContextKeyMCPAddedTools                       BifrostContextKey = "bifrost-mcp-added-tools"                       // []string (set by bifrost - DO NOT SET THIS MANUALLY)) - list of tools added to the request by MCP, all the tool are in the format "clientName-toolName"
 	BifrostContextKeyLargePayloadMode                    BifrostContextKey = "bifrost-large-payload-mode"                    // bool (set by bifrost - DO NOT SET THIS MANUALLY)) indicates large payload streaming mode is active
@@ -287,7 +287,7 @@ BifrostContextKeyValidateKeys                        BifrostContextKey = "bifros
 	BifrostContextKeySessionID                           BifrostContextKey = "bifrost-session-id"                            // string session ID for the request (session stickiness)
 	BifrostContextKeySessionTTL                          BifrostContextKey = "bifrost-session-ttl"                           // time.Duration session TTL for the request (session stickiness)
 	BifrostContextKeyMCPExtraHeaders                     BifrostContextKey = "bifrost-mcp-extra-headers"                     // map[string][]string (these headers are forwarded only to the MCP while tool execution if they are in the allowlist of the MCP client)
-	BifrostContextKeyMCPLogID                            BifrostContextKey = "bifrost-mcp-log-id"                             // string (unique UUID for each MCP tool log entry - set per goroutine by agent executor - DO NOT SET THIS MANUALLY)
+	BifrostContextKeyMCPLogID                            BifrostContextKey = "bifrost-mcp-log-id"                            // string (unique UUID for each MCP tool log entry - set per goroutine by agent executor - DO NOT SET THIS MANUALLY)
 	BifrostContextKeyCompatConvertTextToChat             BifrostContextKey = "bifrost-compat-convert-text-to-chat"           // bool (per-request override from x-bf-compat header)
 	BifrostContextKeyCompatConvertChatToResponses        BifrostContextKey = "bifrost-compat-convert-chat-to-responses"      // bool (per-request override from x-bf-compat header)
 	BifrostContextKeyCompatShouldDropParams              BifrostContextKey = "bifrost-compat-should-drop-params"             // bool (per-request override from x-bf-compat header)
@@ -296,7 +296,7 @@ BifrostContextKeyValidateKeys                        BifrostContextKey = "bifros
 	BifrostContextKeyDimensions                          BifrostContextKey = "bifrost-dimensions"                            // map[string]string (set by HTTP transport from x-bf-dim-* headers) BifrostContextKeyDimensions holds per-request key/value dimensions supplied via x-bf-dim-<key> request headers. These dimensions are forwarded to internal logs (as metadata)
 	BifrostContextKeySkipModelCatalogProviderSelection   BifrostContextKey = "bifrost-skip-model-catalog-provider-selection" // bool (set by bifrost - DO NOT SET THIS MANUALLY)) - skip model catalog provider selection
 	IsAPIKeyAuthContextKey                               BifrostContextKey = "is_api_key_auth"
-	IsLocalAdminContextKey                               BifrostContextKey = "is_local_admin"                                // bool (set by auth middleware when password-based auth succeeds - local admin user bypasses RBAC)
+	IsLocalAdminContextKey                               BifrostContextKey = "is_local_admin" // bool (set by auth middleware when password-based auth succeeds - local admin user bypasses RBAC)
 )
 
 const (
@@ -1242,6 +1242,10 @@ type BifrostCacheDebug struct {
 	// Semantic cache only (only when cache is hit)
 	Threshold  *float64 `json:"threshold,omitempty"`
 	Similarity *float64 `json:"similarity,omitempty"`
+
+	// CacheHitLatency is the time in milliseconds spent serving the cache hit
+	// (lookup + response build). Only set when CacheHit is true.
+	CacheHitLatency *int64 `json:"cache_hit_latency,omitempty"`
 }
 
 const (

diff --git a/core/schemas/context.go b/core/schemas/context.go
@@ -127,6 +127,41 @@ func (bc *BifrostContext) WithValue(key any, value any) *BifrostContext {
 	return bc
 }
 
+// Root returns the underlying root BifrostContext. For a root context this is
+// the receiver itself; for a plugin-scoped context it is the root that scoped
+// Value/SetValue calls delegate to. Calling Root on a nil receiver yields nil.
+//
+// PLUGIN AUTHORS: capture Root() synchronously inside Pre/PostLLMHook (or
+// any other hook) when you need to write to the context from a goroutine
+// that outlives the hook. The plugin-scoped *BifrostContext passed into your
+// hook is reclaimed by an internal sync.Pool the moment the hook returns —
+// any later SetValue/Value call on it lands in detached storage that nobody
+// downstream can read (and can leak into a future pool reuse). The root,
+// in contrast, lives for the entire request, so a pointer captured here is
+// safe to use for the lifetime of the request even after your hook returns.
+//
+// Example (ch stands for the stream channel the plugin created earlier):
+//
+//	func (p *Plugin) PreLLMHook(ctx *schemas.BifrostContext, req ...) (...) {
+//	    rootCtx := ctx.Root() // capture before the scope is released
+//	    go func() {
+//	        // ... long-running work that produces stream chunks ...
+//	        rootCtx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true)
+//	    }()
+//	    return req, &schemas.LLMPluginShortCircuit{Stream: ch}, nil
+//	}
+func (bc *BifrostContext) Root() *BifrostContext {
+	// Follow valueDelegate until it is nil instead of unwrapping one level.
+	// A scoped context can in principle be derived from another scoped
+	// context (e.g. nested plugin scopes); stopping at the first delegate
+	// would return an intermediate pooled scope, which stops being safe to
+	// use asynchronously once that scope is released. The nil guard also
+	// makes a nil receiver return nil rather than panic.
+	for bc != nil && bc.valueDelegate != nil {
+		bc = bc.valueDelegate
+	}
+	return bc
+}
+
 // BlockRestrictedWrites turns on the flag that blocks restricted writes on this context; it returns nothing.
 func (bc *BifrostContext) BlockRestrictedWrites() {
 	bc.blockRestrictedWrites.Store(true)

diff --git a/core/schemas/context_test.go b/core/schemas/context_test.go
@@ -329,3 +329,37 @@ func TestPluginLog_PoolReuse(t *testing.T) {
 		t.Errorf("expected 100 logs from pool reuse, got %d", len(logs))
 	}
 }
+
+// TestRoot_UnwrapsChainedValueDelegates verifies that Root() follows the
+// whole valueDelegate chain. A naive single-step unwrap would return an
+// intermediate pooled scope, which loses the async-safety guarantee as soon
+// as that intermediate scope is recycled.
+func TestRoot_UnwrapsChainedValueDelegates(t *testing.T) {
+	root := NewBifrostContext(context.Background(), NoDeadline)
+
+	a := "outer"
+	b := "inner"
+	outer := root.WithPluginScope(&a)
+	// Hand-build a second scoped context whose valueDelegate is `outer`, so
+	// the chain under test is inner -> outer -> root. This simulates a plugin
+	// that derives its own scope from an already-scoped ctx.
+	inner := &BifrostContext{
+		parent:        outer.parent,
+		done:          outer.done,
+		pluginScope:   &b,
+		valueDelegate: outer,
+	}
+
+	got := inner.Root()
+	if got != root {
+		t.Fatalf("Root() did not walk the chain to the request root: got %p, want %p", got, root)
+	}
+	if got.valueDelegate != nil {
+		t.Fatalf("Root() returned a context with a non-nil valueDelegate: %+v", got)
+	}
+
+	// Sanity: a context with no valueDelegate is its own root.
+	if root.Root() != root {
+		t.Fatal("Root() on a non-scoped context should return the receiver")
+	}
+}
diff --git a/docs/features/semantic-caching.mdx b/docs/features/semantic-caching.mdx
@@ -169,7 +169,9 @@ bifrostConfig := schemas.BifrostConfig{
 **Cache Settings**:
 - **TTL (seconds)**: How long cached responses are kept (default: 300 s).
 - **Similarity Threshold**: Cosine similarity cutoff for a cache hit (0–1, default: 0.8).
-- **Dimension**: Vector dimension matching your embedding model (e.g. 1536 for `text-embedding-3-small`).
+- **Dimension**: Vector size produced by the embedding model — must match the model exactly. Common values: `1536` for OpenAI `text-embedding-3-small`, `3072` for `text-embedding-3-large`, `1024` for Cohere `embed-english-v3.0` and Voyage `voyage-3`. Check your model's documentation when unsure. Use `1` only in direct-only mode (no provider).
+
+> **Heads up**: a vector store namespace can only hold vectors of *one* dimension. Whenever you change the embedding **provider**, **model**, or **dimension**, make sure the configured dimension matches what the new model produces. If the resulting vector size differs from what the existing namespace already holds, writes to that namespace will fail and reads will silently miss. The namespace is **not** recreated automatically; either point `vector_store_namespace` at a fresh name or drop the existing class/index in your vector store before saving.
 
 **Conversation Settings**:
 - **Conversation History Threshold**: Skip caching when the conversation has more than this many messages (default: 3).
@@ -612,6 +614,7 @@ Example HTTP Response:
   "extra_fields": {
     "cache_debug": {
       "cache_hit": false,
+      "cache_id": "550e8500-e29b-41d4-a725-446655440001",
       "provider_used": "openai",
       "model_used": "gpt-4o-mini",
       "input_tokens": 20
@@ -620,22 +623,21 @@ Example HTTP Response:
 }
 ```
 
-
-These variables allow you to detect cached responses and get the cache entry ID needed for clearing specific entries.
+`cache_debug` is populated on both hits and misses. `cache_id` is the storage ID of the entry — use it to invalidate the entry later. The embedding-related fields (`provider_used`, `model_used`, `input_tokens`) are only present when semantic search actually ran.
 
 ### Clear Specific Cache Entry
 
-Use the request ID from cached responses to clear specific entries:
+Use the `cache_id` from `cache_debug` to clear a specific entry:
 
 <Tabs group="cache-clear">
 
 <Tab title="Go SDK">
 
 ```go
-// Clear specific entry by request ID
-err := plugin.ClearCacheForRequestID("550e8400-e29b-41d4-a716-446655440000")
+// Clear specific entry by cache ID (read from response.ExtraFields.CacheDebug.CacheID)
+err := plugin.ClearCacheForCacheID("550e8500-e29b-41d4-a725-446655440001")
 
-// Clear all entries for a cache key  
+// Clear all entries for a cache key
 err := plugin.ClearCacheForKey("support-session-456")
 ```
 
@@ -644,8 +646,8 @@ err := plugin.ClearCacheForKey("support-session-456")
 <Tab title="HTTP API">
 
 ```bash
-# Clear specific cached entry by request ID
-curl -X DELETE http://localhost:8080/api/cache/clear/550e8400-e29b-41d4-a716-446655440000
+# Clear specific cached entry by cache ID
+curl -X DELETE http://localhost:8080/api/cache/clear/550e8500-e29b-41d4-a725-446655440001
 
 # Clear all entries for a cache key
 curl -X DELETE http://localhost:8080/api/cache/clear-by-key/support-session-456
@@ -665,7 +667,7 @@ The semantic cache automatically handles cleanup to prevent storage bloat:
 - **Namespace Isolation**: Each Bifrost instance uses isolated vector store namespaces to prevent conflicts
 
 **Manual Cleanup Options:**
-- Clear specific entries by request ID (see examples above)
+- Clear specific entries by cache ID (see examples above)
 - Clear all entries for a cache key
 - Restart Bifrost to clear all cache data
 
@@ -674,7 +676,11 @@ The semantic cache namespace and all its cache entries are deleted when Bifrost
 </Warning>
 
 <Warning>
-**Dimension Changes**: If you update the `dimension` config, the existing namespace will contain data with mixed dimensions, causing retrieval issues. To avoid this, either use a different `vector_store_namespace` or set `cleanup_on_shutdown: true` before restarting.
+**Dimension / Provider / Model Changes**: A vector store namespace can only hold vectors of **one** dimension. If you change `dimension` (or switch to an embedding `provider`/`model` that produces a different vector size), the existing namespace is **not** recreated automatically — `CreateNamespace` is a no-op when the class/collection already exists. Subsequent writes will fail (vector-size mismatch) and reads will silently miss. Before saving the change, either:
+
+- point `vector_store_namespace` at a fresh name, or
+- drop the existing class/index in your vector store, or
+- set `cleanup_on_shutdown: true` and restart so the old namespace is removed first.
 </Warning>
 
 ---

diff --git a/docs/migration-guides/v1.5.0.mdx b/docs/migration-guides/v1.5.0.mdx
@@ -521,6 +521,67 @@ Single-key, pinned (`x-bf-key-id` / `x-bf-key-name`), and session-sticky request
 
 ---
 
+## Breaking Change 13: Semantic Cache Clear API is Now Cache-ID Based
+
+The semantic cache "clear by request ID" API has been removed. Storage IDs in the cache are deterministic UUIDv5 hashes derived from the request payload (so the same prompt across many requests maps to a single cache entry), which made the previous request-ID-based delete unable to match anything written by the direct-search path.
+
+The replacement is keyed on the cache entry's storage ID, which is now stamped on every response in `extra_fields.cache_debug.cache_id` — on cache hits **and** cache misses. Hold onto that ID from the response if you ever need to invalidate the entry.
+
+### REST API
+
+| Before (v1.4.x) | After (v1.5.0) |
+|---|---|
+| `DELETE /api/cache/clear/{requestId}` | `DELETE /api/cache/clear/{cacheId}` |
+
+The path parameter name and meaning both changed. The cache key endpoint (`DELETE /api/cache/clear-by-key/{cacheKey}`) is unchanged.
+
+**Before:**
+```bash
+curl -X DELETE localhost:8080/api/cache/clear/req-aaa-bbb-ccc
+```
+
+**After:**
+```bash
+# Read the cache ID from a prior response
+CACHE_ID=$(curl ... | jq -r '.extra_fields.cache_debug.cache_id')
+
+curl -X DELETE localhost:8080/api/cache/clear/$CACHE_ID
+```
+
+### Go SDK
+
+The `ClearCacheForRequestID` method on `*semanticcache.Plugin` has been removed and replaced by `ClearCacheForCacheID`.
+
+**Before:**
+```go
+err := plugin.ClearCacheForRequestID(requestID)
+```
+
+**After:**
+```go
+// On hit or miss, the storage ID is exposed via CacheDebug.CacheID
+cacheID := response.ExtraFields.CacheDebug.CacheID
+if cacheID != nil {
+    err := plugin.ClearCacheForCacheID(*cacheID)
+}
+```
+
+### Why the rename
+
+A single cache entry is reused across many request IDs (that is the point of caching). A request-ID-based delete only ever made sense for the original writer of the entry, and even that broke once direct search switched to deterministic storage IDs. The cache ID is the only stable handle that works for both writers and readers, so the API now reflects that.
+
+### CacheDebug on misses
+
+`extra_fields.cache_debug` is now populated on cache misses too — previously it was only emitted when semantic search ran. The new fields on a miss:
+
+- `cache_hit: false`
+- `cache_id`: the storage ID where the entry was written (use this with `ClearCacheForCacheID`)
+- `provider_used` / `model_used` / `input_tokens`: only present when semantic search actually ran (i.e. embedding model was invoked)
+
+If you parse `cache_debug` and assumed it was either absent or had `cache_hit: true`, update your consumer to handle the `cache_hit: false` shape.
+
+---
+
 ## Opting Out: `version: 1` Compatibility Mode
 
 If you are not ready to adopt the new deny-by-default semantics, you can add a single field to `config.json` to restore v1.4.x behavior for all allow-list fields loaded from that file:
@@ -611,6 +672,10 @@ Replace `.Model` with `.RequestedModel` (and optionally `.ResolvedModel`) on any
 <Step title="Handle empty selected_key_id on terminal retry failures">
 If your code reads `selected_key_id` / `selected_key_name` from the request context or log entries to attribute failed requests, add a null/empty check and fall back to `attempt_trail` for the full per-attempt key history.
 </Step>
+
+<Step title="Switch semantic cache invalidation to cache IDs">
+Replace `DELETE /api/cache/clear/{requestId}` with `DELETE /api/cache/clear/{cacheId}`, and replace `plugin.ClearCacheForRequestID(...)` with `plugin.ClearCacheForCacheID(...)`. Read the cache ID from `extra_fields.cache_debug.cache_id` on the response (now populated on misses too).
+</Step>
 </Steps>
 
 ---

diff --git a/docs/openapi/openapi.json b/docs/openapi/openapi.json
@@ -41769,20 +41769,20 @@
         }
       }
     },
-    "/api/cache/clear/{requestId}": {
+    "/api/cache/clear/{cacheId}": {
       "delete": {
-        "operationId": "clearCacheByRequestId",
-        "summary": "Clear cache by request ID",
-        "description": "Clears cache entries associated with a specific request ID.",
+        "operationId": "clearCacheByCacheId",
+        "summary": "Clear cache entry by cache ID",
+        "description": "Deletes a single cache entry by its storage ID. Read the cache ID from\n`extra_fields.cache_debug.cache_id` on a prior response — it is populated\non both cache hits and cache misses.\n",
         "tags": [
           "Cache"
         ],
         "parameters": [
           {
-            "name": "requestId",
+            "name": "cacheId",
             "in": "path",
             "required": true,
-            "description": "Request ID to clear cache for",
+            "description": "Storage ID of the cache entry to delete",
             "schema": {
               "type": "string"
             }

diff --git a/docs/openapi/openapi.yaml b/docs/openapi/openapi.yaml
@@ -786,8 +786,8 @@ paths:
     $ref: './paths/management/prompts.yaml#/sessions-commit'
 
   # Cache
-  /api/cache/clear/{requestId}:
-    $ref: './paths/management/cache.yaml#/clear-by-request-id'
+  /api/cache/clear/{cacheId}:
+    $ref: './paths/management/cache.yaml#/clear-by-cache-id'
   /api/cache/clear-by-key/{cacheKey}:
     $ref: './paths/management/cache.yaml#/clear-by-cache-key'
 

diff --git a/docs/openapi/paths/management/cache.yaml b/docs/openapi/paths/management/cache.yaml
@@ -1,15 +1,18 @@
-clear-by-request-id:
+clear-by-cache-id:
   delete:
-    operationId: clearCacheByRequestId
-    summary: Clear cache by request ID
-    description: Clears cache entries associated with a specific request ID.
+    operationId: clearCacheByCacheId
+    summary: Clear cache entry by cache ID
+    description: |
+      Deletes a single cache entry by its storage ID. Read the cache ID from
+      `extra_fields.cache_debug.cache_id` on a prior response — it is populated
+      on both cache hits and cache misses.
     tags:
       - Cache
     parameters:
-      - name: requestId
+      - name: cacheId
         in: path
         required: true
-        description: Request ID to clear cache for
+        description: Storage ID of the cache entry to delete
         schema:
           type: string
     responses:

diff --git a/framework/logstore/matviews.go b/framework/logstore/matviews.go
@@ -188,7 +188,8 @@ func canUseMatViewFilters(f SearchFilters) bool {
 		f.MinLatency == nil && f.MaxLatency == nil &&
 		f.MinTokens == nil && f.MaxTokens == nil &&
 		f.MinCost == nil && f.MaxCost == nil &&
-		!f.MissingCostOnly
+		!f.MissingCostOnly &&
+		len(f.CacheHitTypes) == 0
 }
 
 // canUseMatView checks both that materialized views are ready (created and