diff --git a/.claude/skills/changelog-writer/SKILL.md b/.claude/skills/changelog-writer/SKILL.md
index 51b450471a..8f67f639c9 100644
--- a/.claude/skills/changelog-writer/SKILL.md
+++ b/.claude/skills/changelog-writer/SKILL.md
@@ -335,7 +335,7 @@ bifrost/
 │   ├── maxim/version
 │   ├── mocker/version
 │   ├── otel/version
-│   ├── semanticcache/version
+│   ├── localcache/version
 │   └── telemetry/version
 ├── transports/
 │   ├── version              # Plain text: "1.5.0"
@@ -353,9 +353,9 @@ This is the canonical order for plugins:
 1. governance
 2. jsonparser
 3. litellmcompat
-4. logging
-5. maxim
-6. mocker
-7. otel
-8. semanticcache
+4. localcache
+5. logging
+6. maxim
+7. mocker
+8. otel
 9. telemetry
diff --git a/.claude/skills/docs-writer/SKILL.md b/.claude/skills/docs-writer/SKILL.md
index 61929a4f67..49072c8c82 100644
--- a/.claude/skills/docs-writer/SKILL.md
+++ b/.claude/skills/docs-writer/SKILL.md
@@ -43,7 +43,7 @@ Parse the feature name and map it to codebase areas. Common feature-to-directory
 | mcp | `ui/app/workspace/mcp-registry/` | `handlers/mcp.go` | `mcp` | `docs/mcp/` |
 | plugins | `ui/app/workspace/plugins/` | `handlers/plugins.go` | `plugins` | `docs/features/plugins/` |
 | logs / observability | `ui/app/workspace/logs/` | `handlers/logging.go` | `client.enable_logging` | `docs/features/observability/` |
-| semantic-caching | `ui/app/workspace/config/caching/` | `handlers/cache.go` | `plugins.semantic_cache` + `vector_store` | `docs/features/semantic-caching.mdx` |
+| local-caching | `ui/app/workspace/config/caching/` | `handlers/cache.go` + `handlers/local_cache.go` | `client.enable_local_cache` + `local_cache` + `vector_store` | `docs/features/local-caching.mdx` |
 | guardrails | `ui/app/workspace/guardrails/` | Enterprise | `guardrails_config` | `docs/enterprise/guardrails.mdx` |
 | clustering | `ui/app/workspace/cluster/` | Enterprise | `cluster_config` | `docs/enterprise/clustering.mdx` |
 | load-balancing | `ui/app/workspace/adaptive-routing/` | Enterprise | `load_balancer_config` | `docs/enterprise/adaptive-load-balancing.mdx` |
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 25fa245ac1..ec7184bf8e 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -200,7 +200,7 @@ updates:
     open-pull-requests-limit: 0
 
   - package-ecosystem: gomod
-    directory: /plugins/semanticcache
+    directory: /plugins/localcache
     schedule:
       interval: daily
     open-pull-requests-limit: 0
diff --git a/.github/workflows/configs/withsemanticcache/config.json b/.github/workflows/configs/withlocalcache/config.json
similarity index 52%
rename from .github/workflows/configs/withsemanticcache/config.json
rename to .github/workflows/configs/withlocalcache/config.json
index 90fb65f670..230f6d3a2f 100644
--- a/.github/workflows/configs/withsemanticcache/config.json
+++ b/.github/workflows/configs/withlocalcache/config.json
@@ -1,5 +1,8 @@
 {
   "$schema": "https://www.getbifrost.ai/schema",
+  "client": {
+    "enable_local_cache": true
+  },
   "vector_store": {
     "enabled": true,
     "type": "weaviate",
@@ -8,14 +11,8 @@
       "host": "localhost:9000"
     }
   },
-  "plugins": [
-    {
-      "enabled": true,
-      "name": "semantic_cache",
-      "config": {
-        "dimension": 1,
-        "vector_store_namespace": "test"
-      }
-    }
-  ]
-}
\ No newline at end of file
+  "local_cache": {
+    "dimension": 1,
+    "vector_store_namespace": "test"
+  }
+}
diff --git a/.github/workflows/scripts/setup-go-workspace.sh b/.github/workflows/scripts/setup-go-workspace.sh
index 29024fdaa3..0226254b03 100755
--- a/.github/workflows/scripts/setup-go-workspace.sh
+++ b/.github/workflows/scripts/setup-go-workspace.sh
@@ -28,7 +28,7 @@ go work use ./plugins/maxim
 go work use ./plugins/mocker
 go work use ./plugins/otel
 go work use ./plugins/prompts
-go work use ./plugins/semanticcache
+go work use ./plugins/localcache
 go work use ./plugins/telemetry
 go work use ./transports
 go work use ./cli
diff --git a/.github/workflows/scripts/test-bifrost-http.sh b/.github/workflows/scripts/test-bifrost-http.sh
index 4f42c34ade..714de588bb 100755
--- a/.github/workflows/scripts/test-bifrost-http.sh
+++ b/.github/workflows/scripts/test-bifrost-http.sh
@@ -90,7 +90,7 @@ CONFIGS_TO_TEST=(
   "withconfigstorelogsstoresqlite"
   "withdynamicplugin"
   "withobservability"
-  "withsemanticcache"
+  "withlocalcache"
   "withpostgresmcpclientsinconfig"
 )
 
diff --git a/.github/workflows/scripts/validate-helm-config-fields.sh b/.github/workflows/scripts/validate-helm-config-fields.sh
index 465e0e799a..201fa01244 100755
--- a/.github/workflows/scripts/validate-helm-config-fields.sh
+++ b/.github/workflows/scripts/validate-helm-config-fields.sh
@@ -586,7 +586,8 @@ assert_field_value 'auth_config.is_enabled' '.auth_config.is_enabled' 'true'
 assert_field_value 'auth_config.disable_auth_on_inference' '.auth_config.disable_auth_on_inference' 'false'
 
 ###############################################################################
-# 6. Plugins (telemetry, logging, governance, maxim, semantic_cache, otel, datadog, custom)
+# 6. Plugins (telemetry, logging, governance, maxim, otel, datadog, custom),
+#    plus the top-level local_cache block (no longer a plugins[] entry as of v1.5.0)
 ###############################################################################
 echo ""
 echo -e "${CYAN}🔌 6/10 - Plugins Configuration${NC}"
@@ -596,6 +597,9 @@ cat > "$TMPDIR/values-plugins.yaml" << 'VALS'
 image:
   tag: v1.0.0
 bifrost:
+  client:
+    # Loads the local cache plugin (direct + semantic) at boot.
+    enableLocalCache: true
   plugins:
     telemetry:
       enabled: true
@@ -627,22 +631,6 @@ bifrost:
       secretRef:
         name: ""
         key: "api-key"
-    semanticCache:
-      enabled: true
-      config:
-        provider: "openai"
-        keys:
-          - "sk-embed-key"
-        embedding_model: "text-embedding-3-small"
-        dimension: 1536
-        threshold: 0.85
-        ttl: "10m"
-        conversation_history_threshold: 5
-        cache_by_model: true
-        cache_by_provider: false
-        exclude_system_prompt: true
-        cleanup_on_shutdown: true
-        vector_store_namespace: "bifrost-cache"
     otel:
       enabled: true
       config:
@@ -674,6 +662,22 @@ bifrost:
         version: 2
         config:
           key1: "val1"
+# Local cache plugin configuration (top-level, sibling of bifrost / vectorStore)
+localCache:
+  keys:
+    - "sk-embed-key"
+  config:
+    provider: "openai"
+    embedding_model: "text-embedding-3-small"
+    dimension: 1536
+    threshold: 0.85
+    ttl: "10m"
+    conversation_history_threshold: 5
+    cache_by_model: true
+    cache_by_provider: false
+    exclude_system_prompt: true
+    cleanup_on_shutdown: true
+    vector_store_namespace: "bifrost-cache"
 vectorStore:
   enabled: true
   type: weaviate
@@ -702,48 +706,48 @@ assert_field_value 'plugins: maxim name' '.plugins.[3].name' '"maxim"'
 assert_field_value 'plugins: maxim api_key' '.plugins.[3].config.api_key' '"maxim-key-123"'
 assert_field_value 'plugins: maxim log_repo_id' '.plugins.[3].config.log_repo_id' '"repo-456"'
 
-# Semantic cache plugin
-assert_field_value 'plugins: semantic_cache name' '.plugins.[4].name' '"semantic_cache"'
-assert_field_value 'plugins: semantic_cache provider' '.plugins.[4].config.provider' '"openai"'
-assert_field 'plugins: semantic_cache keys' '.plugins.[4].config.keys'
-assert_field_value 'plugins: semantic_cache embedding_model' '.plugins.[4].config.embedding_model' '"text-embedding-3-small"'
-assert_field_value 'plugins: semantic_cache dimension' '.plugins.[4].config.dimension' '1536'
-assert_field_value 'plugins: semantic_cache threshold' '.plugins.[4].config.threshold' '0.85'
-assert_field_value 'plugins: semantic_cache ttl' '.plugins.[4].config.ttl' '"10m"'
-assert_field_value 'plugins: semantic_cache conversation_history_threshold' '.plugins.[4].config.conversation_history_threshold' '5'
-assert_field_value 'plugins: semantic_cache cache_by_model' '.plugins.[4].config.cache_by_model' 'true'
-assert_field_value 'plugins: semantic_cache cache_by_provider' '.plugins.[4].config.cache_by_provider' 'false'
-assert_field_value 'plugins: semantic_cache exclude_system_prompt' '.plugins.[4].config.exclude_system_prompt' 'true'
-assert_field_value 'plugins: semantic_cache cleanup_on_shutdown' '.plugins.[4].config.cleanup_on_shutdown' 'true'
-assert_field_value 'plugins: semantic_cache vector_store_namespace' '.plugins.[4].config.vector_store_namespace' '"bifrost-cache"'
-
-# OTEL plugin
-assert_field_value 'plugins: otel name' '.plugins.[5].name' '"otel"'
-assert_field_value 'plugins: otel service_name' '.plugins.[5].config.service_name' '"bifrost-test"'
-assert_field_value 'plugins: otel collector_url' '.plugins.[5].config.collector_url' '"otel-collector:4317"'
-assert_field_value 'plugins: otel trace_type' '.plugins.[5].config.trace_type' '"genai_extension"'
-assert_field_value 'plugins: otel protocol' '.plugins.[5].config.protocol' '"grpc"'
-assert_field_value 'plugins: otel metrics_enabled' '.plugins.[5].config.metrics_enabled' 'true'
-assert_field_value 'plugins: otel metrics_endpoint' '.plugins.[5].config.metrics_endpoint' '"otel-collector:4317"'
-assert_field_value 'plugins: otel metrics_push_interval' '.plugins.[5].config.metrics_push_interval' '30'
-assert_field 'plugins: otel headers' '.plugins.[5].config.headers'
-assert_field_value 'plugins: otel tls_ca_cert' '.plugins.[5].config.tls_ca_cert' '"/certs/ca.pem"'
-assert_field_value 'plugins: otel insecure' '.plugins.[5].config.insecure' 'true'
+# Local cache (top-level block, not a plugins[] entry as of v1.5.0)
+assert_field_value 'client.enable_local_cache' '.client.enable_local_cache' 'true'
+assert_field_value 'local_cache provider' '.local_cache.provider' '"openai"'
+assert_field 'local_cache keys' '.local_cache.keys'
+assert_field_value 'local_cache embedding_model' '.local_cache.embedding_model' '"text-embedding-3-small"'
+assert_field_value 'local_cache dimension' '.local_cache.dimension' '1536'
+assert_field_value 'local_cache threshold' '.local_cache.threshold' '0.85'
+assert_field_value 'local_cache ttl' '.local_cache.ttl' '"10m"'
+assert_field_value 'local_cache conversation_history_threshold' '.local_cache.conversation_history_threshold' '5'
+assert_field_value 'local_cache cache_by_model' '.local_cache.cache_by_model' 'true'
+assert_field_value 'local_cache cache_by_provider' '.local_cache.cache_by_provider' 'false'
+assert_field_value 'local_cache exclude_system_prompt' '.local_cache.exclude_system_prompt' 'true'
+assert_field_value 'local_cache cleanup_on_shutdown' '.local_cache.cleanup_on_shutdown' 'true'
+assert_field_value 'local_cache vector_store_namespace' '.local_cache.vector_store_namespace' '"bifrost-cache"'
+
+# OTEL plugin (was index [5] when semantic_cache lived in plugins[]; now [4])
+assert_field_value 'plugins: otel name' '.plugins.[4].name' '"otel"'
+assert_field_value 'plugins: otel service_name' '.plugins.[4].config.service_name' '"bifrost-test"'
+assert_field_value 'plugins: otel collector_url' '.plugins.[4].config.collector_url' '"otel-collector:4317"'
+assert_field_value 'plugins: otel trace_type' '.plugins.[4].config.trace_type' '"genai_extension"'
+assert_field_value 'plugins: otel protocol' '.plugins.[4].config.protocol' '"grpc"'
+assert_field_value 'plugins: otel metrics_enabled' '.plugins.[4].config.metrics_enabled' 'true'
+assert_field_value 'plugins: otel metrics_endpoint' '.plugins.[4].config.metrics_endpoint' '"otel-collector:4317"'
+assert_field_value 'plugins: otel metrics_push_interval' '.plugins.[4].config.metrics_push_interval' '30'
+assert_field 'plugins: otel headers' '.plugins.[4].config.headers'
+assert_field_value 'plugins: otel tls_ca_cert' '.plugins.[4].config.tls_ca_cert' '"/certs/ca.pem"'
+assert_field_value 'plugins: otel insecure' '.plugins.[4].config.insecure' 'true'
 
 # Datadog plugin
-assert_field_value 'plugins: datadog name' '.plugins.[6].name' '"datadog"'
-assert_field_value 'plugins: datadog service_name' '.plugins.[6].config.service_name' '"bifrost-dd"'
-assert_field_value 'plugins: datadog agent_addr' '.plugins.[6].config.agent_addr' '"dd-agent:8126"'
-assert_field_value 'plugins: datadog env' '.plugins.[6].config.env' '"staging"'
-assert_field_value 'plugins: datadog version' '.plugins.[6].config.version' '"1.0.0"'
-assert_field 'plugins: datadog custom_tags' '.plugins.[6].config.custom_tags'
-assert_field_value 'plugins: datadog enable_traces' '.plugins.[6].config.enable_traces' 'true'
+assert_field_value 'plugins: datadog name' '.plugins.[5].name' '"datadog"'
+assert_field_value 'plugins: datadog service_name' '.plugins.[5].config.service_name' '"bifrost-dd"'
+assert_field_value 'plugins: datadog agent_addr' '.plugins.[5].config.agent_addr' '"dd-agent:8126"'
+assert_field_value 'plugins: datadog env' '.plugins.[5].config.env' '"staging"'
+assert_field_value 'plugins: datadog version' '.plugins.[5].config.version' '"1.0.0"'
+assert_field 'plugins: datadog custom_tags' '.plugins.[5].config.custom_tags'
+assert_field_value 'plugins: datadog enable_traces' '.plugins.[5].config.enable_traces' 'true'
 
 # Custom plugin
-assert_field_value 'plugins: custom name' '.plugins.[7].name' '"my-plugin"'
-assert_field_value 'plugins: custom path' '.plugins.[7].path' '"/plugins/my-plugin.so"'
-assert_field_value 'plugins: custom version' '.plugins.[7].version' '2'
-assert_field 'plugins: custom config' '.plugins.[7].config'
+assert_field_value 'plugins: custom name' '.plugins.[6].name' '"my-plugin"'
+assert_field_value 'plugins: custom path' '.plugins.[6].path' '"/plugins/my-plugin.so"'
+assert_field_value 'plugins: custom version' '.plugins.[6].version' '2'
+assert_field 'plugins: custom config' '.plugins.[6].config'
 
 ###############################################################################
 # 7. MCP Configuration
diff --git a/.github/workflows/scripts/validate-helm-schema.sh b/.github/workflows/scripts/validate-helm-schema.sh
index 5f012bc5c8..94c87c2760 100755
--- a/.github/workflows/scripts/validate-helm-schema.sh
+++ b/.github/workflows/scripts/validate-helm-schema.sh
@@ -470,18 +470,20 @@ fi
 echo ""
 echo "🔍 Checking required fields in plugin configs..."
 
-# Check semantic cache plugin required fields (dimension)
-# Config uses an allOf pattern on plugins array items; Helm uses conditional on semanticCache.enabled
-CONFIG_SEMCACHE_REQUIRED=$(jq -r '.properties.plugins.items.allOf[] | select(.if.properties.name.const == "semantic_cache") | .then.properties.config.required // [] | sort | join(",")' "$CONFIG_SCHEMA" 2>/dev/null || echo "")
-HELM_SEMCACHE_REQUIRED=$(jq -r '.properties.bifrost.properties.plugins.properties.semanticCache.then.properties.config.required // [] | sort | join(",")' "$HELM_SCHEMA" 2>/dev/null || echo "")
+# Check local cache required fields (dimension). The local cache moved
+# from a plugins[] array entry to a top-level block in v1.5.0, so both
+# schemas now expose it as `local_cache` (config.schema.json) /
+# `localCache` (Helm values.schema.json) at the root, not under plugins[].
+CONFIG_LOCALCACHE_REQUIRED=$(jq -r '.properties.local_cache.required // [] | sort | join(",")' "$CONFIG_SCHEMA" 2>/dev/null || echo "")
+HELM_LOCALCACHE_REQUIRED=$(jq -r '.properties.localCache.properties.config.required // [] | sort | join(",")' "$HELM_SCHEMA" 2>/dev/null || echo "")
 
-if [ "$CONFIG_SEMCACHE_REQUIRED" != "$HELM_SEMCACHE_REQUIRED" ]; then
-  echo "❌ Semantic cache plugin config required fields mismatch:"
-  echo "   Config: [$CONFIG_SEMCACHE_REQUIRED]"
-  echo "   Helm:   [$HELM_SEMCACHE_REQUIRED]"
+if [ "$CONFIG_LOCALCACHE_REQUIRED" != "$HELM_LOCALCACHE_REQUIRED" ]; then
+  echo "❌ Local cache config required fields mismatch:"
+  echo "   Config: [$CONFIG_LOCALCACHE_REQUIRED]"
+  echo "   Helm:   [$HELM_LOCALCACHE_REQUIRED]"
   ERRORS=$((ERRORS + 1))
 else
-  echo "✅ Semantic cache plugin config required fields match: [$CONFIG_SEMCACHE_REQUIRED]"
+  echo "✅ Local cache config required fields match: [$CONFIG_LOCALCACHE_REQUIRED]"
 fi
 
 # Check OTEL plugin required fields (collector_url, trace_type, protocol)
@@ -643,7 +645,7 @@ check_property_exists "cluster.region" ".properties.bifrost.properties.cluster.p
 echo ""
 echo "  Checking miscellaneous properties (Gap 8)..."
 check_property_exists "telemetry.custom_labels" ".properties.bifrost.properties.plugins.properties.telemetry.properties.config.properties.custom_labels" "$HELM_SCHEMA"
-check_property_exists "semanticCache.default_cache_key" ".properties.bifrost.properties.plugins.properties.semanticCache.properties.config.properties.default_cache_key" "$HELM_SCHEMA"
+check_property_exists "localCache.config.default_cache_key" ".properties.localCache.properties.config.properties.default_cache_key" "$HELM_SCHEMA"
 
 # Also verify these exist in config.schema.json
 echo ""
diff --git a/.github/workflows/scripts/validate-helm-templates.sh b/.github/workflows/scripts/validate-helm-templates.sh
index ccab87838e..50d6b10e2b 100755
--- a/.github/workflows/scripts/validate-helm-templates.sh
+++ b/.github/workflows/scripts/validate-helm-templates.sh
@@ -178,29 +178,29 @@ echo ""
 echo -e "${CYAN}⚙️  3/6 - Testing Special Configurations (7 tests)...${NC}"
 echo "-----------------------------------------------------"
 
-# semantic cache: direct mode (dimension: 1, no provider/keys)
-test_template "semanticCache: direct mode (dimension: 1)" \
-  --set bifrost.plugins.semanticCache.enabled=true \
-  --set bifrost.plugins.semanticCache.config.dimension=1 \
-  --set bifrost.plugins.semanticCache.config.ttl=30m \
+# local cache: direct mode (dimension: 1, no provider/keys)
+test_template "localCache: direct mode (dimension: 1)" \
+  --set bifrost.client.enableLocalCache=true \
+  --set localCache.config.dimension=1 \
+  --set localCache.config.ttl=30m \
   --set vectorStore.enabled=true \
   --set vectorStore.type=redis \
   --set vectorStore.redis.enabled=true
 
-# semantic cache: semantic mode (dimension > 1, requires provider/keys)
-test_template "semanticCache: semantic mode (dimension: 1536)" \
-  --set bifrost.plugins.semanticCache.enabled=true \
-  --set bifrost.plugins.semanticCache.config.dimension=1536 \
-  --set bifrost.plugins.semanticCache.config.provider=openai \
-  --set 'bifrost.plugins.semanticCache.config.keys[0]=sk-test' \
+# local cache: semantic mode (dimension > 1, requires provider/keys)
+test_template "localCache: semantic mode (dimension: 1536)" \
+  --set bifrost.client.enableLocalCache=true \
+  --set localCache.config.dimension=1536 \
+  --set localCache.config.provider=openai \
+  --set 'localCache.keys[0]=sk-test' \
   --set vectorStore.enabled=true \
   --set vectorStore.type=redis \
   --set vectorStore.redis.enabled=true
 
-# semantic cache: direct mode with redis + postgres
-test_template "semanticCache: direct mode + postgres" \
-  --set bifrost.plugins.semanticCache.enabled=true \
-  --set bifrost.plugins.semanticCache.config.dimension=1 \
+# local cache: direct mode with redis + postgres
+test_template "localCache: direct mode + postgres" \
+  --set bifrost.client.enableLocalCache=true \
+  --set localCache.config.dimension=1 \
   --set storage.mode=postgres \
   --set postgresql.enabled=true \
   --set postgresql.auth.password=testpass \
@@ -335,20 +335,26 @@ echo ""
 echo -e "${CYAN}🔌 5/6 - Validating Plugin Names Match Go Registry...${NC}"
 echo "------------------------------------------------------"
 
-# Verify semantic cache plugin renders with correct name ("semantic_cache", not "semantic_cache")
-# Go registry: plugins/semantic_cache/main.go defines PluginName = "semantic_cache"
-test_name="semanticCache plugin name matches Go registry (semantic_cache)"
+# Verify the local cache renders as a top-level "local_cache" block plus
+# "enable_local_cache: true" on the client (the v1.5.0 shape — no longer a
+# plugins[] entry). Go side: plugins/localcache/main.go defines
+# PluginName = "local_cache".
+test_name="localCache renders as top-level block + client.enable_local_cache toggle"
 if helm template bifrost ./helm-charts/bifrost \
   --set image.tag=v1.0.0 \
-  --set bifrost.plugins.semanticCache.enabled=true \
-  --set bifrost.plugins.semanticCache.config.dimension=1536 \
-  --set bifrost.plugins.semanticCache.config.provider=openai \
-  --set 'bifrost.plugins.semanticCache.config.keys[0]=sk-test' \
+  --set bifrost.client.enableLocalCache=true \
+  --set localCache.config.dimension=1536 \
+  --set localCache.config.provider=openai \
+  --set 'localCache.keys[0]=sk-test' \
   --set vectorStore.enabled=true \
   --set vectorStore.type=redis \
   --set vectorStore.redis.enabled=true \
   > /tmp/helm-template-output.yaml 2>&1; then
-  if grep -Eq '"name"[[:space:]]*:[[:space:]]*"semantic_cache"' /tmp/helm-template-output.yaml; then
+  # Both signals must be present: the top-level local_cache block and the
+  # enable_local_cache flag on the client. Without either, the plugin
+  # won't load at boot.
+  if grep -Eq '"local_cache"[[:space:]]*:' /tmp/helm-template-output.yaml \
+     && grep -Eq '"enable_local_cache"[[:space:]]*:[[:space:]]*true' /tmp/helm-template-output.yaml; then
     report_result "$test_name" 0
   else
     report_result "$test_name" 1
diff --git a/.github/workflows/scripts/validate-schema-sync.sh b/.github/workflows/scripts/validate-schema-sync.sh
index 0214d0679b..88c80fbdcc 100755
--- a/.github/workflows/scripts/validate-schema-sync.sh
+++ b/.github/workflows/scripts/validate-schema-sync.sh
@@ -35,7 +35,7 @@ if [ ! -f "$REPO_ROOT/go.work" ]; then
     for mod in ./core ./framework \
                ./plugins/compat ./plugins/governance ./plugins/jsonparser \
                ./plugins/logging ./plugins/maxim ./plugins/mocker \
-               ./plugins/otel ./plugins/prompts ./plugins/semanticcache \
+               ./plugins/otel ./plugins/prompts ./plugins/localcache \
                ./plugins/telemetry \
                ./transports ./cli; do
       if [ -f "$REPO_ROOT/$mod/go.mod" ]; then
diff --git a/AGENTS.md b/AGENTS.md
index 83b20fd344..f30783d55c 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -98,7 +98,7 @@ bifrost/
 │   ├── governance/                # Budget, rate limiting, virtual keys, routing, RBAC
 │   ├── telemetry/                 # Prometheus metrics, push gateway
 │   ├── logging/                   # Request/response audit logging
-│   ├── semanticcache/             # Semantic response caching via vector store
+│   ├── localcache/                # Direct and semantic response caching via vector store
 │   ├── otel/                      # OpenTelemetry tracing
 │   ├── mocker/                    # Mock responses for testing
 │   ├── jsonparser/                # JSON extraction utilities
diff --git a/README.md b/README.md
index f957c58ee1..c2b8715b09 100644
--- a/README.md
+++ b/README.md
@@ -131,7 +131,7 @@ bifrost/
 │   ├── logging/         # Request logging and analytics
 │   ├── maxim/           # Maxim's observability integration
 │   ├── mocker/          # Mock responses for testing and development
-│   ├── semanticcache/   # Intelligent response caching
+│   ├── localcache/      # Intelligent response caching
 │   └── telemetry/       # Monitoring and observability
 ├── docs/                # Documentation and guides
 └── tests/               # Comprehensive test suites
diff --git a/docs/architecture/framework/streaming.mdx b/docs/architecture/framework/streaming.mdx
index 42cb3cd164..f519a9b279 100644
--- a/docs/architecture/framework/streaming.mdx
+++ b/docs/architecture/framework/streaming.mdx
@@ -114,7 +114,7 @@ func (p *LoggerPlugin) PostLLMHook(ctx *schemas.BifrostContext, result *schemas.
                 
                 // 3. Update the log entry with the complete data
                 processingErr := retryOnNotFound(p.ctx, func() error {
-                    return p.updateStreamingLogEntry(p.ctx, logMsg.RequestID, logMsg.SemanticCacheDebug, logMsg.StreamResponse, true)
+                    return p.updateStreamingLogEntry(p.ctx, logMsg.RequestID, logMsg.LocalCacheDebug, logMsg.StreamResponse, true)
                 })
                 
                 // ... handle errors and callbacks ...
diff --git a/docs/architecture/framework/vector-store.mdx b/docs/architecture/framework/vector-store.mdx
index c6dcc7fa85..ae4591a1e1 100644
--- a/docs/architecture/framework/vector-store.mdx
+++ b/docs/architecture/framework/vector-store.mdx
@@ -145,7 +145,7 @@ err = store.Delete(ctx, "my_content", "unique-id-123")
 
 ## Use Cases
 
-### [Semantic Caching](../../features/semantic-caching)
+### [Semantic Caching](../../features/local-caching)
 Build intelligent caching systems that understand query intent rather than just exact matches.
 
 **Applications:**
@@ -182,4 +182,4 @@ Build personalized recommendation engines using vector similarity.
 | Topic | Documentation | Description |
 |-------|---------------|-------------|
 | **Framework Overview** | [What is Framework](./what-is-framework) | Understanding the framework package and VectorStore interface |
-| **Semantic Caching** | [Semantic Caching](../../features/semantic-caching) | Using VectorStore for AI response caching |
+| **Semantic Caching** | [Semantic Caching](../../features/local-caching) | Using VectorStore for AI response caching |
diff --git a/docs/contributing/code-conventions.mdx b/docs/contributing/code-conventions.mdx
index c8a73b2774..8771fe1c11 100644
--- a/docs/contributing/code-conventions.mdx
+++ b/docs/contributing/code-conventions.mdx
@@ -245,21 +245,22 @@ make test-core PROVIDER=openai TESTCASE=TestName/SubTest
 
 ```mdx
 ---
-title: "Semantic Caching"
+title: "Local Caching"
 description: "Intelligent response caching based on semantic similarity"
 icon: "database"
 ---
 
 ## Overview
 
-Semantic caching intelligently caches responses...
+Local caching covers both direct hash matching and semantic similarity search...
 
 ## Configuration
 
 ```yaml
-plugins:
-  semantic_cache:
-    enabled: true
+client:
+  enable_local_cache: true
+local_cache:
+  dimension: 1
 ```
 
 <Note>
diff --git a/docs/contributing/raising-a-pr.mdx b/docs/contributing/raising-a-pr.mdx
index a732fae456..eafab7612e 100644
--- a/docs/contributing/raising-a-pr.mdx
+++ b/docs/contributing/raising-a-pr.mdx
@@ -75,7 +75,7 @@ When creating a commit, always mention which packages/components are affected:
 [feat]: Add semantic caching plugin
 
 Changes:
-- plugins/semanticcache/ - Core caching logic
+- plugins/localcache/ - Core caching logic
 - core/bifrost.go - Plugin registration
 - transports/bifrost-http/server.go - Cache middleware integration
 
@@ -350,15 +350,15 @@ Update file
 
 ❌ **Bad:**
 ```
-[feat]: Major update to semantic cache
+[feat]: Major update to local cache
 ```
 
 ✅ **Good:**
 ```
-[feat]: Add semantic cache initialization with retry logic
+[feat]: Add local cache initialization with retry logic
 
 Changes:
-- plugins/semanticcache/ - Add retry mechanism
+- plugins/localcache/ - Add retry mechanism
 - core/bifrost.go - Register cache handler
 - docs/ - Add usage documentation
 ```
diff --git a/docs/deployment-guides/config-json.mdx b/docs/deployment-guides/config-json.mdx
index 2c967bdb30..d1ccdfc674 100644
--- a/docs/deployment-guides/config-json.mdx
+++ b/docs/deployment-guides/config-json.mdx
@@ -325,12 +325,12 @@ Ready-to-use reference configurations from the [examples/configs](https://github
 
 </Accordion>
 
-<Accordion title="Semantic Cache">
+<Accordion title="Local Cache">
 
 | Example | Description |
 |---------|-------------|
-| [withsemanticcache](https://github.com/maximhq/bifrost/blob/main/examples/configs/withsemanticcache/config.json) | Semantic cache backed by Weaviate |
-| [withsemanticcachevalkey](https://github.com/maximhq/bifrost/blob/main/examples/configs/withsemanticcachevalkey/config.json) | Semantic cache backed by Valkey / Redis |
+| [withlocalcache](https://github.com/maximhq/bifrost/blob/main/examples/configs/withlocalcache/config.json) | Local cache backed by Weaviate |
+| [withlocalcachevalkey](https://github.com/maximhq/bifrost/blob/main/examples/configs/withlocalcachevalkey/config.json) | Local cache backed by Valkey / Redis |
 
 </Accordion>
 
diff --git a/docs/deployment-guides/config-json/plugins.mdx b/docs/deployment-guides/config-json/plugins.mdx
index 5bb8806dc2..87011bf51c 100644
--- a/docs/deployment-guides/config-json/plugins.mdx
+++ b/docs/deployment-guides/config-json/plugins.mdx
@@ -1,11 +1,11 @@
 ---
 title: "Plugins"
-description: "Configure Bifrost plugins in config.json - semantic cache, OpenTelemetry, Maxim, Datadog, and custom plugins"
+description: "Configure Bifrost plugins in config.json - OpenTelemetry, Maxim, Datadog, and custom plugins"
 icon: "puzzle-piece"
 ---
 
 <Note>
-**The `plugins` array only controls explicitly opt-in plugins**: `semantic_cache`, `otel`, `maxim`, `datadog` (enterprise), and custom plugins.
+**The `plugins` array only controls explicitly opt-in plugins**: `otel`, `maxim`, `datadog` (enterprise), and custom plugins.
 
 **Telemetry, logging, and governance are auto-loaded built-ins** - they are always active and configured via the `client` block and dedicated top-level keys, not the `plugins` array.
 </Note>
@@ -48,9 +48,9 @@ Every entry in the `plugins` array supports these common fields:
 
 <Tabs>
 
-<Tab title="Semantic Cache">
+<Tab title="Local Cache">
 
-### Semantic Cache
+### Local Cache
 
 Caches LLM responses by semantic similarity. Returns a cached response when an incoming request is semantically close enough to a previous one.
 
@@ -75,7 +75,7 @@ Requires a [vector store](/deployment-guides/config-json/storage#vector_store) t
 {
   "plugins": [
     {
-      "name": "semantic_cache",
+      "name": "local_cache",
       "enabled": true,
       "config": {
         "provider": "openai",
@@ -97,7 +97,7 @@ Requires a [vector store](/deployment-guides/config-json/storage#vector_store) t
 {
   "plugins": [
     {
-      "name": "semantic_cache",
+      "name": "local_cache",
       "enabled": true,
       "config": {
         "dimension": 1,
diff --git a/docs/deployment-guides/config-json/schema-reference.mdx b/docs/deployment-guides/config-json/schema-reference.mdx
index 17d0bb39e2..3b53f94942 100644
--- a/docs/deployment-guides/config-json/schema-reference.mdx
+++ b/docs/deployment-guides/config-json/schema-reference.mdx
@@ -26,8 +26,8 @@ This page is a concise reference for every top-level key in `config.json`. Click
 | `cluster_config` | object | Cluster mode settings: gossip, peers, and auto-discovery backends *(enterprise only)* | [Cluster](/deployment-guides/config-json/cluster) |
 | `config_store` | object | Configuration database backend - SQLite, PostgreSQL, or disabled (file-only mode) | [Storage](/deployment-guides/config-json/storage#config_store) |
 | `logs_store` | object | Request/response log database - SQLite, PostgreSQL + optional S3/GCS offload | [Storage](/deployment-guides/config-json/storage#logs_store) |
-| `vector_store` | object | Vector database for semantic cache - Weaviate, Redis, Qdrant, Pinecone, Valkey | [Storage](/deployment-guides/config-json/storage#vector_store) |
-| `plugins` | array | Opt-in plugins: `semantic_cache`, `otel`, `maxim`, `datadog`, custom | [Plugins](/deployment-guides/config-json/plugins) |
+| `vector_store` | object | Vector database for local cache - Weaviate, Redis, Qdrant, Pinecone, Valkey | [Storage](/deployment-guides/config-json/storage#vector_store) |
+| `plugins` | array | Opt-in plugins: `otel`, `maxim`, `datadog`, custom | [Plugins](/deployment-guides/config-json/plugins) |
 | `framework` | object | Model pricing catalog URL and sync interval | [Framework](#framework) |
 | `mcp` | object | MCP server and tool configuration | - |
 | `websocket` | object | WebSocket / Realtime API connection pool tuning | [WebSocket](#websocket) |
diff --git a/docs/deployment-guides/config-json/storage.mdx b/docs/deployment-guides/config-json/storage.mdx
index 56a9c0c261..54440898d8 100644
--- a/docs/deployment-guides/config-json/storage.mdx
+++ b/docs/deployment-guides/config-json/storage.mdx
@@ -315,7 +315,7 @@ Omit `credentials_json` to use Application Default Credentials (Workload Identit
 
 ## vector_store
 
-A vector store is required for [semantic caching](/features/semantic-caching). Choose from Weaviate, Redis/Valkey, Qdrant, or Pinecone.
+A vector store is required for [local caching](/features/local-caching). Choose from Weaviate, Redis/Valkey, Qdrant, or Pinecone.
 
 <Tabs>
 
diff --git a/docs/deployment-guides/helm.mdx b/docs/deployment-guides/helm.mdx
index 0f6b7a5a60..bba523095c 100644
--- a/docs/deployment-guides/helm.mdx
+++ b/docs/deployment-guides/helm.mdx
@@ -453,7 +453,7 @@ bifrost:
       version: 1
       config:
         is_vk_mandatory: true
-    semanticCache:
+    # local_cache moved out of plugins; see localCache top-level
       enabled: true
       version: 1
       config:
@@ -479,7 +479,7 @@ helm install bifrost bifrost/bifrost -f enterprise.yaml
 Next steps: jump to [Next Steps](#next-steps).
 
 <Note>
-For DB-backed deployments, built-in plugins support a top-level `version` field (for example: `telemetry`, `logging`, `governance`, `semanticCache`, `otel`, `maxim`, `datadog`). Increase this number when you want config from Helm to overwrite an older plugin record in the DB.
+For DB-backed deployments, built-in plugins support a top-level `version` field (for example: `telemetry`, `logging`, `governance`, `localCache`, `otel`, `maxim`, `datadog`). Increase this number when you want config from Helm to overwrite an older plugin record in the DB.
 </Note>
 
 ## Enterprise Support
@@ -609,7 +609,7 @@ curl http://localhost:8080/health
     SQLite, PostgreSQL, object storage for logs, vector stores
   </Card>
   <Card title="Plugins" icon="puzzle-piece" href="/deployment-guides/helm/plugins">
-    Telemetry, logging, semantic cache, OTel, Datadog, governance
+    Telemetry, logging, OTel, Datadog, governance
   </Card>
   <Card title="Governance" icon="shield" href="/deployment-guides/helm/governance">
     Budgets, rate limits, virtual keys, routing rules
diff --git a/docs/deployment-guides/helm/plugins.mdx b/docs/deployment-guides/helm/plugins.mdx
index 887cf85600..5904bf9783 100644
--- a/docs/deployment-guides/helm/plugins.mdx
+++ b/docs/deployment-guides/helm/plugins.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Plugins"
-description: "Configure Bifrost plugins in Helm - telemetry, logging, semantic cache, OpenTelemetry, Datadog, governance, and custom plugins"
+description: "Configure Bifrost plugins in Helm - telemetry, logging, OpenTelemetry, Datadog, governance, and custom plugins"
 icon: "puzzle-piece"
 ---
 
@@ -9,13 +9,13 @@ Plugins are configured under `bifrost.plugins`. Each plugin is independently ena
 <Note>
 **Telemetry, logging, and governance are auto-loaded built-ins** - they are always active and do not need to be explicitly enabled. Their configuration lives in `bifrost.client.*` and `bifrost.governance.*`, not in the `plugins` block.
 
-The `plugins` block controls the opt-in plugins: `semanticCache`, `otel`, `datadog`, `maxim`, and custom plugins.
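+The `plugins` block controls the opt-in plugins: `otel`, `datadog`, `maxim`, and custom plugins. The local cache is configured outside this block, via `bifrost.client.enableLocalCache` and `bifrost.localCache`.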
 </Note>
 
 ```yaml
 bifrost:
   plugins:
-    semanticCache:
+    # local_cache moved out of plugins; see localCache top-level
       enabled: false
     otel:
       enabled: false
@@ -32,7 +32,7 @@ helm install bifrost bifrost/bifrost \
 # Or upgrade to enable a plugin without touching other values
 helm upgrade bifrost bifrost/bifrost \
   --reuse-values \
-  --set bifrost.plugins.semanticCache.enabled=true
+  --set bifrost.client.enableLocalCache=true
 ```
 
 ---
@@ -155,9 +155,9 @@ Define virtual keys, budgets, rate limits, and routing rules in `bifrost.governa
 
 </Tab>
 
-<Tab title="Semantic Cache">
+<Tab title="Local Cache">
 
-### Semantic Cache
+### Local Cache
 
 Caches LLM responses using vector similarity so semantically equivalent prompts return cached answers.
 
@@ -167,28 +167,28 @@ Two modes:
 
 | Parameter | Description | Default |
 |-----------|-------------|---------|
-| `bifrost.plugins.semanticCache.enabled` | Enable semantic caching | `false` |
-| `bifrost.plugins.semanticCache.version` | Plugin config version for DB-backed update tracking (`1` to `32767`) | `1` |
-| `bifrost.plugins.semanticCache.config.provider` | Embedding provider | `"openai"` |
-| `bifrost.plugins.semanticCache.config.embedding_model` | Embedding model name | `"text-embedding-3-small"` |
-| `bifrost.plugins.semanticCache.config.dimension` | Embedding dimension (`1` = direct/hash mode) | `1536` |
-| `bifrost.plugins.semanticCache.config.threshold` | Cosine similarity threshold (0–1) | `0.8` |
-| `bifrost.plugins.semanticCache.config.ttl` | Cache entry TTL (Go duration) | `"5m"` |
-| `bifrost.plugins.semanticCache.config.conversation_history_threshold` | Number of past messages to include in cache key | `3` |
-| `bifrost.plugins.semanticCache.config.cache_by_model` | Include model name in cache key | `true` |
-| `bifrost.plugins.semanticCache.config.cache_by_provider` | Include provider name in cache key | `true` |
-| `bifrost.plugins.semanticCache.config.exclude_system_prompt` | Exclude system prompt from cache key | `false` |
-| `bifrost.plugins.semanticCache.config.cleanup_on_shutdown` | Delete cache data on pod shutdown | `false` |
+| `bifrost.client.enableLocalCache` | Enable the local cache plugin | `false` |
+| `bifrost.localCache.version` | Plugin config version for DB-backed update tracking (`1` to `32767`) | `1` |
+| `bifrost.localCache.config.provider` | Embedding provider | `"openai"` |
+| `bifrost.localCache.config.embedding_model` | Embedding model name | `"text-embedding-3-small"` |
+| `bifrost.localCache.config.dimension` | Embedding dimension (`1` = direct/hash mode) | `1536` |
+| `bifrost.localCache.config.threshold` | Cosine similarity threshold (0–1) | `0.8` |
+| `bifrost.localCache.config.ttl` | Cache entry TTL (Go duration) | `"5m"` |
+| `bifrost.localCache.config.conversation_history_threshold` | Number of past messages to include in cache key | `3` |
+| `bifrost.localCache.config.cache_by_model` | Include model name in cache key | `true` |
+| `bifrost.localCache.config.cache_by_provider` | Include provider name in cache key | `true` |
+| `bifrost.localCache.config.exclude_system_prompt` | Exclude system prompt from cache key | `false` |
+| `bifrost.localCache.config.cleanup_on_shutdown` | Delete cache data on pod shutdown | `false` |
 
 **Semantic mode (with OpenAI embeddings + Weaviate):**
 
 ```bash
-kubectl create secret generic semantic-cache-secret \
+kubectl create secret generic local-cache-secret \
   --from-literal=openai-key='sk-your-openai-embedding-key'
 ```
 
 ```yaml
-# semantic-cache-values.yaml
+# local-cache-values.yaml
 image:
   tag: "v1.4.11"
 
@@ -202,12 +202,12 @@ vectorStore:
 
 bifrost:
   plugins:
-    semanticCache:
+    # local_cache moved out of plugins; see localCache top-level
       enabled: true
       config:
         provider: "openai"
         keys:
-          - value: "env.SEMANTIC_CACHE_OPENAI_KEY"
+          - value: "env.LOCAL_CACHE_OPENAI_KEY"
             weight: 1
         embedding_model: "text-embedding-3-small"
         dimension: 1536
@@ -218,14 +218,14 @@ bifrost:
         cache_by_provider: true
 
   providerSecrets:
-    semantic-cache-key:
-      existingSecret: "semantic-cache-secret"
+    local-cache-key:
+      existingSecret: "local-cache-secret"
       key: "openai-key"
-      envVar: "SEMANTIC_CACHE_OPENAI_KEY"
+      envVar: "LOCAL_CACHE_OPENAI_KEY"
 ```
 
 ```bash
-helm install bifrost bifrost/bifrost -f semantic-cache-values.yaml
+helm install bifrost bifrost/bifrost -f local-cache-values.yaml
 ```
 
 **Direct / hash mode** (no embedding provider needed):
@@ -233,7 +233,7 @@ helm install bifrost bifrost/bifrost -f semantic-cache-values.yaml
 ```yaml
 bifrost:
   plugins:
-    semanticCache:
+    # local_cache moved out of plugins; see localCache top-level
       enabled: true
       config:
         dimension: 1          # triggers hash-based exact matching
@@ -523,7 +523,7 @@ bifrost:
       config:
         is_vk_mandatory: true
 
-    semanticCache:
+    # local_cache moved out of plugins; see localCache top-level
       enabled: true
       config:
         provider: "openai"
diff --git a/docs/deployment-guides/helm/values.mdx b/docs/deployment-guides/helm/values.mdx
index 6badddb2ed..7816d1d026 100644
--- a/docs/deployment-guides/helm/values.mdx
+++ b/docs/deployment-guides/helm/values.mdx
@@ -611,7 +611,7 @@ bifrost:
       envVar: "OPENAI_API_KEY"
 
   plugins:
-    semanticCache:
+    # local_cache moved out of plugins; see localCache top-level
       enabled: true
       config:
         provider: "openai"
diff --git a/docs/docs.json b/docs/docs.json
index d406d683f0..23975b8e76 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -205,7 +205,7 @@
                 ]
               },
               "features/telemetry",
-              "features/semantic-caching",
+              "features/local-caching",
               {
                 "group": "Prompt Repository",
                 "icon": "folder",
@@ -704,6 +704,10 @@
     ]
   },
   "redirects": [
+    {
+      "source": "/features/semantic-caching",
+      "destination": "/features/local-caching"
+    },
     {
       "source": "/features/fallbacks",
       "destination": "/features/retries-and-fallbacks"
diff --git a/docs/features/drop-in-replacement.mdx b/docs/features/drop-in-replacement.mdx
index b6be05818d..8edf4d8008 100644
--- a/docs/features/drop-in-replacement.mdx
+++ b/docs/features/drop-in-replacement.mdx
@@ -58,7 +58,7 @@ Once your SDK points to Bifrost, you automatically get:
 
 - **Multi-provider support** with automatic failovers
 - **Load balancing** across multiple API keys
-- **Semantic caching** for faster responses
+- **Local caching** for faster responses
 - **Governance controls** for usage monitoring and budgets
 - **Request/response logging** and analytics
 - **Rate limiting** and circuit breakers
diff --git a/docs/features/semantic-caching.mdx b/docs/features/local-caching.mdx
similarity index 57%
rename from docs/features/semantic-caching.mdx
rename to docs/features/local-caching.mdx
index 5413649ea2..f268ad00e3 100644
--- a/docs/features/semantic-caching.mdx
+++ b/docs/features/local-caching.mdx
@@ -1,33 +1,33 @@
 ---
-title: "Semantic Caching"
-description: "Intelligent response caching based on semantic similarity. Reduce costs and latency by serving cached responses for semantically similar requests."
+title: "Local Caching"
+description: "Cache responses locally with two complementary lookup paths: direct hash matching for exact replays and semantic similarity search for related content. Cuts cost and latency on repeated or near-duplicate requests."
 icon: "database"
 ---
 
 ## Overview
 
-Semantic caching uses vector similarity search to intelligently cache AI responses, serving cached results for semantically similar requests even when the exact wording differs. This dramatically reduces API costs and latency for repeated or similar queries.
+Local caching serves repeated requests from a local store instead of re-hitting the upstream provider. It runs two complementary lookup paths: a **direct** hash match (xxhash on the request payload) for exact replays, and a **semantic** similarity search (vector embeddings + cosine threshold) for related content. Direct hits are tried first — they're the cheapest path and don't need an embedding provider — so direct-only mode is a fully usable configuration on its own.
 
 **Key Benefits:**
-- **Cost Reduction**: Avoid expensive LLM API calls for similar requests
-- **Improved Performance**: Sub-millisecond cache retrieval vs multi-second API calls  
-- **Intelligent Matching**: Semantic similarity beyond exact text matching
+- **Cost Reduction**: Avoid LLM API calls for repeated and near-duplicate requests
+- **Improved Performance**: Sub-millisecond cache retrieval vs multi-second API calls
+- **Two Modes**: Pure direct (hash-only, no embedding provider) or direct + semantic (adds vector similarity)
 - **Streaming Support**: Full streaming response caching with proper chunk ordering
 
 ---
 
 ## Core Features
 
-- **Dual-Layer Caching**: Exact hash matching + semantic similarity search (customizable threshold)
-- **Vector-Powered Intelligence**: Uses embeddings to find semantically similar requests
-- **Dynamic Configuration**: Per-request TTL and threshold overrides via headers/context
-- **Model/Provider Isolation**: Separate caching per model and provider combination
+- **Two Lookup Paths**: Exact hash matching first, then semantic similarity search (when configured)
+- **Live Configuration**: All config fields mutate in-place via `PUT /api/local-cache/config` — no plugin reload, no request drop
+- **Dynamic Per-Request Overrides**: TTL, threshold, mode, and no-store via headers/context
+- **Model/Provider Isolation**: Optional cache-key composition by model and/or provider
 
 ---
 
 ## Vector Store Setup
 
-Semantic caching requires a configured vector store. Bifrost supports the following vector databases:
+Local caching requires a configured vector store (used by the direct path too — for indexing entries by their deterministic UUIDv5 storage ID, not for similarity). Bifrost supports the following vector databases:
 
 <CardGroup cols={2}>
   <Card title="Weaviate" icon="database" href="/integrations/vector-databases/weaviate">
@@ -100,88 +100,52 @@ if err != nil {
 
 ---
 
-## Semantic Cache Configuration
+## Local Cache Configuration
 
-> **UI Note**: The current Web UI flow configures provider-backed semantic caching. If you want direct-only mode (`dimension: 1` with no `provider`), configure it through `config.json`.
+The local cache uses a dedicated configuration surface — a top-level `local_cache` block in `config.json` and the matching `GET`/`PUT /api/local-cache/config` REST endpoints. The plugin enable/disable toggle lives on `client.enable_local_cache`. Flipping that flag loads or unloads the plugin without a server restart; field changes mutate the shared `*LocalCacheConfig` pointer in place so the running plugin sees them on the next request.
 
 <Tabs group="cache-config">
 
-<Tab title="Go SDK">
-
-```go
-import (
-    "github.com/maximhq/bifrost/plugins/semanticcache"
-    "github.com/maximhq/bifrost/core/schemas"
-)
-
-// Configure semantic cache plugin
-cacheConfig := &semanticcache.Config{
-    // Embedding model configuration (Required)
-    Provider:       schemas.OpenAI,
-    EmbeddingModel: "text-embedding-3-small",
-    Dimension:     1536,
-    
-    // Cache behavior
-    TTL:       5 * time.Minute,  // Time to live for cached responses (default: 5 minutes)
-    Threshold: 0.8,              // Similarity threshold for cache lookup (default: 0.8)
-    CleanUpOnShutdown: true,     // Clean up cache on shutdown (default: false)
-    
-    // Conversation behavior
-    ConversationHistoryThreshold: 5,    // Skip caching if conversation has > N messages (default: 3)
-    ExcludeSystemPrompt: bifrost.Ptr(false), // Exclude system messages from cache key (default: false)
-    
-    // Advanced options
-    CacheByModel:    bifrost.Ptr(true),  // Include model in cache key (default: true)
-    CacheByProvider: bifrost.Ptr(true),  // Include provider in cache key (default: true)
-}
-
-// Create plugin
-plugin, err := semanticcache.Init(context.Background(), cacheConfig, logger, store)
-if err != nil {
-    log.Fatal("Failed to create semantic cache plugin:", err)
-}
-
-// Add to Bifrost config
-bifrostConfig := schemas.BifrostConfig{
-    LLMPlugins: []schemas.LLMPlugin{plugin},
-    // ... other config
-}
-```
-
-</Tab>
-
 <Tab title="Web UI">
 
-![Semantic Cache Plugin Configuration](../media/ui-semantic-cache-config.png)
+![Local Cache Plugin Configuration](../media/ui-local-cache-config.png)
 
-**Prerequisites**: A vector store must be configured and enabled in `config.json`, and at least one provider must be configured, before the toggle becomes available.
+**Prerequisites**: A vector store must be configured and enabled in `config.json` before the toggle becomes available. An embedding-capable provider is required only for **Direct + Semantic** mode — direct-only works without one.
 
-1. **Navigate to the Config page** in the Bifrost UI and find the **Plugins** section.
+1. **Navigate to the Config page** in the Bifrost UI and find the **Caching** section.
 
-2. **Toggle** the **Enable Semantic Caching** switch to enable it. The configuration form expands below.
+2. **Toggle Enable Local Cache** at the top to load the plugin. This flips `client.enable_local_cache` and triggers a transparent plugin reload — no server restart required.
 
-3. **Fill in the fields** across the four sections:
+3. **Pick a Cache Mode**:
+   - **Direct only** — pure hash matching. No embedding provider needed. Best for stable prompts and the cheapest path.
+   - **Direct + Semantic** — adds vector similarity search on top. Requires an embedding-capable provider; direct hits are still served first, semantic search only runs on direct misses.
 
-**Provider and Model Settings** (required for semantic mode):
+4. **Fill in the fields** that the chosen mode shows:
+
+**Embedding Provider & Model** (Direct + Semantic only):
 - **Configured Providers**: Dropdown of providers already set up in Bifrost. The selected provider's API keys are inherited automatically.
 - **Embedding Model**: The embedding model to use (e.g. `text-embedding-3-small`).
+- **Dimension**: Vector size produced by the model — must match the model exactly. Common values: `1536` for OpenAI `text-embedding-3-small`, `3072` for `text-embedding-3-large`, `768` for many Cohere/Voyage models.
 
 **Cache Settings**:
 - **TTL (seconds)**: How long cached responses are kept (default: 300 s).
-- **Similarity Threshold**: Cosine similarity cutoff for a cache hit (0–1, default: 0.8).
-- **Dimension**: Vector size produced by the embedding model — must match the model exactly. Common values: `1536` for OpenAI `text-embedding-3-small`, `3072` for `text-embedding-3-large`, `768` for many Cohere/Voyage models. Use `1` only in direct-only mode (no provider).
+- **Similarity Threshold** (semantic mode only): Cosine similarity cutoff for a cache hit (0–1, default: 0.8).
 
-> **Heads up**: a vector store namespace can only hold vectors of *one* dimension. Whenever you change the embedding **provider**, **model**, or **dimension**, make sure the new dimension still matches what the model produces — otherwise writes to the existing namespace will fail and reads will silently miss. The namespace is **not** recreated automatically; either point `vector_store_namespace` at a fresh name or drop the existing class/index in your vector store before saving.
+**Storage & Cache Key**:
+- **Vector Store Namespace**: Bucket/index name within the vector store. Default `BifrostLocalCachePlugin`.
+- **Default Cache Key**: Fallback partition key used when a request doesn't set `x-bf-cache-key`. Leave blank to disable caching for header-less requests.
 
 **Conversation Settings**:
 - **Conversation History Threshold**: Skip caching when the conversation has more than this many messages (default: 3).
 - **Exclude System Prompt** (toggle): Exclude system messages from cache-key generation.
 
-**Cache Behavior**:
-- **Cache by Model** (toggle): Include the model name in the cache key (default: on).
-- **Cache by Provider** (toggle): Include the provider name in the cache key (default: on).
+**Cache Key Composition**:
+- **Cache by Model**: Include the model name in the cache key (default: on).
+- **Cache by Provider**: Include the provider name in the cache key (default: on).
+
+5. Click **Save**. The change persists to `config_local_cache` and the running plugin's pointer is mutated in place.
 
-4. Click **Save**. Changes are persisted and applied immediately for enabled plugins via the API reload path; other plugin changes (e.g. via `config.json`) may still require a restart.
+> **Heads up**: when you change the **provider**, **model**, or **dimension**, make sure the new dimension matches what the model produces — otherwise writes to the existing namespace will fail and reads will silently miss. Old data is left in the previous namespace by design (no automatic flush on dimension change).
 
 </Tab>
 
@@ -189,31 +153,29 @@ bifrostConfig := schemas.BifrostConfig{
 
 ```json
 {
-  "plugins": [
-    {
-      "enabled": true,
-      "name": "semantic_cache",
-      "config": {        
-        "provider": "openai",
-        "embedding_model": "text-embedding-3-small",
-        "dimension": 1536,
-        
-        "cleanup_on_shutdown": true,
-        "ttl": "5m",
-        "threshold": 0.8,
-        
-        "conversation_history_threshold": 3,
-        "exclude_system_prompt": false,
-        
-        "cache_by_model": true,
-        "cache_by_provider": true
-      }
-    }
-  ]
+  "client": {
+    "enable_local_cache": true
+  },
+  "vector_store": { "...": "..." },
+  "local_cache": {
+    "provider": "openai",
+    "embedding_model": "text-embedding-3-small",
+    "dimension": 1536,
+
+    "cleanup_on_shutdown": true,
+    "ttl": "5m",
+    "threshold": 0.8,
+
+    "conversation_history_threshold": 3,
+    "exclude_system_prompt": false,
+
+    "cache_by_model": true,
+    "cache_by_provider": true
+  }
 }
 ```
 
-> **Note**: Provider API keys are inherited automatically from the global provider configuration. You do not need to (and cannot) specify keys inside the plugin config.
+> **Note**: Provider API keys are inherited automatically from the global provider configuration. You do not need to (and cannot) specify keys inside the local-cache config.
 
 **TTL Format Options:**
 - Duration strings: `"30s"`, `"5m"`, `"1h"`, `"24h"`
@@ -221,52 +183,115 @@ bifrostConfig := schemas.BifrostConfig{
 
 </Tab>
 
+<Tab title="REST API">
+
+```bash
+# Read the live config
+curl localhost:8080/api/local-cache/config
+
+# Update the live config — mutates the running plugin in place, no reload
+curl -X PUT localhost:8080/api/local-cache/config \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "provider": "openai",
+    "embedding_model": "text-embedding-3-small",
+    "dimension": 1536,
+    "ttl": "5m",
+    "threshold": 0.8
+  }'
+
+# Toggle the plugin on/off — flips client.enable_local_cache, triggers
+# transparent ReloadPlugin / RemovePlugin
+curl -X PUT localhost:8080/api/config \
+  -H 'Content-Type: application/json' \
+  -d '{ "client_config": { "enable_local_cache": true } }'
+```
+
+</Tab>
+
+<Tab title="Go SDK">
+
+```go
+import (
+    "github.com/maximhq/bifrost/framework/configstore"
+    "github.com/maximhq/bifrost/plugins/localcache"
+    "github.com/maximhq/bifrost/core/schemas"
+)
+
+// The plugin and the framework share a *LocalCacheConfig pointer so PUT
+// /api/local-cache/config can mutate live behavior at runtime.
+cacheConfig := &configstore.LocalCacheConfig{
+    // Embedding model configuration (Direct + Semantic mode)
+    Provider:       schemas.OpenAI,
+    EmbeddingModel: "text-embedding-3-small",
+    Dimension:      1536,
+
+    // Cache behavior
+    TTL:               5 * time.Minute, // Time to live for cached responses (default: 5 minutes)
+    Threshold:         0.8,             // Cosine similarity cutoff (default: 0.8)
+    CleanUpOnShutdown: true,            // Drop entries on shutdown (default: false)
+
+    // Conversation behavior
+    ConversationHistoryThreshold: 5,                 // Skip caching if conversation has > N messages (default: 3)
+    ExcludeSystemPrompt:          bifrost.Ptr(false), // Exclude system messages from cache key (default: false)
+
+    // Advanced options
+    CacheByModel:    bifrost.Ptr(true), // Include model in cache key (default: true)
+    CacheByProvider: bifrost.Ptr(true), // Include provider in cache key (default: true)
+}
+
+plugin, err := localcache.Init(context.Background(), cacheConfig, logger, store)
+if err != nil {
+    log.Fatal("Failed to create local cache plugin:", err)
+}
+```
+
+</Tab>
+
 </Tabs>
 
 ---
 
-## Direct Hash Mode (Embedding-Free)
+## Direct-Only Mode (Embedding-Free)
 
-Direct hash mode provides exact-match caching without requiring an embedding provider. Each request is hashed deterministically based on its normalized input, parameters, and stream flag. Identical requests produce cache hits; different wording is a cache miss.
+Direct-only mode provides exact-match caching without an embedding provider. Each request is hashed deterministically based on its normalized input, parameters, and stream flag. Identical requests are cache hits; different wording is a cache miss.
 
-Exact-match direct entries are stored and retrieved using a deterministic cache ID. This keeps repeated direct cache lookups fast and consistent across retries, streaming responses, and restarts.
+Direct entries are stored and retrieved using a deterministic UUIDv5 cache ID, so repeated lookups are fast and consistent across retries, streaming responses, and restarts.
 
-**When to use direct hash mode:**
-- You only need exact-match deduplication (no fuzzy/semantic matching)
+**When to use direct-only mode:**
+- You only need exact-match deduplication (no fuzzy matching)
 - You cannot or do not want to call an external embedding API
 - You want the lowest possible latency with zero embedding overhead
 - Cost-sensitive environments where embedding API calls add up
 
 ### Setup
 
-To enable direct-only mode globally, set `dimension: 1` and omit the `provider` and `embedding_model` fields from the plugin config. The plugin will automatically fall back to direct search only.
+To run direct-only, set `dimension: 1` and omit `provider` and `embedding_model`. The plugin disables the semantic search path entirely and goes through the deterministic direct hash path.
 
-> **Important**: If you specify `dimension: 1` and also provide a `provider`, Bifrost treats the config as provider-backed semantic mode, not direct-only mode. To use direct-only mode, omit the `provider` field entirely.
+> **Important**: If you specify `dimension: 1` and also provide a `provider`, Bifrost treats the config as provider-backed semantic mode, not direct-only. To use direct-only, omit the `provider` field entirely.
 
 <Warning>
-A vector store is still required as the storage backend, even in direct hash mode. See [Recommended Vector Store](#recommended-vector-store) below for the best choice.
+A vector store is still required as the storage backend, even in direct-only mode (entries are indexed by their deterministic ID). See [Recommended Vector Store](#recommended-vector-store) below.
 </Warning>
 
 <Tabs group="direct-hash-setup">
 
-<Tab title="Go SDK">
-
-```go
-import (
-    "github.com/maximhq/bifrost/plugins/semanticcache"
-)
-
-cacheConfig := &semanticcache.Config{
-    // No Provider or EmbeddingModel -- direct hash mode only
-    Dimension: 1, // Placeholder; entries are stored as metadata-only (no embedding vectors). Change dimension before switching to dual-layer mode to avoid mixed-dimension issues.
+<Tab title="config.json">
 
-    TTL:               5 * time.Minute,
-    CleanUpOnShutdown: true,
-    CacheByModel:      bifrost.Ptr(true),
-    CacheByProvider:   bifrost.Ptr(true),
+```json
+{
+  "client": {
+    "enable_local_cache": true
+  },
+  "vector_store": { "...": "..." },
+  "local_cache": {
+    "dimension": 1,
+    "ttl": "5m",
+    "cleanup_on_shutdown": true,
+    "cache_by_model": true,
+    "cache_by_provider": true
+  }
 }
-
-plugin, err := semanticcache.Init(ctx, cacheConfig, logger, store)
 ```
 
 </Tab>
@@ -275,37 +300,37 @@ plugin, err := semanticcache.Init(ctx, cacheConfig, logger, store)
 
 ```yaml
 bifrost:
-  plugins:
-    semanticCache:
-      enabled: true
-      config:
-        dimension: 1
-        ttl: "5m"
-        cleanup_on_shutdown: true
-        cache_by_model: true
-        cache_by_provider: true
+  client:
+    enableLocalCache: true
+  localCache:
+    dimension: 1
+    ttl: "5m"
+    cleanup_on_shutdown: true
+    cache_by_model: true
+    cache_by_provider: true
 ```
 
 </Tab>
 
-<Tab title="config.json">
+<Tab title="Go SDK">
 
-```json
-{
-  "plugins": [
-    {
-      "enabled": true,
-      "name": "semantic_cache",
-      "config": {
-        "dimension": 1,
-        "ttl": "5m",
-        "cleanup_on_shutdown": true,
-        "cache_by_model": true,
-        "cache_by_provider": true
-      }
-    }
-  ]
+```go
+import (
+    "github.com/maximhq/bifrost/framework/configstore"
+    "github.com/maximhq/bifrost/plugins/localcache"
+)
+
+cacheConfig := &configstore.LocalCacheConfig{
+    // No Provider or EmbeddingModel — direct-only mode
+    Dimension: 1, // Placeholder so stores that require vectors stay happy. Change before switching to direct + semantic mode to avoid mixed-dimension issues.
+
+    TTL:               5 * time.Minute,
+    CleanUpOnShutdown: true,
+    CacheByModel:      bifrost.Ptr(true),
+    CacheByProvider:   bifrost.Ptr(true),
 }
+
+plugin, err := localcache.Init(ctx, cacheConfig, logger, store)
 ```
 
 </Tab>
@@ -373,7 +398,7 @@ When the plugin is initialized **with** an embedding provider (dual-layer mode),
 ## Cache Triggering
 
 <Warning>
-**Cache Key is mandatory**: Semantic caching only activates when a cache key is provided. Without a cache key, requests bypass caching entirely.
+**Cache Key is mandatory**: The local cache only activates when a cache key is provided — either per request or through a configured **Default Cache Key**. Without a cache key, requests bypass caching entirely.
 </Warning>
 
 <Tabs group="cache-triggering">
@@ -383,7 +408,7 @@ Must set cache key in request context:
 
 ```go
 // This request WILL be cached
-ctx = context.WithValue(ctx, semanticcache.CacheKey, "session-123")
+ctx = context.WithValue(ctx, localcache.CacheKey, "session-123")
 response, err := client.ChatCompletionRequest(schemas.NewBifrostContext(ctx, schemas.NoDeadline), request)
 
 // This request will NOT be cached (no context value)
@@ -415,13 +440,13 @@ Override default TTL and similarity threshold per request:
 
 <Tab title="Go SDK">
 
-You can set TTL and threshold in the request context using the semantic cache context keys:
+You can set TTL and threshold in the request context using the local cache context keys:
 
 ```go
 // Go SDK: Custom TTL and threshold
-ctx = context.WithValue(ctx, semanticcache.CacheKey, "session-123")
-ctx = context.WithValue(ctx, semanticcache.CacheTTLKey, 30*time.Second)
-ctx = context.WithValue(ctx, semanticcache.CacheThresholdKey, 0.9)
+ctx = context.WithValue(ctx, localcache.CacheKey, "session-123")
+ctx = context.WithValue(ctx, localcache.CacheTTLKey, 30*time.Second)
+ctx = context.WithValue(ctx, localcache.CacheThresholdKey, 0.9)
 ```
 
 </Tab>
@@ -455,15 +480,15 @@ Control which caching mechanism to use per request:
 
 ```go
 // Use only direct hash matching (fastest)
-ctx = context.WithValue(ctx, semanticcache.CacheKey, "session-123")
-ctx = context.WithValue(ctx, semanticcache.CacheTypeKey, semanticcache.CacheTypeDirect)
+ctx = context.WithValue(ctx, localcache.CacheKey, "session-123")
+ctx = context.WithValue(ctx, localcache.CacheTypeKey, localcache.CacheTypeDirect)
 
 // Use only semantic similarity search
-ctx = context.WithValue(ctx, semanticcache.CacheKey, "session-123")  
-ctx = context.WithValue(ctx, semanticcache.CacheTypeKey, semanticcache.CacheTypeSemantic)
+ctx = context.WithValue(ctx, localcache.CacheKey, "session-123")  
+ctx = context.WithValue(ctx, localcache.CacheTypeKey, localcache.CacheTypeSemantic)
 
 // Default behavior: Direct + semantic fallback (if not specified)
-ctx = context.WithValue(ctx, semanticcache.CacheKey, "session-123")
+ctx = context.WithValue(ctx, localcache.CacheKey, "session-123")
 ```
 
 </Tab>
@@ -497,8 +522,8 @@ Disable response caching while still allowing cache reads:
 
 ```go
 // Read from cache but don't store the response
-ctx = context.WithValue(ctx, semanticcache.CacheKey, "session-123")
-ctx = context.WithValue(ctx, semanticcache.CacheNoStoreKey, true)
+ctx = context.WithValue(ctx, localcache.CacheKey, "session-123")
+ctx = context.WithValue(ctx, localcache.CacheNoStoreKey, true)
 ```
 
 </Tab>
@@ -565,7 +590,7 @@ Control whether system messages are included in cache key generation:
 
 ### Cache Metadata Location
 
-When responses are served from semantic cache, 3 key variables are automatically added to the response:
+When responses are served from the local cache, 3 key variables are automatically added to the response:
 
 **Location**: `response.ExtraFields.CacheDebug` (as a JSON object)
 
@@ -575,7 +600,7 @@ When responses are served from semantic cache, 3 key variables are automatically
 - `CacheID` (string): Unique cache entry ID for management operations (present only for cache hits)
 
 
-**Semantic Cache Only**:
+**Semantic mode only**:
 - `ProviderUsed` (string): Provider used for the calculating semantic match embedding. (present for both cache hits and misses)
 - `ModelUsed` (string): Model used for the calculating semantic match embedding. (present for both cache hits and misses)
 - `InputTokens` (number): Number of tokens extracted from the request for the semantic match embedding calculation. (present for both cache hits and misses)
@@ -659,7 +684,7 @@ curl -X DELETE http://localhost:8080/api/cache/clear-by-key/support-session-456
 
 ### Cache Lifecycle & Cleanup
 
-The semantic cache automatically handles cleanup to prevent storage bloat:
+The local cache automatically handles cleanup to prevent storage bloat:
 
 **Automatic Cleanup:**
 - **TTL Expiration**: Entries are automatically removed when TTL expires
@@ -672,7 +697,7 @@ The semantic cache automatically handles cleanup to prevent storage bloat:
 - Restart Bifrost to clear all cache data
 
 <Warning>
-The semantic cache namespace and all its cache entries are deleted when Bifrost client shuts down **only if `cleanup_on_shutdown` is set to `true`**. By default (`cleanup_on_shutdown: false`), cache data persists between restarts. DO NOT use the plugin's namespace for external purposes.
+The local cache namespace and all its cache entries are deleted when Bifrost client shuts down **only if `cleanup_on_shutdown` is set to `true`**. By default (`cleanup_on_shutdown: false`), cache data persists between restarts. DO NOT use the plugin's namespace for external purposes.
 </Warning>
 
 <Warning>
@@ -686,5 +711,5 @@ The semantic cache namespace and all its cache entries are deleted when Bifrost
 ---
 
 <Info>
-**Vector Store Requirement**: Semantic caching requires a configured vector store. Bifrost supports Weaviate, Redis/Valkey-compatible endpoints, Qdrant, and Pinecone. See the [Vector Store documentation](/architecture/framework/vector-store) for setup details.
+**Vector Store Requirement**: Local caching requires a configured vector store. Bifrost supports Weaviate, Redis/Valkey-compatible endpoints, Qdrant, and Pinecone. See the [Vector Store documentation](/architecture/framework/vector-store) for setup details.
 </Info>
diff --git a/docs/integrations/bedrock-sdk/overview.mdx b/docs/integrations/bedrock-sdk/overview.mdx
index 9d5c71aa15..b6789a367f 100644
--- a/docs/integrations/bedrock-sdk/overview.mdx
+++ b/docs/integrations/bedrock-sdk/overview.mdx
@@ -256,7 +256,7 @@ The Bedrock integration currently supports:
 - **Streaming** via `converse_stream` and `invoke_model_with_response_stream`
 - **Tools** via `toolConfig`, `toolUse`, and `toolResult` inside Converse requests  
 - **Image and multimodal** responses where supported by the underlying Bedrock model  
-- All Bifrost core features that apply to these flows (governance, load balancing, semantic cache, observability, etc.)
+- All Bifrost core features that apply to these flows (governance, load balancing, local cache, observability, etc.)
 
 ---
 
diff --git a/docs/integrations/langchain-sdk.mdx b/docs/integrations/langchain-sdk.mdx
index 40b5c24ebd..51b261d422 100644
--- a/docs/integrations/langchain-sdk.mdx
+++ b/docs/integrations/langchain-sdk.mdx
@@ -720,5 +720,5 @@ The Langchain integration supports all features that are available in both the L
 ## Next Steps
 
 - **[Governance Features](../features/governance)** - Virtual keys and team management
-- **[Semantic Caching](../features/semantic-caching)** - Intelligent response caching
+- **[Local Caching](../features/local-caching)** - Intelligent response caching
 - **[Configuration](../quickstart/README)** - Provider setup and API key management
diff --git a/docs/integrations/litellm-sdk.mdx b/docs/integrations/litellm-sdk.mdx
index 10f1fd6077..7278a0d58c 100644
--- a/docs/integrations/litellm-sdk.mdx
+++ b/docs/integrations/litellm-sdk.mdx
@@ -176,5 +176,5 @@ The LiteLLM integration supports all features that are available in both the Lit
 ## Next Steps
 
 - **[Governance Features](../features/governance)** - Virtual keys and team management
-- **[Semantic Caching](../features/semantic-caching)** - Intelligent response caching
+- **[Local Caching](../features/local-caching)** - Intelligent response caching
 - **[Configuration](../quickstart/README)** - Provider setup and API key management
diff --git a/docs/integrations/pydanticai-sdk.mdx b/docs/integrations/pydanticai-sdk.mdx
index b754cd4772..bacfdd2765 100644
--- a/docs/integrations/pydanticai-sdk.mdx
+++ b/docs/integrations/pydanticai-sdk.mdx
@@ -404,6 +404,6 @@ Your existing Pydantic AI agents work seamlessly with Bifrost's enterprise featu
 ## Next Steps
 
 - **[Governance Features](../features/governance)** - Virtual keys and team management
-- **[Semantic Caching](../features/semantic-caching)** - Intelligent response caching
+- **[Local Caching](../features/local-caching)** - Intelligent response caching
 - **[Configuration](../quickstart/README)** - Provider setup and API key management
 
diff --git a/docs/integrations/vector-databases/pinecone.mdx b/docs/integrations/vector-databases/pinecone.mdx
index bdcbfa9896..a84b536fb5 100644
--- a/docs/integrations/vector-databases/pinecone.mdx
+++ b/docs/integrations/vector-databases/pinecone.mdx
@@ -94,4 +94,4 @@ For local development with Pinecone Local, any API key value works (e.g., "pcloc
 Pinecone requires all IDs to be unique strings. Namespaces are created automatically when you first upsert vectors.
 </Warning>
 
-For the VectorStore interface API and usage examples, see [Vector Store Architecture](/architecture/framework/vector-store). For semantic caching setup, see [Semantic Caching](/features/semantic-caching).
+For the VectorStore interface API and usage examples, see [Vector Store Architecture](/architecture/framework/vector-store). For local caching setup, see [Local Caching](/features/local-caching).
diff --git a/docs/integrations/vector-databases/qdrant.mdx b/docs/integrations/vector-databases/qdrant.mdx
index 2b8343d950..27f9957d90 100644
--- a/docs/integrations/vector-databases/qdrant.mdx
+++ b/docs/integrations/vector-databases/qdrant.mdx
@@ -91,4 +91,4 @@ Qdrant uses port 6334 for gRPC and port 6333 for REST. Bifrost uses the gRPC por
 Qdrant requires all IDs to be valid UUIDs. Use `uuid.New().String()` to generate IDs.
 </Warning>
 
-For the VectorStore interface API and usage examples, see [Vector Store Architecture](/architecture/framework/vector-store). For semantic caching setup, see [Semantic Caching](/features/semantic-caching).
+For the VectorStore interface API and usage examples, see [Vector Store Architecture](/architecture/framework/vector-store). For local caching setup, see [Local Caching](/features/local-caching).
diff --git a/docs/integrations/vector-databases/redis.mdx b/docs/integrations/vector-databases/redis.mdx
index bd2f167f7c..f93a823e10 100644
--- a/docs/integrations/vector-databases/redis.mdx
+++ b/docs/integrations/vector-databases/redis.mdx
@@ -238,4 +238,4 @@ deleteResults, err := store.DeleteAll(ctx, namespace, queries)
 - Monitor memory usage and set appropriate TTL values
 </Warning>
 
-For the VectorStore interface API and usage examples, see [Vector Store Architecture](/architecture/framework/vector-store). For semantic caching setup, see [Semantic Caching](/features/semantic-caching).
+For the VectorStore interface API and usage examples, see [Vector Store Architecture](/architecture/framework/vector-store). For local caching setup, see [Local Caching](/features/local-caching).
diff --git a/docs/integrations/vector-databases/weaviate.mdx b/docs/integrations/vector-databases/weaviate.mdx
index 047316bfe7..a7fd8aecd6 100644
--- a/docs/integrations/vector-databases/weaviate.mdx
+++ b/docs/integrations/vector-databases/weaviate.mdx
@@ -143,4 +143,4 @@ vectorConfig := &vectorstore.Config{
 **Authentication**: Always use API keys for Weaviate Cloud deployments and configure proper authentication for self-hosted instances in production.
 </Warning>
 
-For the VectorStore interface API and usage examples, see [Vector Store Architecture](/architecture/framework/vector-store). For semantic caching setup, see [Semantic Caching](/features/semantic-caching).
+For the VectorStore interface API and usage examples, see [Vector Store Architecture](/architecture/framework/vector-store). For local caching setup, see [Local Caching](/features/local-caching).
diff --git a/docs/migration-guides/v1.5.0.mdx b/docs/migration-guides/v1.5.0.mdx
index c908a5aced..4757bab9b6 100644
--- a/docs/migration-guides/v1.5.0.mdx
+++ b/docs/migration-guides/v1.5.0.mdx
@@ -521,6 +521,99 @@ Single-key, pinned (`x-bf-key-id` / `x-bf-key-name`), and session-sticky request
 
 ---
 
+## Breaking Change 14: Semantic Cache Renamed to Local Cache
+
+The plugin formerly known as `semantic_cache` has been renamed to `local_cache` to reflect the fact that it serves both direct (exact) and semantic (similarity) cache hits. The rename is mechanical — caching behavior, vector store backends, and the request-context API are unchanged.
+
+It also brings a brand-new configuration surface: a dedicated `config_local_cache` table, a top-level `local_cache` key in `config.json`, and a single REST endpoint pair (`GET`/`PUT /api/local-cache/config`). The plugin enable/disable toggle moves to a `client.enable_local_cache` flag — flipping it loads or unloads the plugin without a server restart. Pure config changes (TTL, threshold, etc.) mutate a shared pointer in place, so the running plugin observes them on the next request.
+
+### `config.json`
+
+The plugin now lives at the top level of `config.json`, alongside `vector_store` and `client`. Add `client.enable_local_cache: true` to load it on boot.
+
+**Before:**
+```json
+{
+  "vector_store": { "...": "..." },
+  "plugins": [
+    {
+      "enabled": true,
+      "name": "semantic_cache",
+      "config": {
+        "dimension": 1,
+        "ttl": 300,
+        "threshold": 0.8
+      }
+    }
+  ]
+}
+```
+
+**After:**
+```json
+{
+  "client": {
+    "enable_local_cache": true
+  },
+  "vector_store": { "...": "..." },
+  "local_cache": {
+    "dimension": 1,
+    "ttl": 300,
+    "threshold": 0.8
+  }
+}
+```
+
+Existing `config_plugins['semantic_cache']` rows are auto-migrated to the new `config_local_cache` table on first boot — no manual SQL is required. The legacy plugin row is deleted after the migration.
+
+### REST API
+
+A dedicated endpoint pair replaces the generic `PUT /api/plugins/semantic_cache`:
+
+| Operation | Endpoint |
+|---|---|
+| Read current local-cache config | `GET /api/local-cache/config` |
+| Update local-cache config (live) | `PUT /api/local-cache/config` |
+| Enable/disable the plugin | `PUT /api/config` with `client.enable_local_cache` |
+
+`PUT /api/local-cache/config` mutates the running plugin's configuration in place — no plugin reload, no request drop. Structural changes (`vector_store_namespace`, `dimension`) automatically call `EnsureNamespace` so the new namespace is materialized before the next request lands.
+
+### Go SDK
+
+The plugin Go module path changed:
+
+```
+github.com/maximhq/bifrost/plugins/semanticcache → github.com/maximhq/bifrost/plugins/localcache
+```
+
+All public type names that referenced the plugin name (`semanticcache.Plugin`, `semanticcache.Config`, `semanticcache.PluginName`) move to the `localcache` package. The plugin name string is now `"local_cache"`. Cache-mode names — `CacheTypeDirect` and `CacheTypeSemantic` — describe the lookup *technique* and stay unchanged. Per-request context keys also rename:
+
+| Before | After |
+|---|---|
+| `"semantic_cache-key"` | `"local_cache-key"` |
+| `"semantic_cache-ttl"` | `"local_cache-ttl"` |
+| `"semantic_cache-threshold"` | `"local_cache-threshold"` |
+| `"semantic_cache-cache_type"` | `"local_cache-cache_type"` |
+| `"semantic_cache-no_store"` | `"local_cache-no_store"` |
+
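+If you set these keys directly on `BifrostContext` as raw strings, update them. If you use the typed helpers exported from the plugin (`localcache.CacheKey`, etc.), you only need to switch the import and package qualifier from `semanticcache` to `localcache`; the underlying key strings update with the package.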
+
+The default vector-store namespace name and the cache-entry filter property also change:
+
+- `BifrostSemanticCachePlugin` → `BifrostLocalCachePlugin`
+- `from_bifrost_semantic_cache_plugin` → `from_bifrost_local_cache_plugin`
+
+Entries written under the old defaults remain in the old namespace; the plugin no longer queries them. If you want to evict the stale data, drop the old namespace from your vector store directly.
+
+### Migration Checklist
+
+- Replace any `plugins[]` entry named `"semantic_cache"` in `config.json` with the top-level `local_cache` block and add `client.enable_local_cache: true`.
+- Replace `import "github.com/maximhq/bifrost/plugins/semanticcache"` with `"github.com/maximhq/bifrost/plugins/localcache"` in your Go code; update any direct context-key strings.
+- Update any automation that hits `PUT /api/plugins/semantic_cache` to use the new `/api/local-cache/config` endpoint.
+- If you depend on the old default namespace `BifrostSemanticCachePlugin` continuing to be queried, set `vector_store_namespace: "BifrostSemanticCachePlugin"` in the new config block to override the new default.
+
+---
+
 ## Breaking Change 13: Semantic Cache Clear API is Now Cache-ID Based
 
 The semantic cache "clear by request ID" API has been removed. Storage IDs in the cache are deterministic UUIDv5 hashes derived from the request payload (so the same prompt across many requests maps to a single cache entry), which made the previous request-ID-based delete unable to match anything written by the direct-search path.
diff --git a/docs/openapi/openapi.yaml b/docs/openapi/openapi.yaml
index 53a3bf4d8c..8edc0755bb 100644
--- a/docs/openapi/openapi.yaml
+++ b/docs/openapi/openapi.yaml
@@ -790,6 +790,8 @@ paths:
     $ref: './paths/management/cache.yaml#/clear-by-cache-id'
   /api/cache/clear-by-key/{cacheKey}:
     $ref: './paths/management/cache.yaml#/clear-by-cache-key'
+  /api/local-cache/config:
+    $ref: './paths/management/local_cache.yaml#/config'
 
   # Infrastructure
   /ws:
diff --git a/docs/openapi/paths/management/local_cache.yaml b/docs/openapi/paths/management/local_cache.yaml
new file mode 100644
index 0000000000..be1e419bb0
--- /dev/null
+++ b/docs/openapi/paths/management/local_cache.yaml
@@ -0,0 +1,54 @@
+config:
+  get:
+    operationId: getLocalCacheConfig
+    summary: Get local cache configuration
+    description: |
+      Returns the live local-cache configuration. Falls back to the database
+      when the in-memory pointer hasn't been hydrated (typically because the
+      plugin is currently disabled). Returns an empty object when no config
+      has been persisted yet.
+    tags:
+      - Cache
+    responses:
+      '200':
+        description: Current local-cache configuration
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/cache.yaml#/LocalCacheConfig'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+      '503':
+        description: Config store not available
+  put:
+    operationId: updateLocalCacheConfig
+    summary: Update local cache configuration
+    description: |
+      Validates and persists a new local-cache configuration. After the DB
+      write, the in-memory shared pointer is mutated in place — the running
+      plugin observes the new values on its next request without a reload.
+      Structural changes (`vector_store_namespace`, `dimension`) trigger
+      `EnsureNamespace` so the new namespace is materialized before the
+      next request lands; old namespaces are left in place by design (no
+      flush on dimension change).
+    tags:
+      - Cache
+    requestBody:
+      required: true
+      content:
+        application/json:
+          schema:
+            $ref: '../../schemas/management/cache.yaml#/LocalCacheConfig'
+    responses:
+      '200':
+        description: Updated configuration
+        content:
+          application/json:
+            schema:
+              $ref: '../../schemas/management/cache.yaml#/LocalCacheConfig'
+      '400':
+        $ref: '../../openapi.yaml#/components/responses/BadRequest'
+      '500':
+        $ref: '../../openapi.yaml#/components/responses/InternalError'
+      '503':
+        description: Config store not available
diff --git a/docs/openapi/schemas/management/cache.yaml b/docs/openapi/schemas/management/cache.yaml
index 346ee18ac3..73056b279c 100644
--- a/docs/openapi/schemas/management/cache.yaml
+++ b/docs/openapi/schemas/management/cache.yaml
@@ -7,3 +7,66 @@ ClearCacheResponse:
     message:
       type: string
       example: Cache cleared successfully
+
+LocalCacheConfig:
+  type: object
+  description: |
+    Live runtime configuration for the local cache plugin. The framework
+    holds a single shared pointer that the plugin reads on every request,
+    so a successful PUT mutates behavior immediately without a plugin
+    restart. Plugin enable/disable lives separately on
+    `client.enable_local_cache` (PUT /api/config).
+  properties:
+    provider:
+      type: string
+      description: |
+        Embedding provider for semantic search (e.g. "openai"). Leave empty
+        and set `dimension: 1` for direct-only mode without a provider.
+      example: openai
+    embedding_model:
+      type: string
+      description: Embedding model name. Required when `provider` is set.
+      example: text-embedding-3-small
+    cleanup_on_shutdown:
+      type: boolean
+      description: |
+        When true, delete every entry tagged from_bifrost_local_cache_plugin
+        and drop the namespace on plugin Cleanup. Default false leaves
+        entries in place for the next process.
+    ttl:
+      oneOf: [{ type: string }, { type: number }]
+      description: |
+        Time-to-live for cached responses. Accepts a Go duration string
+        ("1m", "30s") or a JSON number in seconds.
+      example: "5m"
+    threshold:
+      type: number
+      description: Cosine similarity threshold for semantic matches (0–1).
+      example: 0.8
+    vector_store_namespace:
+      type: string
+      description: Namespace within the vector store to scope entries.
+    dimension:
+      type: integer
+      description: |
+        Embedding dimension. Use 1 for direct-only mode (no semantic search).
+        Must be > 1 when `provider` is set.
+    default_cache_key:
+      type: string
+      description: |
+        Cache key used when no per-request key is supplied. Caching is
+        disabled when both this and the per-request key are empty.
+    conversation_history_threshold:
+      type: integer
+      description: |
+        Skip caching for requests whose conversation history exceeds this
+        many messages. Default 3.
+    cache_by_model:
+      type: boolean
+      description: Include the model in the cache key. Default true.
+    cache_by_provider:
+      type: boolean
+      description: Include the provider in the cache key. Default true.
+    exclude_system_prompt:
+      type: boolean
+      description: Exclude the system prompt from the cache key. Default false.
diff --git a/docs/overview.mdx b/docs/overview.mdx
index ad5b5b2c90..e89472c05d 100644
--- a/docs/overview.mdx
+++ b/docs/overview.mdx
@@ -47,7 +47,7 @@ Bifrost is a high-performance AI gateway that unifies access to 20+ providers Op
   <Card title="MCP Tool Filtering" icon="grid-2" href="/features/governance/mcp-tools">
     Control which MCP tools are available per virtual key with strict allow-lists.
   </Card>
-  <Card title="Semantic Caching" icon="database" href="/features/semantic-caching">
+  <Card title="Local Caching" icon="database" href="/features/local-caching">
     Intelligent response caching based on semantic similarity. Reduce costs and latency for similar queries.
   </Card>
   <Card title="Built-in Observability" icon="cube" href="/features/observability/default">
diff --git a/docs/plugins/writing-go-plugin.mdx b/docs/plugins/writing-go-plugin.mdx
index 013bbf8639..657e190d68 100644
--- a/docs/plugins/writing-go-plugin.mdx
+++ b/docs/plugins/writing-go-plugin.mdx
@@ -952,7 +952,7 @@ Explore production-ready plugins in the Bifrost repository:
 
 - **[Mocker Plugin](https://github.com/maximhq/bifrost/tree/main/plugins/mocker)** - Mock responses for testing
 - **[Logging Plugin](https://github.com/maximhq/bifrost/tree/main/plugins/logging)** - Advanced request/response logging
-- **[Semantic Cache Plugin](https://github.com/maximhq/bifrost/tree/main/plugins/semanticcache)** - Cache based on semantic similarity
+- **[Local Cache Plugin](https://github.com/maximhq/bifrost/tree/main/plugins/localcache)** - Cache responses by exact match or semantic similarity
 - **[Governance Plugin](https://github.com/maximhq/bifrost/tree/main/plugins/governance)** - Rate limiting and budget controls
 - **[JSON Parser Plugin](https://github.com/maximhq/bifrost/tree/main/plugins/jsonparser)** - Parse and validate JSON responses
 
diff --git a/docs/providers/request-options.mdx b/docs/providers/request-options.mdx
index 5ad6cb5e6b..e5d3b9dea9 100644
--- a/docs/providers/request-options.mdx
+++ b/docs/providers/request-options.mdx
@@ -26,11 +26,11 @@ Bifrost provides request options that control behavior, enable features, and pas
 | `BifrostContextKeySkipKeySelection`        | `-`                                                  | `bool`                | Skip key selection process (Go SDK only)                                                                                              |
 | `BifrostContextKeyURLPath`                 | `-`                                                  | `string`              | Custom URL path appended to provider base URL (Go SDK only)                                                                           |
 | `BifrostContextKeyUseRawRequestBody`       | `-`                                                  | `bool`                | Use raw request body (Go SDK only, requires RawRequestBody field)                                                                     |
-| `semanticcache.CacheKey`                   | `x-bf-cache-key`                                     | `string`              | Custom cache key                                                                                                                      |
-| `semanticcache.CacheTTLKey`                | `x-bf-cache-ttl`                                     | `time.Duration`       | Cache TTL (duration string or seconds)                                                                                                |
-| `semanticcache.CacheThresholdKey`          | `x-bf-cache-threshold`                               | `float64`             | Similarity threshold (0.0-1.0)                                                                                                        |
-| `semanticcache.CacheTypeKey`               | `x-bf-cache-type`                                    | `string`              | Cache type                                                                                                                            |
-| `semanticcache.CacheNoStoreKey`            | `x-bf-cache-no-store`                                | `bool`                | Prevent caching                                                                                                                       |
+| `localcache.CacheKey`                   | `x-bf-cache-key`                                     | `string`              | Custom cache key                                                                                                                      |
+| `localcache.CacheTTLKey`                | `x-bf-cache-ttl`                                     | `time.Duration`       | Cache TTL (duration string or seconds)                                                                                                |
+| `localcache.CacheThresholdKey`          | `x-bf-cache-threshold`                               | `float64`             | Similarity threshold (0.0-1.0)                                                                                                        |
+| `localcache.CacheTypeKey`               | `x-bf-cache-type`                                    | `string`              | Cache type                                                                                                                            |
+| `localcache.CacheNoStoreKey`            | `x-bf-cache-no-store`                                | `bool`                | Prevent caching                                                                                                                       |
 | `mcp-include-clients`                      | `x-bf-mcp-include-clients`                           | `[]string`            | Filter MCP clients (comma-separated).                                                                                                 |
 | `mcp-include-tools`                        | `x-bf-mcp-include-tools`                             | `[]string`            | Filter MCP tools (`clientName-toolName` format, comma-separated)                                                                      |
 | `BifrostContextKeyMCPExtraHeaders`         | _(any header in a client's `allowed_extra_headers`)_ | `map[string][]string` | Headers forwarded to MCP servers at tool execution time, filtered per-client against `allowed_extra_headers`                          |
@@ -719,18 +719,18 @@ The headers `x-bf-eh-user-id` and `x-bf-eh-tracking-id` are forwarded to the pro
 - Custom metadata: `x-bf-eh-department`, `x-bf-eh-cost-center`
 - A/B testing: `x-bf-eh-experiment-id`, `x-bf-eh-variant`
 
-## Semantic Cache Options
+## Local Cache Options
 
 These options control semantic caching behavior.
 
 ### Cache Key
 
-**Context Key:** `semanticcache.CacheKey`
+**Context Key:** `localcache.CacheKey`
 **Header:** `x-bf-cache-key`
 **Type:** `string`
 **Required:** No
 
-Specify a custom cache key for semantic cache lookups.
+Specify a custom cache key for local cache lookups.
 
 <Tabs>
 <Tab title="Gateway (cURL)">
@@ -748,7 +748,7 @@ curl --location 'http://localhost:8080/v1/chat/completions' \
 <Tab title="Go SDK">
 ```go
 ctx := context.Background()
-ctx = context.WithValue(ctx, semanticcache.CacheKey, "custom-key-123")
+ctx = context.WithValue(ctx, localcache.CacheKey, "custom-key-123")
 
 response, err := client.ChatCompletionRequest(schemas.NewBifrostContext(ctx, schemas.NoDeadline), &schemas.BifrostChatRequest{
 Provider: schemas.OpenAI,
@@ -762,7 +762,7 @@ Input: messages,
 
 ### Cache TTL
 
-**Context Key:** `semanticcache.CacheTTLKey`
+**Context Key:** `localcache.CacheTTLKey`
 **Header:** `x-bf-cache-ttl`
 **Type:** `time.Duration` (header value: duration string like `"30s"` or `"5m"`, or seconds as integer)
 **Required:** No
@@ -785,7 +785,7 @@ curl --location 'http://localhost:8080/v1/chat/completions' \
 <Tab title="Go SDK">
 ```go
 ctx := context.Background()
-ctx = context.WithValue(ctx, semanticcache.CacheTTLKey, 5*time.Minute)
+ctx = context.WithValue(ctx, localcache.CacheTTLKey, 5*time.Minute)
 
 response, err := client.ChatCompletionRequest(schemas.NewBifrostContext(ctx, schemas.NoDeadline), &schemas.BifrostChatRequest{
 Provider: schemas.OpenAI,
@@ -801,12 +801,12 @@ Accepts duration strings (`"30s"`, `"5m"`, `"1h"`) or plain numbers (treated as
 
 ### Cache Threshold
 
-**Context Key:** `semanticcache.CacheThresholdKey`
+**Context Key:** `localcache.CacheThresholdKey`
 **Header:** `x-bf-cache-threshold`
 **Type:** `float64` (range: 0.0 to 1.0)
 **Required:** No
 
-Set the similarity threshold for semantic cache matching.
+Set the similarity threshold for local cache matching.
 
 <Tabs>
 <Tab title="Gateway (cURL)">
@@ -824,7 +824,7 @@ curl --location 'http://localhost:8080/v1/chat/completions' \
 <Tab title="Go SDK">
 ```go
 ctx := context.Background()
-ctx = context.WithValue(ctx, semanticcache.CacheThresholdKey, 0.85)
+ctx = context.WithValue(ctx, localcache.CacheThresholdKey, 0.85)
 
 response, err := client.ChatCompletionRequest(schemas.NewBifrostContext(ctx, schemas.NoDeadline), &schemas.BifrostChatRequest{
 Provider: schemas.OpenAI,
@@ -838,9 +838,9 @@ Input: messages,
 
 ### Cache Type
 
-**Context Key:** `semanticcache.CacheTypeKey`
+**Context Key:** `localcache.CacheTypeKey`
 **Header:** `x-bf-cache-type`
-**Type:** `semanticcache.CacheType` (string)
+**Type:** `localcache.CacheType` (string)
 **Required:** No
 
 Specify the cache type for this request.
@@ -861,7 +861,7 @@ curl --location 'http://localhost:8080/v1/chat/completions' \
 <Tab title="Go SDK">
 ```go
 ctx := context.Background()
-ctx = context.WithValue(ctx, semanticcache.CacheTypeKey, semanticcache.CacheTypeSemantic)
+ctx = context.WithValue(ctx, localcache.CacheTypeKey, localcache.CacheTypeSemantic)
 
 response, err := client.ChatCompletionRequest(schemas.NewBifrostContext(ctx, schemas.NoDeadline), &schemas.BifrostChatRequest{
 Provider: schemas.OpenAI,
@@ -875,7 +875,7 @@ Input: messages,
 
 ### Cache No Store
 
-**Context Key:** `semanticcache.CacheNoStoreKey`
+**Context Key:** `localcache.CacheNoStoreKey`
 **Header:** `x-bf-cache-no-store`
 **Type:** `bool` (header value: `"true"`)
 **Required:** No
@@ -898,7 +898,7 @@ curl --location 'http://localhost:8080/v1/chat/completions' \
 <Tab title="Go SDK">
 ```go
 ctx := context.Background()
-ctx = context.WithValue(ctx, semanticcache.CacheNoStoreKey, true)
+ctx = context.WithValue(ctx, localcache.CacheNoStoreKey, true)
 
 response, err := client.ChatCompletionRequest(schemas.NewBifrostContext(ctx, schemas.NoDeadline), &schemas.BifrostChatRequest{
 Provider: schemas.OpenAI,
@@ -1237,5 +1237,5 @@ The following context keys are set by Bifrost internally.
 - **[Gateway Provider Configuration](../quickstart/gateway/provider-configuration)** - Configure providers and headers
 - **[Go SDK Context Keys](../quickstart/go-sdk/context-keys)** - Programmatic context key usage
 - **[Virtual Keys](../features/governance/virtual-keys)** - Virtual key usage and governance
-- **[Semantic Cache](../features/semantic-caching)** - Caching configuration
+- **[Local Cache](../features/local-caching)** - Caching configuration
 ```
diff --git a/examples/configs/withsemanticcache/config.json b/examples/configs/withlocalcache/config.json
similarity index 52%
rename from examples/configs/withsemanticcache/config.json
rename to examples/configs/withlocalcache/config.json
index c01303ba37..b7fd7d437f 100644
--- a/examples/configs/withsemanticcache/config.json
+++ b/examples/configs/withlocalcache/config.json
@@ -1,5 +1,8 @@
 {
   "$schema": "https://www.getbifrost.ai/schema",
+  "client": {
+    "enable_local_cache": true
+  },
   "vector_store": {
     "enabled": true,
     "type": "weaviate",
@@ -8,15 +11,9 @@
       "host": "localhost:9000"
     }
   },
-  "plugins": [
-    {
-      "enabled": true,
-      "name": "semantic_cache",
-      "config": {
-        "dimension": 1,
-        "ttl": 300,
-        "threshold": 0.8
-      }
-    }
-  ]
-}
\ No newline at end of file
+  "local_cache": {
+    "dimension": 1,
+    "ttl": 300,
+    "threshold": 0.8
+  }
+}
diff --git a/examples/configs/withsemanticcachevalkey/config.json b/examples/configs/withlocalcachevalkey/config.json
similarity index 57%
rename from examples/configs/withsemanticcachevalkey/config.json
rename to examples/configs/withlocalcachevalkey/config.json
index 732bcc1029..9314b36c57 100644
--- a/examples/configs/withsemanticcachevalkey/config.json
+++ b/examples/configs/withlocalcachevalkey/config.json
@@ -1,5 +1,8 @@
 {
   "$schema": "https://www.getbifrost.ai/schema",
+  "client": {
+    "enable_local_cache": true
+  },
   "vector_store": {
     "enabled": true,
     "type": "redis",
@@ -14,17 +17,11 @@
       "cluster_mode": true
     }
   },
-  "plugins": [
-    {
-      "enabled": true,
-      "name": "semantic_cache",
-      "config": {
-        "dimension": 1,
-        "ttl": 300,
-        "threshold": 0.8,
-        "default_cache_key": "valkey-repro-cache",
-        "vector_store_namespace": "ValkeySemanticCacheRepro"
-      }
-    }
-  ]
+  "local_cache": {
+    "dimension": 1,
+    "ttl": 300,
+    "threshold": 0.8,
+    "default_cache_key": "valkey-repro-cache",
+    "vector_store_namespace": "ValkeyLocalCacheRepro"
+  }
 }
diff --git a/examples/k8s/examples/README.md b/examples/k8s/examples/README.md
index cc31180622..900407fbe8 100644
--- a/examples/k8s/examples/README.md
+++ b/examples/k8s/examples/README.md
@@ -9,8 +9,8 @@ These examples are split into composable value files so you can combine them wit
 - `values-storage-postgres.yaml` - Storage layer for Postgres mode (chart-managed Postgres)
 - `values-providers.yaml` - Provider keys layer (`openai` + `anthropic`)
 - `values-client-configs.yaml` - Client settings layer (non-MCP, non-model client config options)
-- `values-semantic-search-redis.yaml` - Semantic cache + Redis vector store layer
-- `values-semantic-search-weaviate.yaml` - Semantic cache + Weaviate vector store layer
+- `values-local-cache-redis.yaml` - Local cache + Redis vector store layer
+- `values-local-cache-weaviate.yaml` - Local cache + Weaviate vector store layer
 - `values-mcp-routing.yaml` - MCP + routing layer (latest `mcp.*` globals, MCP client config, chain rule/fallback examples)
 - `values-governance-teams.yaml` - Governance base layer (budgets, rate limits, customers, teams)
 - `values-with-routing-rules-pricing.yaml` - Advanced governance layer (virtual keys, routing rules, pricing overrides, access profile)
@@ -105,27 +105,27 @@ helm upgrade --install "${RELEASE_NAME}" ./helm-charts/bifrost \
   --wait \
   --timeout 5m
 
-# Semantic search stack (Redis vector store)
+# Local cache stack (Redis vector store)
 helm upgrade --install "${RELEASE_NAME}" ./helm-charts/bifrost \
   --namespace "${NAMESPACE}" \
   -f examples/k8s/examples/values.yaml \
   -f examples/k8s/examples/values-storage-sqlite.yaml \
   -f examples/k8s/examples/values-providers.yaml \
-  -f examples/k8s/examples/values-semantic-search-redis.yaml \
+  -f examples/k8s/examples/values-local-cache-redis.yaml \
   --wait \
   --timeout 5m
 
-# Note: this Redis semantic layer uses Redis Stack (search-enabled) because
-# semantic cache requires FT.* commands (RediSearch module). Redis Stack
+# Note: this Redis local-cache layer uses Redis Stack (search-enabled) because
+# local cache requires FT.* commands (RediSearch module). Redis Stack
 # auto-loads search modules at startup.
 
-# Semantic search stack (Weaviate vector store)
+# Local cache stack (Weaviate vector store)
 helm upgrade --install "${RELEASE_NAME}" ./helm-charts/bifrost \
   --namespace "${NAMESPACE}" \
   -f examples/k8s/examples/values.yaml \
   -f examples/k8s/examples/values-storage-sqlite.yaml \
   -f examples/k8s/examples/values-providers.yaml \
-  -f examples/k8s/examples/values-semantic-search-weaviate.yaml \
+  -f examples/k8s/examples/values-local-cache-weaviate.yaml \
   --wait \
   --timeout 5m
 
@@ -184,28 +184,28 @@ If you built a local image (for example `bifrost-local:v1.5.0-prerelease21`) and
 to run with `image.pullPolicy=Never`, use commands like:
 
 ```bash
-# Semantic search + Redis + client config (local image)
+# Local cache + Redis + client config (local image)
 helm upgrade --install "${RELEASE_NAME}" ./helm-charts/bifrost \
   --namespace "${NAMESPACE}" \
   -f examples/k8s/examples/values.yaml \
   -f examples/k8s/examples/values-storage-sqlite.yaml \
   -f examples/k8s/examples/values-providers.yaml \
   -f examples/k8s/examples/values-client-configs.yaml \
-  -f examples/k8s/examples/values-semantic-search-redis.yaml \
+  -f examples/k8s/examples/values-local-cache-redis.yaml \
   --set image.repository=bifrost-local \
   --set image.tag=v1.5.0-prerelease21 \
   --set image.pullPolicy=Never \
   --wait \
   --timeout 5m
 
-# Semantic search + Weaviate + client config (local image)
+# Local cache + Weaviate + client config (local image)
 helm upgrade --install "${RELEASE_NAME}" ./helm-charts/bifrost \
   --namespace "${NAMESPACE}" \
   -f examples/k8s/examples/values.yaml \
   -f examples/k8s/examples/values-storage-sqlite.yaml \
   -f examples/k8s/examples/values-providers.yaml \
   -f examples/k8s/examples/values-client-configs.yaml \
-  -f examples/k8s/examples/values-semantic-search-weaviate.yaml \
+  -f examples/k8s/examples/values-local-cache-weaviate.yaml \
   --set image.repository=bifrost-local \
   --set image.tag=v1.5.0-prerelease21 \
   --set image.pullPolicy=Never \
diff --git a/examples/k8s/examples/values-local-cache-redis.yaml b/examples/k8s/examples/values-local-cache-redis.yaml
new file mode 100644
index 0000000000..731b096366
--- /dev/null
+++ b/examples/k8s/examples/values-local-cache-redis.yaml
@@ -0,0 +1,30 @@
+# Local cache layer using Redis vector store (chart-managed).
+# Merge with values.yaml + storage + providers layers.
+bifrost:
+  client:
+    enableLocalCache: true
+
+# Local cache plugin configuration (sibling of bifrost / vectorStore)
+localCache:
+  version: 1
+  # Embedding provider keys for the local cache (semantic mode only).
+  keys:
+    - "env.OPENAI_API_KEY"
+  config:
+    provider: "openai"
+    embedding_model: "text-embedding-3-small"
+    dimension: 1536
+    threshold: 0.8
+    ttl: "5m"
+    conversation_history_threshold: 3
+    cache_by_model: true
+    cache_by_provider: true
+    exclude_system_prompt: true
+    cleanup_on_shutdown: false
+    vector_store_namespace: "bifrost-local-cache"
+
+vectorStore:
+  enabled: true
+  type: redis
+  redis:
+    enabled: true
diff --git a/examples/k8s/examples/values-local-cache-weaviate.yaml b/examples/k8s/examples/values-local-cache-weaviate.yaml
new file mode 100644
index 0000000000..72ee8e61ef
--- /dev/null
+++ b/examples/k8s/examples/values-local-cache-weaviate.yaml
@@ -0,0 +1,30 @@
+# Local cache layer using Weaviate vector store (chart-managed).
+# Merge with values.yaml + storage + providers layers.
+bifrost:
+  client:
+    enableLocalCache: true
+
+# Local cache plugin configuration (sibling of bifrost / vectorStore)
+localCache:
+  version: 1
+  # Embedding provider keys for the local cache (semantic mode only).
+  keys:
+    - "env.OPENAI_API_KEY"
+  config:
+    provider: "openai"
+    embedding_model: "text-embedding-3-small"
+    dimension: 1536
+    threshold: 0.8
+    ttl: "5m"
+    conversation_history_threshold: 3
+    cache_by_model: true
+    cache_by_provider: true
+    exclude_system_prompt: false
+    cleanup_on_shutdown: false
+    vector_store_namespace: "bifrost-local-cache"
+
+vectorStore:
+  enabled: true
+  type: weaviate
+  weaviate:
+    enabled: true
diff --git a/examples/k8s/examples/values-semantic-search-redis.yaml b/examples/k8s/examples/values-semantic-search-redis.yaml
deleted file mode 100644
index 4d9cd18224..0000000000
--- a/examples/k8s/examples/values-semantic-search-redis.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-# Semantic search/cache layer using Redis vector store (chart-managed).
-# Merge with values.yaml + storage + providers layers.
-bifrost:
-  plugins:
-    semanticCache:
-      enabled: true
-      version: 1
-      config:
-        provider: "openai"
-        # Embedding provider keys for semantic cache.
-        keys:
-          - "env.OPENAI_API_KEY"
-        embedding_model: "text-embedding-3-small"
-        dimension: 1536
-        threshold: 0.8
-        ttl: "5m"
-        conversation_history_threshold: 3
-        cache_by_model: true
-        cache_by_provider: true
-        exclude_system_prompt: true
-        cleanup_on_shutdown: false
-        vector_store_namespace: "bifrost-semantic-cache"
-
-vectorStore:
-  enabled: true
-  type: redis
-  redis:
-    enabled: true
diff --git a/examples/k8s/examples/values-semantic-search-weaviate.yaml b/examples/k8s/examples/values-semantic-search-weaviate.yaml
deleted file mode 100644
index 7691106be7..0000000000
--- a/examples/k8s/examples/values-semantic-search-weaviate.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-# Semantic search/cache layer using Weaviate vector store (chart-managed).
-# Merge with values.yaml + storage + providers layers.
-bifrost:
-  plugins:
-    semanticCache:
-      enabled: true
-      version: 1
-      config:
-        provider: "openai"
-        # Embedding provider keys for semantic cache.
-        keys:
-          - "env.OPENAI_API_KEY"
-        embedding_model: "text-embedding-3-small"
-        dimension: 1536
-        threshold: 0.8
-        ttl: "5m"
-        conversation_history_threshold: 3
-        cache_by_model: true
-        cache_by_provider: true
-        exclude_system_prompt: false
-        cleanup_on_shutdown: false
-        vector_store_namespace: "bifrost-semantic-cache"
-
-vectorStore:
-  enabled: true
-  type: weaviate
-  weaviate:
-    enabled: true
diff --git a/framework/configstore/clientconfig.go b/framework/configstore/clientconfig.go
index afe424d32b..be65a4ac38 100644
--- a/framework/configstore/clientconfig.go
+++ b/framework/configstore/clientconfig.go
@@ -50,6 +50,7 @@ type ClientConfig struct {
 	InitialPoolSize                       int                              `json:"initial_pool_size"`                          // The initial pool size for the bifrost client
 	PrometheusLabels                      []string                         `json:"prometheus_labels"`                          // The labels to be used for prometheus metrics
 	EnableLogging                         *bool                            `json:"enable_logging"`                             // Enable logging of requests and responses
+	EnableLocalCache                      *bool                            `json:"enable_local_cache"`                         // Enable local cache plugin (direct + semantic caching)
 	DisableContentLogging                 bool                             `json:"disable_content_logging"`                    // Disable logging of content
 	AllowPerRequestContentStorageOverride bool                             `json:"allow_per_request_content_storage_override"` // Allow per-request override of content storage via x-bf-disable-content-logging header/context
 	AllowPerRequestRawOverride            bool                             `json:"allow_per_request_raw_override"`             // Allow per-request override of raw request/response visibility via x-bf-send-back-raw-request and x-bf-send-back-raw-response headers
@@ -99,6 +100,12 @@ func (c *ClientConfig) GenerateClientConfigHash() (string, error) {
 		hash.Write([]byte("enableLogging:false"))
 	}
 
+	// Default for EnableLocalCache is false (off). Hash only the non-default
+	// value to avoid churning legacy hashes on upgrade.
+	if c.EnableLocalCache != nil && *c.EnableLocalCache {
+		hash.Write([]byte("enableLocalCache:true"))
+	}
+
 	if c.DisableContentLogging {
 		hash.Write([]byte("disableContentLogging:true"))
 	} else {
diff --git a/framework/configstore/localcache.go b/framework/configstore/localcache.go
new file mode 100644
index 0000000000..ddd1551295
--- /dev/null
+++ b/framework/configstore/localcache.go
@@ -0,0 +1,113 @@
+package configstore
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+	"strconv"
+	"time"
+
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// LocalCacheConfig is the runtime configuration for the local cache plugin.
+// The framework holds a single *LocalCacheConfig that the plugin shares by
+// pointer. PUT /api/local-cache/config mutates the struct in place via
+// ReloadLocalCacheConfigFromConfigStore so the plugin sees fresh values on
+// the next request without a restart.
+type LocalCacheConfig struct {
+	// Embedding model settings.
+	// Modes:
+	//   - Semantic mode: Provider + EmbeddingModel + Dimension > 0. Both
+	//     direct hash matching and embedding-based similarity search engage.
+	//   - Direct-only mode: Provider="" with Dimension=1. Semantic search is
+	//     disabled; lookups go through the deterministic direct hash path.
+	//     Dimension=1 keeps stores that require a vector happy.
+	Provider       schemas.ModelProvider `json:"provider"`
+	EmbeddingModel string                `json:"embedding_model,omitempty"`
+
+	// Plugin behavior settings
+	CleanUpOnShutdown    bool          `json:"cleanup_on_shutdown,omitempty"`
+	TTL                  time.Duration `json:"ttl,omitempty"`
+	Threshold            float64       `json:"threshold,omitempty"`
+	VectorStoreNamespace string        `json:"vector_store_namespace,omitempty"`
+	Dimension            int           `json:"dimension"`
+
+	// Advanced caching behavior
+	DefaultCacheKey              string `json:"default_cache_key,omitempty"`
+	ConversationHistoryThreshold int    `json:"conversation_history_threshold,omitempty"`
+	CacheByModel                 *bool  `json:"cache_by_model,omitempty"`
+	CacheByProvider              *bool  `json:"cache_by_provider,omitempty"`
+	ExcludeSystemPrompt          *bool  `json:"exclude_system_prompt,omitempty"`
+
+	// ConfigHash is used by the config-sync layer to detect changes between
+	// config.json and the database; not serialized to API responses.
+	ConfigHash string `json:"-"`
+}
+
+// UnmarshalJSON accepts either a duration string ("1m", "1h") or a JSON
+// number (seconds) for the TTL field. Mirrors the prior plugin Config
+// UnmarshalJSON so existing config.json files continue to parse after the
+// rename to local_cache.
+func (c *LocalCacheConfig) UnmarshalJSON(data []byte) error {
+	// alias suppresses LocalCacheConfig's UnmarshalJSON to avoid infinite
+	// recursion. The outer TTL (json.RawMessage) shadows alias.TTL because
+	// the json package picks the shallower field on a name conflict.
+	type alias LocalCacheConfig
+	aux := &struct {
+		TTL json.RawMessage `json:"ttl,omitempty"`
+		*alias
+	}{alias: (*alias)(c)}
+	if err := json.Unmarshal(data, aux); err != nil {
+		return fmt.Errorf("failed to unmarshal local cache config: %w", err)
+	}
+	if len(aux.TTL) == 0 || string(aux.TTL) == "null" {
+		return nil
+	}
+	var s string
+	if err := json.Unmarshal(aux.TTL, &s); err == nil {
+		d, err := time.ParseDuration(s)
+		if err != nil {
+			return fmt.Errorf("failed to parse TTL duration string '%s': %w", s, err)
+		}
+		c.TTL = d
+	} else {
+		var seconds float64
+		if err := json.Unmarshal(aux.TTL, &seconds); err != nil {
+			return fmt.Errorf("unsupported TTL value: %s", string(aux.TTL))
+		}
+		c.TTL = time.Duration(seconds * float64(time.Second))
+	}
+	if c.TTL < 0 {
+		return fmt.Errorf("TTL must be non-negative, got %v", c.TTL)
+	}
+	return nil
+}
+
+// GenerateLocalCacheConfigHash generates a SHA256 hash of the local cache
+// configuration. Used by the config-sync layer to detect when the
+// config.json-side LocalCacheConfig differs from what's persisted in the
+// database.
+func (c *LocalCacheConfig) GenerateLocalCacheConfigHash() (string, error) {
+	hash := sha256.New()
+	hash.Write([]byte("provider:" + string(c.Provider)))
+	hash.Write([]byte("embedding_model:" + c.EmbeddingModel))
+	hash.Write([]byte("cleanup_on_shutdown:" + strconv.FormatBool(c.CleanUpOnShutdown)))
+	hash.Write([]byte("ttl_ns:" + strconv.FormatInt(int64(c.TTL), 10)))
+	hash.Write([]byte("threshold:" + strconv.FormatFloat(c.Threshold, 'f', -1, 64)))
+	hash.Write([]byte("namespace:" + c.VectorStoreNamespace))
+	hash.Write([]byte("dimension:" + strconv.Itoa(c.Dimension)))
+	hash.Write([]byte("default_cache_key:" + c.DefaultCacheKey))
+	hash.Write([]byte("conv_history_threshold:" + strconv.Itoa(c.ConversationHistoryThreshold)))
+	if c.CacheByModel != nil {
+		hash.Write([]byte("cache_by_model:" + strconv.FormatBool(*c.CacheByModel)))
+	}
+	if c.CacheByProvider != nil {
+		hash.Write([]byte("cache_by_provider:" + strconv.FormatBool(*c.CacheByProvider)))
+	}
+	if c.ExcludeSystemPrompt != nil {
+		hash.Write([]byte("exclude_system_prompt:" + strconv.FormatBool(*c.ExcludeSystemPrompt)))
+	}
+	return hex.EncodeToString(hash.Sum(nil)), nil
+}
diff --git a/framework/configstore/migrations.go b/framework/configstore/migrations.go
index cc57a8cec7..1b0bae6fc1 100644
--- a/framework/configstore/migrations.go
+++ b/framework/configstore/migrations.go
@@ -635,6 +635,9 @@ func triggerMigrations(ctx context.Context, db *gorm.DB) error {
 	if err := migrationAddMCPClientDisabledColumn(ctx, db); err != nil {
 		return err
 	}
+	if err := migrationAddLocalCacheConfigTable(ctx, db); err != nil {
+		return err
+	}
 	return nil
 }
 
@@ -7253,6 +7256,50 @@ func migrationSplitMCPExternalBaseURL(ctx context.Context, db *gorm.DB) error {
 }
 
 // migrationAddMCPClientDisabledColumn adds the disabled column to the config_mcp_clients table
+// migrationAddLocalCacheConfigTable creates the config_local_cache typed-column
+// table and adds the enable_local_cache flag column to config_client. Both
+// changes are idempotent so reruns are safe; previously-installed clusters
+// pick up the column without losing existing client config rows.
+func migrationAddLocalCacheConfigTable(ctx context.Context, db *gorm.DB) error {
+	m := migrator.New(db, migrator.DefaultOptions, []*migrator.Migration{{
+		ID: "add_local_cache_config_table",
+		Migrate: func(tx *gorm.DB) error {
+			tx = tx.WithContext(ctx)
+			mg := tx.Migrator()
+			if !mg.HasTable(&tables.TableLocalCacheConfig{}) {
+				if err := mg.CreateTable(&tables.TableLocalCacheConfig{}); err != nil {
+					return fmt.Errorf("failed to create config_local_cache table: %w", err)
+				}
+			}
+			if !mg.HasColumn(&tables.TableClientConfig{}, "enable_local_cache") {
+				if err := mg.AddColumn(&tables.TableClientConfig{}, "enable_local_cache"); err != nil {
+					return fmt.Errorf("failed to add enable_local_cache column: %w", err)
+				}
+			}
+			return nil
+		},
+		Rollback: func(tx *gorm.DB) error {
+			tx = tx.WithContext(ctx)
+			mg := tx.Migrator()
+			if mg.HasColumn(&tables.TableClientConfig{}, "enable_local_cache") {
+				if err := mg.DropColumn(&tables.TableClientConfig{}, "enable_local_cache"); err != nil {
+					return fmt.Errorf("failed to drop enable_local_cache column: %w", err)
+				}
+			}
+			if mg.HasTable(&tables.TableLocalCacheConfig{}) {
+				if err := mg.DropTable(&tables.TableLocalCacheConfig{}); err != nil {
+					return fmt.Errorf("failed to drop config_local_cache table: %w", err)
+				}
+			}
+			return nil
+		},
+	}})
+	if err := m.Migrate(); err != nil {
+		return fmt.Errorf("error running add_local_cache_config_table migration: %s", err.Error())
+	}
+	return nil
+}
+
 func migrationAddMCPClientDisabledColumn(ctx context.Context, db *gorm.DB) error {
 	m := migrator.New(db, migrator.DefaultOptions, []*migrator.Migration{{
 		ID: "add_mcp_client_disabled_column",
diff --git a/framework/configstore/rdb.go b/framework/configstore/rdb.go
index 18919d31c6..0214bfd960 100644
--- a/framework/configstore/rdb.go
+++ b/framework/configstore/rdb.go
@@ -156,6 +156,7 @@ func (s *RDBConfigStore) UpdateClientConfig(ctx context.Context, config *ClientC
 		DropExcessRequests:                    config.DropExcessRequests,
 		InitialPoolSize:                       config.InitialPoolSize,
 		EnableLogging:                         config.EnableLogging,
+		EnableLocalCache:                      config.EnableLocalCache,
 		DisableContentLogging:                 config.DisableContentLogging,
 		DisableDBPingsInHealth:                config.DisableDBPingsInHealth,
 		LogRetentionDays:                      config.LogRetentionDays,
@@ -369,6 +370,7 @@ func (s *RDBConfigStore) GetClientConfig(ctx context.Context) (*ClientConfig, er
 		InitialPoolSize:         dbConfig.InitialPoolSize,
 		PrometheusLabels:        dbConfig.PrometheusLabels,
 		EnableLogging:           dbConfig.EnableLogging,
+		EnableLocalCache:        dbConfig.EnableLocalCache,
 		DisableContentLogging:   dbConfig.DisableContentLogging,
 		DisableDBPingsInHealth:  dbConfig.DisableDBPingsInHealth,
 		LogRetentionDays:        dbConfig.LogRetentionDays,
diff --git a/framework/configstore/rdb_localcache.go b/framework/configstore/rdb_localcache.go
new file mode 100644
index 0000000000..439e0d9b1b
--- /dev/null
+++ b/framework/configstore/rdb_localcache.go
@@ -0,0 +1,69 @@
+package configstore
+
+import (
+	"context"
+	"errors"
+	"time"
+
+	"github.com/maximhq/bifrost/core/schemas"
+	"github.com/maximhq/bifrost/framework/configstore/tables"
+	"gorm.io/gorm"
+)
+
+// GetLocalCacheConfig retrieves the local-cache configuration from the database.
+// Returns (nil, nil) when no row exists, so callers can distinguish "not yet
+// configured" from a hard error and apply their own defaults.
+func (s *RDBConfigStore) GetLocalCacheConfig(ctx context.Context) (*LocalCacheConfig, error) {
+	var dbConfig tables.TableLocalCacheConfig
+	if err := s.DB().WithContext(ctx).First(&dbConfig).Error; err != nil {
+		if errors.Is(err, gorm.ErrRecordNotFound) {
+			return nil, nil
+		}
+		return nil, err
+	}
+	return &LocalCacheConfig{
+		Provider:                     schemas.ModelProvider(dbConfig.Provider),
+		EmbeddingModel:               dbConfig.EmbeddingModel,
+		CleanUpOnShutdown:            dbConfig.CleanUpOnShutdown,
+		TTL:                          time.Duration(dbConfig.TTLSeconds) * time.Second,
+		Threshold:                    dbConfig.Threshold,
+		VectorStoreNamespace:         dbConfig.VectorStoreNamespace,
+		Dimension:                    dbConfig.Dimension,
+		DefaultCacheKey:              dbConfig.DefaultCacheKey,
+		ConversationHistoryThreshold: dbConfig.ConversationHistoryThreshold,
+		CacheByModel:                 dbConfig.CacheByModel,
+		CacheByProvider:              dbConfig.CacheByProvider,
+		ExcludeSystemPrompt:          dbConfig.ExcludeSystemPrompt,
+		ConfigHash:                   dbConfig.ConfigHash,
+	}, nil
+}
+
+// UpdateLocalCacheConfig persists the local-cache configuration. The table is
+// single-row: existing rows are deleted before insert so callers always
+// observe exactly one row.
+func (s *RDBConfigStore) UpdateLocalCacheConfig(ctx context.Context, config *LocalCacheConfig) error {
+	if config == nil {
+		return nil
+	}
+	dbConfig := tables.TableLocalCacheConfig{
+		Provider:                     string(config.Provider),
+		EmbeddingModel:               config.EmbeddingModel,
+		CleanUpOnShutdown:            config.CleanUpOnShutdown,
+		TTLSeconds:                   int64(config.TTL / time.Second),
+		Threshold:                    config.Threshold,
+		VectorStoreNamespace:         config.VectorStoreNamespace,
+		Dimension:                    config.Dimension,
+		DefaultCacheKey:              config.DefaultCacheKey,
+		ConversationHistoryThreshold: config.ConversationHistoryThreshold,
+		CacheByModel:                 config.CacheByModel,
+		CacheByProvider:              config.CacheByProvider,
+		ExcludeSystemPrompt:          config.ExcludeSystemPrompt,
+		ConfigHash:                   config.ConfigHash,
+	}
+	return s.DB().WithContext(ctx).Transaction(func(tx *gorm.DB) error {
+		if err := tx.Session(&gorm.Session{AllowGlobalUpdate: true}).Delete(&tables.TableLocalCacheConfig{}).Error; err != nil {
+			return err
+		}
+		return tx.Create(&dbConfig).Error
+	})
+}
diff --git a/framework/configstore/store.go b/framework/configstore/store.go
index fa6cf781fb..3fb7e6348e 100644
--- a/framework/configstore/store.go
+++ b/framework/configstore/store.go
@@ -127,6 +127,10 @@ type ConfigStore interface {
 	UpdateVectorStoreConfig(ctx context.Context, config *vectorstore.Config) error
 	GetVectorStoreConfig(ctx context.Context) (*vectorstore.Config, error)
 
+	// Local cache config CRUD
+	GetLocalCacheConfig(ctx context.Context) (*LocalCacheConfig, error)
+	UpdateLocalCacheConfig(ctx context.Context, config *LocalCacheConfig) error
+
 	// Logs store config CRUD
 	UpdateLogsStoreConfig(ctx context.Context, config *logstore.Config) error
 	GetLogsStoreConfig(ctx context.Context) (*logstore.Config, error)
diff --git a/framework/configstore/tables/clientconfig.go b/framework/configstore/tables/clientconfig.go
index 9afe044eaa..6e2598711f 100644
--- a/framework/configstore/tables/clientconfig.go
+++ b/framework/configstore/tables/clientconfig.go
@@ -17,6 +17,7 @@ type TableClientConfig struct {
 	HeaderFilterConfigJSON                string `gorm:"type:text" json:"-"` // JSON serialized GlobalHeaderFilterConfig
 	InitialPoolSize                       int    `gorm:"default:300" json:"initial_pool_size"`
 	EnableLogging                         *bool  `gorm:"default:true" json:"enable_logging"`
+	EnableLocalCache                      *bool  `gorm:"default:false" json:"enable_local_cache"`      // Toggles loading of the local cache plugin; when flipped, the compat shim calls ReloadPlugins/RemovePlugins for it
 	DisableContentLogging                 bool   `gorm:"default:false" json:"disable_content_logging"` // DisableContentLogging controls whether sensitive content (inputs, outputs, embeddings, etc.) is logged
 	DisableDBPingsInHealth                bool   `gorm:"default:false" json:"disable_db_pings_in_health"`
 	LogRetentionDays                      int    `gorm:"default:365" json:"log_retention_days" validate:"min=1"` // Number of days to retain logs (minimum 1 day)
diff --git a/framework/configstore/tables/localcache.go b/framework/configstore/tables/localcache.go
new file mode 100644
index 0000000000..3774814b5c
--- /dev/null
+++ b/framework/configstore/tables/localcache.go
@@ -0,0 +1,37 @@
+package tables
+
+import (
+	"time"
+)
+
+// TableLocalCacheConfig holds the persisted configuration for the local cache
+// plugin. Single-row table (mirrors TableClientConfig); updates use
+// DELETE-then-INSERT in a transaction so callers always observe exactly one
+// row. Fields are stored as typed columns rather than a JSON blob so future
+// migrations can target individual columns.
+type TableLocalCacheConfig struct {
+	ID                           uint    `gorm:"primaryKey;autoIncrement" json:"id"`
+	Provider                     string  `gorm:"type:varchar(64);default:''" json:"provider"`
+	EmbeddingModel               string  `gorm:"type:varchar(255);default:''" json:"embedding_model"`
+	CleanUpOnShutdown            bool    `gorm:"default:false" json:"cleanup_on_shutdown"`
+	TTLSeconds                   int64   `gorm:"default:0" json:"ttl_seconds"`
+	Threshold                    float64 `gorm:"default:0" json:"threshold"`
+	VectorStoreNamespace         string  `gorm:"type:varchar(255);default:''" json:"vector_store_namespace"`
+	Dimension                    int     `gorm:"default:0" json:"dimension"`
+	DefaultCacheKey              string  `gorm:"type:varchar(255);default:''" json:"default_cache_key"`
+	ConversationHistoryThreshold int     `gorm:"default:0" json:"conversation_history_threshold"`
+	// Nullable so callers can distinguish "default" (nil) from an explicit
+	// false. Plugin defaults: CacheByModel=true, CacheByProvider=true,
+	// ExcludeSystemPrompt=false.
+	CacheByModel        *bool `gorm:"" json:"cache_by_model"`
+	CacheByProvider     *bool `gorm:"" json:"cache_by_provider"`
+	ExcludeSystemPrompt *bool `gorm:"" json:"exclude_system_prompt"`
+
+	// ConfigHash detects changes synced from config.json.
+	ConfigHash string `gorm:"type:varchar(255);null" json:"config_hash"`
+
+	CreatedAt time.Time `gorm:"index;not null" json:"created_at"`
+	UpdatedAt time.Time `gorm:"index;not null" json:"updated_at"`
+}
+
+func (TableLocalCacheConfig) TableName() string { return "config_local_cache" }
diff --git a/framework/modelcatalog/pricing.go b/framework/modelcatalog/pricing.go
index 68b6551be5..c168c5e174 100644
--- a/framework/modelcatalog/pricing.go
+++ b/framework/modelcatalog/pricing.go
@@ -226,7 +226,7 @@ func (mc *ModelCatalog) CalculateCost(result *schemas.BifrostResponse, scopes *P
 		s = *scopes
 	}
 
-	// Handle semantic cache billing
+	// Handle local cache billing
 	cacheDebug := result.GetExtraFields().CacheDebug
 	if cacheDebug != nil {
 		return mc.calculateCostWithCache(result, cacheDebug, s)
@@ -235,7 +235,7 @@ func (mc *ModelCatalog) CalculateCost(result *schemas.BifrostResponse, scopes *P
 	return mc.calculateBaseCost(result, s)
 }
 
-// calculateCostWithCache handles cost calculation when semantic cache debug info is present.
+// calculateCostWithCache handles cost calculation when local cache debug info is present.
 func (mc *ModelCatalog) calculateCostWithCache(result *schemas.BifrostResponse, cacheDebug *schemas.BifrostCacheDebug, scopes PricingLookupScopes) float64 {
 	if cacheDebug.CacheHit {
 		// Direct cache hit — no LLM call, no cost
diff --git a/framework/modelcatalog/pricing_test.go b/framework/modelcatalog/pricing_test.go
index 3c184bed4b..91c69644cf 100644
--- a/framework/modelcatalog/pricing_test.go
+++ b/framework/modelcatalog/pricing_test.go
@@ -1142,7 +1142,7 @@ func TestExtractCostInput_VideoResponseInvalidSeconds(t *testing.T) {
 // 10. Semantic cache billing (calculateCostWithCache)
 // =========================================================================
 
-func TestCalculateCost_SemanticCacheDirectHit(t *testing.T) {
+func TestCalculateCost_LocalCacheDirectHit(t *testing.T) {
 	mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
 		makeKey("gpt-4o", "openai", "chat"): {
 			Model: "gpt-4o", Provider: "openai", Mode: "chat",
@@ -1170,7 +1170,7 @@ func TestCalculateCost_SemanticCacheDirectHit(t *testing.T) {
 	assert.Equal(t, 0.0, cost)
 }
 
-func TestCalculateCost_SemanticCacheSemanticHit(t *testing.T) {
+func TestCalculateCost_LocalCacheSemanticHit(t *testing.T) {
 	embProvider := "openai"
 	embModel := "text-embedding-3-small"
 	embTokens := 500
@@ -1210,7 +1210,7 @@ func TestCalculateCost_SemanticCacheSemanticHit(t *testing.T) {
 	assert.InDelta(t, 0.00001, cost, 1e-12)
 }
 
-func TestCalculateCost_SemanticCacheMiss(t *testing.T) {
+func TestCalculateCost_LocalCacheMiss(t *testing.T) {
 	embProvider := "openai"
 	embModel := "text-embedding-3-small"
 	embTokens := 500
@@ -1250,7 +1250,7 @@ func TestCalculateCost_SemanticCacheMiss(t *testing.T) {
 	assert.InDelta(t, 0.01251, cost, 1e-12)
 }
 
-func TestCalculateCost_SemanticCacheHitNoEmbeddingInfo(t *testing.T) {
+func TestCalculateCost_LocalCacheHitNoEmbeddingInfo(t *testing.T) {
 	mc := testCatalogWithPricing(nil)
 
 	resp := &schemas.BifrostResponse{
diff --git a/framework/streaming/accumulator.go b/framework/streaming/accumulator.go
index 549140b529..ce77b01407 100644
--- a/framework/streaming/accumulator.go
+++ b/framework/streaming/accumulator.go
@@ -50,7 +50,7 @@ func (a *Accumulator) putChatStreamChunk(chunk *ChatStreamChunk) {
 	chunk.Timestamp = time.Time{}
 	chunk.Delta = nil
 	chunk.Cost = nil
-	chunk.SemanticCacheDebug = nil
+	chunk.LocalCacheDebug = nil
 	chunk.ErrorDetails = nil
 	chunk.FinishReason = nil
 	chunk.TokenUsage = nil
@@ -68,7 +68,7 @@ func (a *Accumulator) putAudioStreamChunk(chunk *AudioStreamChunk) {
 	chunk.Timestamp = time.Time{}
 	chunk.Delta = nil
 	chunk.Cost = nil
-	chunk.SemanticCacheDebug = nil
+	chunk.LocalCacheDebug = nil
 	chunk.ErrorDetails = nil
 	chunk.FinishReason = nil
 	chunk.TokenUsage = nil
@@ -86,7 +86,7 @@ func (a *Accumulator) putTranscriptionStreamChunk(chunk *TranscriptionStreamChun
 	chunk.Timestamp = time.Time{}
 	chunk.Delta = nil
 	chunk.Cost = nil
-	chunk.SemanticCacheDebug = nil
+	chunk.LocalCacheDebug = nil
 	chunk.ErrorDetails = nil
 	chunk.FinishReason = nil
 	chunk.TokenUsage = nil
@@ -104,7 +104,7 @@ func (a *Accumulator) putResponsesStreamChunk(chunk *ResponsesStreamChunk) {
 	chunk.Timestamp = time.Time{}
 	chunk.StreamResponse = nil
 	chunk.Cost = nil
-	chunk.SemanticCacheDebug = nil
+	chunk.LocalCacheDebug = nil
 	chunk.ErrorDetails = nil
 	chunk.FinishReason = nil
 	chunk.TokenUsage = nil
@@ -126,7 +126,7 @@ func (a *Accumulator) putImageStreamChunk(chunk *ImageStreamChunk) {
 	chunk.ChunkIndex = 0
 	chunk.ImageIndex = 0
 	chunk.Cost = nil
-	chunk.SemanticCacheDebug = nil
+	chunk.LocalCacheDebug = nil
 	chunk.TokenUsage = nil
 	chunk.RawResponse = nil
 	a.imageStreamChunkPool.Put(chunk)
diff --git a/framework/streaming/audio.go b/framework/streaming/audio.go
index 0390ea5aaf..b2e83aa6db 100644
--- a/framework/streaming/audio.go
+++ b/framework/streaming/audio.go
@@ -84,8 +84,8 @@ func (a *Accumulator) processAccumulatedAudioStreamingChunks(requestID string, b
 		if lastChunk.Cost != nil {
 			data.Cost = lastChunk.Cost
 		}
-		if lastChunk.SemanticCacheDebug != nil {
-			data.CacheDebug = lastChunk.SemanticCacheDebug
+		if lastChunk.LocalCacheDebug != nil {
+			data.CacheDebug = lastChunk.LocalCacheDebug
 		}
 	}
 	// Accumulate raw response using strings.Builder to avoid O(n^2) string concatenation
@@ -149,7 +149,7 @@ func (a *Accumulator) processAudioStreamingResponse(ctx *schemas.BifrostContext,
 				cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
 				chunk.Cost = bifrost.Ptr(cost)
 			}
-			chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
+			chunk.LocalCacheDebug = result.GetExtraFields().CacheDebug
 		}
 	}
 	if addErr := a.addAudioStreamChunk(requestID, chunk, isFinalChunk); addErr != nil {
diff --git a/framework/streaming/chat.go b/framework/streaming/chat.go
index 0c03ab3d40..fda2c07cf4 100644
--- a/framework/streaming/chat.go
+++ b/framework/streaming/chat.go
@@ -407,8 +407,8 @@ func (a *Accumulator) processAccumulatedChatStreamingChunks(requestID string, re
 		if lastChunk.TokenUsage != nil {
 			data.TokenUsage = lastChunk.TokenUsage
 		}
-		if lastChunk.SemanticCacheDebug != nil {
-			data.CacheDebug = lastChunk.SemanticCacheDebug
+		if lastChunk.LocalCacheDebug != nil {
+			data.CacheDebug = lastChunk.LocalCacheDebug
 		}
 		if lastChunk.Cost != nil {
 			data.Cost = lastChunk.Cost
@@ -504,7 +504,7 @@ func (a *Accumulator) processChatStreamingResponse(ctx *schemas.BifrostContext,
 				cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
 				chunk.Cost = bifrost.Ptr(cost)
 			}
-			chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
+			chunk.LocalCacheDebug = result.GetExtraFields().CacheDebug
 		}
 	} else if result != nil && result.ChatResponse != nil {
 		// Extract delta and other information
@@ -530,7 +530,7 @@ func (a *Accumulator) processChatStreamingResponse(ctx *schemas.BifrostContext,
 				cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
 				chunk.Cost = bifrost.Ptr(cost)
 			}
-			chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
+			chunk.LocalCacheDebug = result.GetExtraFields().CacheDebug
 		}
 	}
 	if addErr := a.addChatStreamChunk(requestID, chunk, isFinalChunk); addErr != nil {
diff --git a/framework/streaming/images.go b/framework/streaming/images.go
index 367b52c037..a4624de0df 100644
--- a/framework/streaming/images.go
+++ b/framework/streaming/images.go
@@ -193,11 +193,11 @@ func (a *Accumulator) processAccumulatedImageStreamingChunks(requestID string, b
 		}
 	}
 
-	// Update semantic cache debug and raw response from final chunk if available
+	// Update local cache debug and raw response from final chunk if available
 	if len(acc.ImageStreamChunks) > 0 {
 		lastChunk := acc.ImageStreamChunks[len(acc.ImageStreamChunks)-1]
-		if lastChunk.SemanticCacheDebug != nil {
-			data.CacheDebug = lastChunk.SemanticCacheDebug
+		if lastChunk.LocalCacheDebug != nil {
+			data.CacheDebug = lastChunk.LocalCacheDebug
 		}
 		if lastChunk.RawResponse != nil {
 			data.RawResponse = lastChunk.RawResponse
@@ -277,7 +277,7 @@ func (a *Accumulator) processImageStreamingResponse(ctx *schemas.BifrostContext,
 				cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
 				chunk.Cost = bifrost.Ptr(cost)
 			}
-			chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
+			chunk.LocalCacheDebug = result.GetExtraFields().CacheDebug
 			chunk.FinishReason = bifrost.Ptr("completed")
 		}
 	}
diff --git a/framework/streaming/responses.go b/framework/streaming/responses.go
index 7712d6ecaa..47295c542a 100644
--- a/framework/streaming/responses.go
+++ b/framework/streaming/responses.go
@@ -848,8 +848,8 @@ func (a *Accumulator) processAccumulatedResponsesStreamingChunks(requestID strin
 		if lastChunk.TokenUsage != nil {
 			data.TokenUsage = lastChunk.TokenUsage
 		}
-		if lastChunk.SemanticCacheDebug != nil {
-			data.CacheDebug = lastChunk.SemanticCacheDebug
+		if lastChunk.LocalCacheDebug != nil {
+			data.CacheDebug = lastChunk.LocalCacheDebug
 		}
 		if lastChunk.Cost != nil {
 			data.Cost = lastChunk.Cost
@@ -933,7 +933,7 @@ func (a *Accumulator) processResponsesStreamingResponse(ctx *schemas.BifrostCont
 				cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
 				chunk.Cost = bifrost.Ptr(cost)
 			}
-			chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
+			chunk.LocalCacheDebug = result.GetExtraFields().CacheDebug
 		}
 	}
 
diff --git a/framework/streaming/transcription.go b/framework/streaming/transcription.go
index 3367e25ad6..638bac308a 100644
--- a/framework/streaming/transcription.go
+++ b/framework/streaming/transcription.go
@@ -96,8 +96,8 @@ func (a *Accumulator) processAccumulatedTranscriptionStreamingChunks(requestID s
 		if lastChunk.Cost != nil {
 			data.Cost = lastChunk.Cost
 		}
-		if lastChunk.SemanticCacheDebug != nil {
-			data.CacheDebug = lastChunk.SemanticCacheDebug
+		if lastChunk.LocalCacheDebug != nil {
+			data.CacheDebug = lastChunk.LocalCacheDebug
 		}
 	}
 	// Accumulate raw response using strings.Builder to avoid O(n^2) string concatenation
@@ -166,7 +166,7 @@ func (a *Accumulator) processTranscriptionStreamingResponse(ctx *schemas.Bifrost
 				cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
 				chunk.Cost = bifrost.Ptr(cost)
 			}
-			chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
+			chunk.LocalCacheDebug = result.GetExtraFields().CacheDebug
 		}
 	}
 	if addErr := a.addTranscriptionStreamChunk(requestID, chunk, isFinalChunk); addErr != nil {
diff --git a/framework/streaming/types.go b/framework/streaming/types.go
index 16779891d7..d60dc4e8ad 100644
--- a/framework/streaming/types.go
+++ b/framework/streaming/types.go
@@ -52,7 +52,7 @@ type AudioStreamChunk struct {
 	Delta              *schemas.BifrostSpeechStreamResponse // The actual delta content
 	FinishReason       *string                              // If this is the final chunk
 	TokenUsage         *schemas.SpeechUsage                 // Token usage if available
-	SemanticCacheDebug *schemas.BifrostCacheDebug           // Semantic cache debug if available
+	LocalCacheDebug *schemas.BifrostCacheDebug           // Local cache debug if available
 	Cost               *float64                             // Cost in dollars from pricing plugin
 	ErrorDetails       *schemas.BifrostError                // Error if any
 	ChunkIndex         int                                  // Index of the chunk in the stream
@@ -65,7 +65,7 @@ type TranscriptionStreamChunk struct {
 	Delta              *schemas.BifrostTranscriptionStreamResponse // The actual delta content
 	FinishReason       *string                                     // If this is the final chunk
 	TokenUsage         *schemas.TranscriptionUsage                 // Token usage if available
-	SemanticCacheDebug *schemas.BifrostCacheDebug                  // Semantic cache debug if available
+	LocalCacheDebug *schemas.BifrostCacheDebug                  // Local cache debug if available
 	Cost               *float64                                    // Cost in dollars from pricing plugin
 	ErrorDetails       *schemas.BifrostError                       // Error if any
 	ChunkIndex         int                                         // Index of the chunk in the stream
@@ -79,7 +79,7 @@ type ChatStreamChunk struct {
 	FinishReason       *string                                // If this is the final chunk
 	LogProbs           *schemas.BifrostLogProbs               // LogProbs if available
 	TokenUsage         *schemas.BifrostLLMUsage               // Token usage if available
-	SemanticCacheDebug *schemas.BifrostCacheDebug             // Semantic cache debug if available
+	LocalCacheDebug *schemas.BifrostCacheDebug             // Local cache debug if available
 	Cost               *float64                               // Cost in dollars from pricing plugin
 	ErrorDetails       *schemas.BifrostError                  // Error if any
 	ChunkIndex         int                                    // Index of the chunk in the stream
@@ -92,7 +92,7 @@ type ResponsesStreamChunk struct {
 	StreamResponse     *schemas.BifrostResponsesStreamResponse // The actual stream response
 	FinishReason       *string                                 // If this is the final chunk
 	TokenUsage         *schemas.BifrostLLMUsage                // Token usage if available
-	SemanticCacheDebug *schemas.BifrostCacheDebug              // Semantic cache debug if available
+	LocalCacheDebug *schemas.BifrostCacheDebug              // Local cache debug if available
 	Cost               *float64                                // Cost in dollars from pricing plugin
 	ErrorDetails       *schemas.BifrostError                   // Error if any
 	ChunkIndex         int                                     // Index of the chunk in the stream
@@ -108,7 +108,7 @@ type ImageStreamChunk struct {
 	ImageIndex         int                                           // Index of the image in the stream
 	ErrorDetails       *schemas.BifrostError                         // Error if any
 	Cost               *float64                                      // Cost in dollars from pricing plugin
-	SemanticCacheDebug *schemas.BifrostCacheDebug                    // Semantic cache debug if available
+	LocalCacheDebug *schemas.BifrostCacheDebug                    // Local cache debug if available
 	TokenUsage         *schemas.ImageUsage                           // Token usage if available
 	RawResponse        *string                                       // Raw response if available
 }
diff --git a/framework/vectorstore/pinecone_test.go b/framework/vectorstore/pinecone_test.go
index 7c2fdb3602..1cb9693749 100644
--- a/framework/vectorstore/pinecone_test.go
+++ b/framework/vectorstore/pinecone_test.go
@@ -517,7 +517,7 @@ func TestPineconeStore_ErrorHandling(t *testing.T) {
 	assert.Contains(t, err.Error(), "id is required")
 }
 
-func TestPineconeStore_SemanticCacheWorkflow(t *testing.T) {
+func TestPineconeStore_LocalCacheWorkflow(t *testing.T) {
 	if testing.Short() {
 		t.Skip("Skipping integration tests in short mode")
 	}
@@ -525,7 +525,7 @@ func TestPineconeStore_SemanticCacheWorkflow(t *testing.T) {
 	setup := NewPineconeTestSetup(t)
 	defer setup.Cleanup(t)
 
-	// Simulate a semantic cache workflow
+	// Simulate a local cache workflow
 	cacheEntries := []struct {
 		key       string
 		embedding []float32
diff --git a/framework/vectorstore/redis.go b/framework/vectorstore/redis.go
index 3655563015..6c8e4b889b 100644
--- a/framework/vectorstore/redis.go
+++ b/framework/vectorstore/redis.go
@@ -1284,7 +1284,7 @@ func (s *RedisStore) Add(ctx context.Context, namespace string, id string, embed
 
 	// Store as hash for efficient native vector search
 	if err := s.client.HSet(ctx, key, fields).Err(); err != nil {
-		return fmt.Errorf("failed to store semantic cache entry: %w", err)
+		return fmt.Errorf("failed to store local cache entry: %w", err)
 	}
 
 	return nil
diff --git a/framework/vectorstore/redis_test.go b/framework/vectorstore/redis_test.go
index 4adcd8a6a3..5f069a786b 100644
--- a/framework/vectorstore/redis_test.go
+++ b/framework/vectorstore/redis_test.go
@@ -146,7 +146,7 @@ func (ts *RedisTestSetup) ensureNamespaceExists(t *testing.T) {
 		"response": {
 			DataType: VectorStorePropertyTypeString,
 		},
-		"from_bifrost_semantic_cache_plugin": {
+		"from_bifrost_local_cache_plugin": {
 			DataType: VectorStorePropertyTypeBoolean,
 		},
 	}
@@ -1451,7 +1451,7 @@ func TestRedisStore_CompleteUseCases(t *testing.T) {
 		assert.GreaterOrEqual(t, len(vectorResults), 1)
 	})
 
-	t.Run("Semantic Cache-like Workflow", func(t *testing.T) {
+	t.Run("Local Cache-like Workflow", func(t *testing.T) {
 		// Add request-response pairs with parameters
 		cacheEntries := []struct {
 			key       string
@@ -1466,7 +1466,7 @@ func TestRedisStore_CompleteUseCases(t *testing.T) {
 					"user":                               "u1",
 					"lang":                               "en",
 					"response":                           "answer1",
-					"from_bifrost_semantic_cache_plugin": true,
+					"from_bifrost_local_cache_plugin": true,
 				},
 			},
 			{
@@ -1477,12 +1477,12 @@ func TestRedisStore_CompleteUseCases(t *testing.T) {
 					"user":                               "u1",
 					"lang":                               "es",
 					"response":                           "answer2",
-					"from_bifrost_semantic_cache_plugin": true,
+					"from_bifrost_local_cache_plugin": true,
 				},
 			},
 		}
 
-		filterFields := []string{"request_hash", "user", "lang", "response", "from_bifrost_semantic_cache_plugin"}
+		filterFields := []string{"request_hash", "user", "lang", "response", "from_bifrost_local_cache_plugin"}
 
 		for _, entry := range cacheEntries {
 			err := setup.Store.Add(setup.ctx, TestNamespace, entry.key, entry.embedding, entry.metadata)
diff --git a/framework/vectorstore/weaviate_test.go b/framework/vectorstore/weaviate_test.go
index 68c0f1678a..839fd63d45 100644
--- a/framework/vectorstore/weaviate_test.go
+++ b/framework/vectorstore/weaviate_test.go
@@ -644,7 +644,7 @@ func TestWeaviateStore_CompleteUseCases(t *testing.T) {
 		assert.Len(t, vectorResults, 2) // Both of Alice's content
 	})
 
-	t.Run("Semantic Cache-like Workflow", func(t *testing.T) {
+	t.Run("Local Cache-like Workflow", func(t *testing.T) {
 		// Add request-response pairs with parameters
 		cacheEntries := []struct {
 			key       string
diff --git a/helm-charts/bifrost/README.md b/helm-charts/bifrost/README.md
index 54cdedaaef..f5b0f5dab3 100644
--- a/helm-charts/bifrost/README.md
+++ b/helm-charts/bifrost/README.md
@@ -57,9 +57,9 @@ Official Helm charts for deploying [Bifrost](https://github.com/maximhq/bifrost)
 
 ### 2.1.7
 
-- Added semantic cache Helm layers and examples:
-  - Added Redis deployment template for semantic cache.
-  - Extended Helm values/schema coverage for semantic cache and client-config examples.
+- Added local cache Helm layers and examples:
+  - Added Redis deployment template for the local cache.
+  - Extended Helm values/schema coverage for the local cache and client-config examples.
 - Added enterprise/governance Helm support:
   - Added governance `business_units` support in Helm schema/template rendering.
   - Added deferred virtual-key/provider-config budget ordering handling in Helm rendering.
@@ -72,7 +72,7 @@ Official Helm charts for deploying [Bifrost](https://github.com/maximhq/bifrost)
 - Includes unreleased `2.1.5` changes 
 - Built-in plugin versioning for DB-backed deployments:
   - Added `version` field support for built-in plugins.
-  - Added default `version: 1` for built-in plugins in `values.yaml` (`telemetry`, `logging`, `governance`, `maxim`, `semanticCache`, `otel`, `datadog`).
+  - Added default `version: 1` for built-in plugins in `values.yaml` (`telemetry`, `logging`, `governance`, `maxim`, `localCache`, `otel`, `datadog`).
   - Updated `_helpers.tpl` to include plugin `version` in rendered config when set (cast as integer).
 - Updated StatefulSet PVC template labels to be immutable-safe:
   - `spec.volumeClaimTemplates.metadata.labels` now uses stable selector labels (without chart/app version labels).
@@ -433,9 +433,9 @@ postgresql:
 | `postgresql.external.enabled` | Use external PostgreSQL | `false` |
 | `postgresql.external.host` | External PostgreSQL host | `""` |
 
-### Vector Store Configuration (Semantic Caching)
+### Vector Store Configuration (Local Cache)
 
-Bifrost supports multiple vector stores for semantic caching:
+Bifrost supports multiple vector stores for the local cache:
 
 | Parameter | Description | Default |
 |-----------|-------------|---------|
@@ -520,7 +520,7 @@ bifrost:
 | Telemetry | `bifrost.plugins.telemetry.enabled` | Enable metrics collection |
 | Logging | `bifrost.plugins.logging.enabled` | Enable request logging |
 | Governance | `bifrost.plugins.governance.enabled` | Enable budget management |
-| Semantic Cache | `bifrost.plugins.semanticCache.enabled` | Enable semantic caching |
+| Local Cache | `bifrost.client.enableLocalCache` | Enable the local cache plugin |
 | OTEL | `bifrost.plugins.otel.enabled` | Enable OpenTelemetry integration |
 | Maxim | `bifrost.plugins.maxim.enabled` | Enable Maxim observability |
 | Datadog | `bifrost.plugins.datadog.enabled` | Enable Datadog APM integration |
@@ -654,9 +654,9 @@ The chart includes pre-configured examples in `values-examples/`:
 | `sqlite-only.yaml` | Simple setup with SQLite (local development) |
 | `postgres-only.yaml` | PostgreSQL for config and logs |
 | `mixed-backend.yaml` | SQLite for config + PostgreSQL for logs (mixed backend) |
-| `postgres-weaviate.yaml` | PostgreSQL + Weaviate for semantic caching |
-| `postgres-redis.yaml` | PostgreSQL + Redis for semantic caching |
-| `postgres-qdrant.yaml` | PostgreSQL + Qdrant for semantic caching |
+| `postgres-weaviate.yaml` | PostgreSQL + Weaviate for the local cache |
+| `postgres-redis.yaml` | PostgreSQL + Redis for the local cache |
+| `postgres-qdrant.yaml` | PostgreSQL + Qdrant for the local cache |
 | `sqlite-weaviate.yaml` | SQLite + Weaviate |
 | `sqlite-redis.yaml` | SQLite + Redis |
 | `sqlite-qdrant.yaml` | SQLite + Qdrant |
@@ -680,7 +680,7 @@ helm install bifrost ./bifrost -f ./bifrost/values-examples/postgres-only.yaml
 For production deployments, we recommend:
 
 1. **Use PostgreSQL** for reliable data persistence
-2. **Enable semantic caching** with Weaviate, Redis, or Qdrant
+2. **Enable the local cache** with Weaviate, Redis, or Qdrant
 3. **Configure auto-scaling** for handling variable load
 4. **Set up Ingress** with TLS termination
 5. **Use external secrets** for sensitive data
@@ -735,13 +735,17 @@ bifrost:
     initialPoolSize: 1000
     allowedOrigins:
       - "https://yourdomain.com"
+    enableLocalCache: true
   plugins:
-    semanticCache:
-      enabled: true
     telemetry:
       enabled: true
     logging:
       enabled: true
+
+# Local cache plugin configuration (top-level, sibling of bifrost / vectorStore)
+localCache:
+  config:
+    dimension: 1
 ```
 
 ## Upgrading
diff --git a/helm-charts/bifrost/scripts/generate-values.sh b/helm-charts/bifrost/scripts/generate-values.sh
index cf494b1081..277dbe753d 100755
--- a/helm-charts/bifrost/scripts/generate-values.sh
+++ b/helm-charts/bifrost/scripts/generate-values.sh
@@ -70,7 +70,7 @@ fi
 
 # Vector store
 echo ""
-echo "3. Do you need vector store for semantic caching?"
+echo "3. Do you need a vector store for the local cache (direct + semantic)?"
 read -p "Enable vector store? (y/n): " vector_choice
 
 if [[ "$vector_choice" =~ ^[Yy]$ ]]; then
@@ -356,18 +356,27 @@ cat >> "$OUTPUT_FILE" <<EOF
 EOF
 
 if [[ "$VECTOR_ENABLED" == "true" ]]; then
+    # Append the client toggle and the top-level localCache block. The local cache
+    # is no longer a plugins entry — it is a root-level sibling of bifrost +
+    # vectorStore, gated by client.enableLocalCache. NOTE(review): the snippet opens a new top-level "bifrost:" key; confirm it does not duplicate one written earlier.
     cat >> "$OUTPUT_FILE" <<EOF
-    
-    semanticCache:
-      enabled: true
-      config:
-        provider: "openai"
-        keys:
-          - "sk-..."  # Add your OpenAI key for embeddings
-        embeddingModel: "text-embedding-3-small"
-        dimension: 1536
-        threshold: 0.8
-        ttl: "5m"
+
+# Patch the client toggle for the local cache plugin (idempotent shim — if
+# you already set enableLocalCache above, remove this snippet by hand).
+bifrost:
+  client:
+    enableLocalCache: true
+
+# Local cache plugin configuration
+localCache:
+  keys:
+    - "sk-..."  # Add your OpenAI key for embeddings
+  config:
+    provider: "openai"
+    embedding_model: "text-embedding-3-small"
+    dimension: 1536
+    threshold: 0.8
+    ttl: "5m"
 EOF
 fi
 
diff --git a/helm-charts/bifrost/scripts/install.sh b/helm-charts/bifrost/scripts/install.sh
index 8087e1331a..7068159fd0 100755
--- a/helm-charts/bifrost/scripts/install.sh
+++ b/helm-charts/bifrost/scripts/install.sh
@@ -69,9 +69,9 @@ show_menu() {
     echo ""
     echo "  1) SQLite only (simple, local development)"
     echo "  2) PostgreSQL only (production-ready database)"
-    echo "  3) PostgreSQL + Weaviate (semantic caching with Weaviate)"
-    echo "  4) PostgreSQL + Redis (semantic caching with Redis)"
-    echo "  5) SQLite + Weaviate (local dev with semantic caching)"
+    echo "  3) PostgreSQL + Weaviate (local cache via Weaviate)"
+    echo "  4) PostgreSQL + Redis (local cache via Redis)"
+    echo "  5) SQLite + Weaviate (local dev with local cache)"
     echo "  6) SQLite + Redis (local dev with Redis caching)"
     echo "  7) External PostgreSQL (use your own database)"
     echo "  8) Production HA (high-availability setup)"
diff --git a/helm-charts/bifrost/templates/_helpers.tpl b/helm-charts/bifrost/templates/_helpers.tpl
index 0e4f3c2be5..40dde9bc41 100644
--- a/helm-charts/bifrost/templates/_helpers.tpl
+++ b/helm-charts/bifrost/templates/_helpers.tpl
@@ -215,6 +215,9 @@ false
 {{- if hasKey .Values.bifrost.client "enableLogging" }}
 {{- $_ := set $client "enable_logging" .Values.bifrost.client.enableLogging }}
 {{- end }}
+{{- if hasKey .Values.bifrost.client "enableLocalCache" }}
+{{- $_ := set $client "enable_local_cache" .Values.bifrost.client.enableLocalCache }}
+{{- end }}
 {{- if hasKey .Values.bifrost.client "enforceAuthOnInference" }}
 {{- $_ := set $client "enforce_auth_on_inference" .Values.bifrost.client.enforceAuthOnInference }}
 {{- end }}
@@ -881,6 +884,59 @@ false
 {{- end }}
 {{- $_ := set $config "vector_store" $vectorStore }}
 {{- end }}
+{{- /* Local cache plugin config (top-level "local_cache" block, sibling of
+       "client" / "vector_store"). Whether the plugin loads at boot is
+       controlled by client.enable_local_cache, rendered above; this block
+       only carries the config the plugin reads via its shared pointer. */ -}}
+{{- if .Values.localCache }}
+{{- $localCache := dict }}
+{{- $inputConfig := .Values.localCache.config | default dict }}
+{{- if $inputConfig.dimension }}
+{{- $_ := set $localCache "dimension" $inputConfig.dimension }}
+{{- end }}
+{{/* Only emit embedding provider config when not in direct-only mode (dimension: 1). */}}
+{{- if ne (int ($inputConfig.dimension | default 1536)) 1 }}
+{{- if $inputConfig.provider }}
+{{- $_ := set $localCache "provider" $inputConfig.provider }}
+{{- end }}
+{{- if .Values.localCache.keys }}
+{{- $_ := set $localCache "keys" .Values.localCache.keys }}
+{{- end }}
+{{- if $inputConfig.embedding_model }}
+{{- $_ := set $localCache "embedding_model" $inputConfig.embedding_model }}
+{{- end }}
+{{- end }}
+{{- if $inputConfig.threshold }}
+{{- $_ := set $localCache "threshold" $inputConfig.threshold }}
+{{- end }}
+{{- if $inputConfig.ttl }}
+{{- $_ := set $localCache "ttl" $inputConfig.ttl }}
+{{- end }}
+{{- if $inputConfig.vector_store_namespace }}
+{{- $_ := set $localCache "vector_store_namespace" $inputConfig.vector_store_namespace }}
+{{- end }}
+{{- if $inputConfig.default_cache_key }}
+{{- $_ := set $localCache "default_cache_key" $inputConfig.default_cache_key }}
+{{- end }}
+{{- if hasKey $inputConfig "conversation_history_threshold" }}
+{{- $_ := set $localCache "conversation_history_threshold" $inputConfig.conversation_history_threshold }}
+{{- end }}
+{{- if hasKey $inputConfig "cache_by_model" }}
+{{- $_ := set $localCache "cache_by_model" $inputConfig.cache_by_model }}
+{{- end }}
+{{- if hasKey $inputConfig "cache_by_provider" }}
+{{- $_ := set $localCache "cache_by_provider" $inputConfig.cache_by_provider }}
+{{- end }}
+{{- if hasKey $inputConfig "exclude_system_prompt" }}
+{{- $_ := set $localCache "exclude_system_prompt" $inputConfig.exclude_system_prompt }}
+{{- end }}
+{{- if hasKey $inputConfig "cleanup_on_shutdown" }}
+{{- $_ := set $localCache "cleanup_on_shutdown" $inputConfig.cleanup_on_shutdown }}
+{{- end }}
+{{- if $localCache }}
+{{- $_ := set $config "local_cache" $localCache }}
+{{- end }}
+{{- end }}
 {{- /* MCP */ -}}
 {{- if .Values.bifrost.mcp.enabled }}
 {{- $clientConfigs := list }}
@@ -1058,55 +1114,10 @@ false
 {{- if hasKey .Values.bifrost.plugins.maxim "version" }}{{- $_ := set $plugin "version" (.Values.bifrost.plugins.maxim.version | int) }}{{- end }}
 {{- $plugins = append $plugins $plugin }}
 {{- end }}
-{{- if .Values.bifrost.plugins.semanticCache.enabled }}
-{{- $scConfig := dict }}
-{{- $inputConfig := .Values.bifrost.plugins.semanticCache.config | default dict }}
-{{- if $inputConfig.dimension }}
-{{- $_ := set $scConfig "dimension" $inputConfig.dimension }}
-{{- end }}
-{{/* Only include embedding provider config when not in direct cache mode (dimension: 1) */}}
-{{- if ne (int ($inputConfig.dimension | default 1536)) 1 }}
-{{- if $inputConfig.provider }}
-{{- $_ := set $scConfig "provider" $inputConfig.provider }}
-{{- end }}
-{{- if $inputConfig.keys }}
-{{- $_ := set $scConfig "keys" $inputConfig.keys }}
-{{- end }}
-{{- if $inputConfig.embedding_model }}
-{{- $_ := set $scConfig "embedding_model" $inputConfig.embedding_model }}
-{{- end }}
-{{- end }}
-{{- if $inputConfig.threshold }}
-{{- $_ := set $scConfig "threshold" $inputConfig.threshold }}
-{{- end }}
-{{- if $inputConfig.ttl }}
-{{- $_ := set $scConfig "ttl" $inputConfig.ttl }}
-{{- end }}
-{{- if $inputConfig.vector_store_namespace }}
-{{- $_ := set $scConfig "vector_store_namespace" $inputConfig.vector_store_namespace }}
-{{- end }}
-{{- if $inputConfig.default_cache_key }}
-{{- $_ := set $scConfig "default_cache_key" $inputConfig.default_cache_key }}
-{{- end }}
-{{- if hasKey $inputConfig "conversation_history_threshold" }}
-{{- $_ := set $scConfig "conversation_history_threshold" $inputConfig.conversation_history_threshold }}
-{{- end }}
-{{- if hasKey $inputConfig "cache_by_model" }}
-{{- $_ := set $scConfig "cache_by_model" $inputConfig.cache_by_model }}
-{{- end }}
-{{- if hasKey $inputConfig "cache_by_provider" }}
-{{- $_ := set $scConfig "cache_by_provider" $inputConfig.cache_by_provider }}
-{{- end }}
-{{- if hasKey $inputConfig "exclude_system_prompt" }}
-{{- $_ := set $scConfig "exclude_system_prompt" $inputConfig.exclude_system_prompt }}
-{{- end }}
-{{- if hasKey $inputConfig "cleanup_on_shutdown" }}
-{{- $_ := set $scConfig "cleanup_on_shutdown" $inputConfig.cleanup_on_shutdown }}
-{{- end }}
-{{- $plugin := dict "enabled" true "name" "semantic_cache" "config" $scConfig }}
-{{- if hasKey .Values.bifrost.plugins.semanticCache "version" }}{{- $_ := set $plugin "version" (.Values.bifrost.plugins.semanticCache.version | int) }}{{- end }}
-{{- $plugins = append $plugins $plugin }}
-{{- end }}
+{{- /* The local cache is intentionally absent from plugins[]: its config is
+       rendered as the top-level "local_cache" block at the root of config.json,
+       and whether the plugin loads is controlled by the enable_local_cache
+       flag rendered into client. Nothing is appended to $plugins for it here. */ -}}
 {{- if .Values.bifrost.plugins.otel.enabled }}
 {{- $otelConfig := dict }}
 {{- $inputConfig := .Values.bifrost.plugins.otel.config | default dict }}
@@ -1262,7 +1273,7 @@ Call this template at the beginning of deployment/stateful templates
 */}}
 {{- define "bifrost.validate" -}}
 
-{{/* Validate semantic cache plugin when enabled */}}
+{{/* Validate plugin version bumps */}}
 {{- if and .Values.bifrost.plugins.telemetry.enabled (hasKey .Values.bifrost.plugins.telemetry "version") (lt (int .Values.bifrost.plugins.telemetry.version) 1) }}
 {{- fail "ERROR: bifrost.plugins.telemetry.version must be >= 1. Bump to >1 to force DB-backed plugin config updates." }}
 {{- end }}
@@ -1287,11 +1298,11 @@ Call this template at the beginning of deployment/stateful templates
 {{- if and .Values.bifrost.plugins.maxim.enabled (hasKey .Values.bifrost.plugins.maxim "version") (gt (int .Values.bifrost.plugins.maxim.version) 32767) }}
 {{- fail "ERROR: bifrost.plugins.maxim.version must be <= 32767." }}
 {{- end }}
-{{- if and .Values.bifrost.plugins.semanticCache.enabled (hasKey .Values.bifrost.plugins.semanticCache "version") (lt (int .Values.bifrost.plugins.semanticCache.version) 1) }}
-{{- fail "ERROR: bifrost.plugins.semanticCache.version must be >= 1. Bump to >1 to force DB-backed plugin config updates." }}
+{{- if and .Values.bifrost.client.enableLocalCache .Values.localCache (hasKey .Values.localCache "version") (lt (int .Values.localCache.version) 1) }}
+{{- fail "ERROR: localCache.version must be >= 1. Bump to >1 to force DB-backed plugin config updates." }}
 {{- end }}
-{{- if and .Values.bifrost.plugins.semanticCache.enabled (hasKey .Values.bifrost.plugins.semanticCache "version") (gt (int .Values.bifrost.plugins.semanticCache.version) 32767) }}
-{{- fail "ERROR: bifrost.plugins.semanticCache.version must be <= 32767." }}
+{{- if and .Values.bifrost.client.enableLocalCache .Values.localCache (hasKey .Values.localCache "version") (gt (int .Values.localCache.version) 32767) }}
+{{- fail "ERROR: localCache.version must be <= 32767." }}
 {{- end }}
 {{- if and .Values.bifrost.plugins.otel.enabled (hasKey .Values.bifrost.plugins.otel "version") (lt (int .Values.bifrost.plugins.otel.version) 1) }}
 {{- fail "ERROR: bifrost.plugins.otel.version must be >= 1. Bump to >1 to force DB-backed plugin config updates." }}
@@ -1306,15 +1317,15 @@ Call this template at the beginning of deployment/stateful templates
 {{- fail "ERROR: bifrost.plugins.datadog.version must be <= 32767." }}
 {{- end }}
 
-{{/* Validate semantic cache plugin when enabled */}}
-{{- if .Values.bifrost.plugins.semanticCache.enabled }}
-{{/* When dimension is 1, direct (hash-based) caching is used — provider and keys are not required. */}}
-{{- if ne (int .Values.bifrost.plugins.semanticCache.config.dimension) 1 }}
-{{- if not .Values.bifrost.plugins.semanticCache.config.provider }}
-{{- fail "ERROR: bifrost.plugins.semanticCache.config.provider is required for semantic caching. Supported providers: openai, anthropic, gemini, bedrock, azure, cohere, mistral, groq, ollama, openrouter, vertex, cerebras, parasail, perplexity, sgl, huggingface. For direct (hash-based) caching, set dimension: 1." }}
-{{- end }}
-{{- if not .Values.bifrost.plugins.semanticCache.config.keys }}
-{{- fail "ERROR: bifrost.plugins.semanticCache.config.keys is required for semantic caching. Provide at least one API key for the embedding provider. For direct (hash-based) caching, set dimension: 1." }}
+{{/* Validate local cache plugin when enabled */}}
+{{- if and .Values.bifrost.client.enableLocalCache .Values.localCache .Values.localCache.config }}
+{{/* Direct-only mode (dimension: 1) doesn't require provider or keys. */}}
+{{- if ne (int .Values.localCache.config.dimension) 1 }}
+{{- if not .Values.localCache.config.provider }}
+{{- fail "ERROR: localCache.config.provider is required for semantic mode. Supported providers: openai, anthropic, gemini, bedrock, azure, cohere, mistral, groq, ollama, openrouter, vertex, cerebras, parasail, perplexity, sgl, huggingface. For direct-only mode, set dimension: 1." }}
+{{- end }}
+{{- if and (not .Values.localCache.keys) (or (not .Values.localCache.secretRef) (not .Values.localCache.secretRef.name)) }}
+{{- fail "ERROR: localCache.keys (or localCache.secretRef.name) is required for semantic mode. Provide at least one API key for the embedding provider. For direct-only mode, set dimension: 1." }}
 {{- end }}
 {{- end }}
 {{- end }}
diff --git a/helm-charts/bifrost/templates/deployment.yaml b/helm-charts/bifrost/templates/deployment.yaml
index 7a65370f3e..0a5e13e923 100644
--- a/helm-charts/bifrost/templates/deployment.yaml
+++ b/helm-charts/bifrost/templates/deployment.yaml
@@ -95,12 +95,12 @@ spec:
                   name: {{ .Values.bifrost.encryptionKeySecret.name }}
                   key: {{ .Values.bifrost.encryptionKeySecret.key }}
             {{- end }}
-            {{- if and .Values.bifrost.plugins.semanticCache.enabled .Values.bifrost.plugins.semanticCache.secretRef .Values.bifrost.plugins.semanticCache.secretRef.name }}
-            - name: SEMANTIC_CACHE_API_KEY
+            {{- if and .Values.bifrost.client.enableLocalCache .Values.localCache .Values.localCache.secretRef .Values.localCache.secretRef.name }}
+            - name: LOCAL_CACHE_API_KEY
               valueFrom:
                 secretKeyRef:
-                  name: {{ .Values.bifrost.plugins.semanticCache.secretRef.name }}
-                  key: {{ .Values.bifrost.plugins.semanticCache.secretRef.key | default "api-key" }}
+                  name: {{ .Values.localCache.secretRef.name }}
+                  key: {{ .Values.localCache.secretRef.key | default "api-key" }}
             {{- end }}
             {{- /* PostgreSQL password from existing secret */ -}}
             {{- if and .Values.postgresql.external.enabled .Values.postgresql.external.existingSecret }}
diff --git a/helm-charts/bifrost/templates/stateful.yaml b/helm-charts/bifrost/templates/stateful.yaml
index e6c231b788..5eb0d4f69e 100644
--- a/helm-charts/bifrost/templates/stateful.yaml
+++ b/helm-charts/bifrost/templates/stateful.yaml
@@ -100,12 +100,12 @@ spec:
                   name: {{ .Values.bifrost.encryptionKeySecret.name }}
                   key: {{ .Values.bifrost.encryptionKeySecret.key }}
             {{- end }}
-            {{- if and .Values.bifrost.plugins.semanticCache.enabled .Values.bifrost.plugins.semanticCache.secretRef .Values.bifrost.plugins.semanticCache.secretRef.name }}
-            - name: SEMANTIC_CACHE_API_KEY
+            {{- if and .Values.bifrost.client.enableLocalCache .Values.localCache .Values.localCache.secretRef .Values.localCache.secretRef.name }}
+            - name: LOCAL_CACHE_API_KEY
               valueFrom:
                 secretKeyRef:
-                  name: {{ .Values.bifrost.plugins.semanticCache.secretRef.name }}
-                  key: {{ .Values.bifrost.plugins.semanticCache.secretRef.key | default "api-key" }}
+                  name: {{ .Values.localCache.secretRef.name }}
+                  key: {{ .Values.localCache.secretRef.key | default "api-key" }}
             {{- end }}
             {{- /* PostgreSQL password from existing secret */ -}}
             {{- if and .Values.postgresql.external.enabled .Values.postgresql.external.existingSecret }}
diff --git a/helm-charts/bifrost/values-examples/semantic-cache-secret-example.yaml b/helm-charts/bifrost/values-examples/local-cache-secret-example.yaml
similarity index 71%
rename from helm-charts/bifrost/values-examples/semantic-cache-secret-example.yaml
rename to helm-charts/bifrost/values-examples/local-cache-secret-example.yaml
index fb1a2dd56d..b657510b97 100644
--- a/helm-charts/bifrost/values-examples/semantic-cache-secret-example.yaml
+++ b/helm-charts/bifrost/values-examples/local-cache-secret-example.yaml
@@ -1,28 +1,27 @@
-# Example Kubernetes Secret for Semantic Cache API Key
+# Example Kubernetes Secret for Local Cache API Key
 # This secret is referenced by production-ha.yaml
 #
 # IMPORTANT: Do not commit this file with real API keys to version control!
 #
 # Usage:
 #   1. Replace 'YOUR_OPENAI_API_KEY' with your actual OpenAI API key
-#   2. Apply the secret: kubectl apply -f semantic-cache-secret-example.yaml -n <namespace>
+#   2. Apply the secret: kubectl apply -f local-cache-secret-example.yaml -n <namespace>
 #   3. Deploy Bifrost with: helm install bifrost . -f values-examples/production-ha.yaml -n <namespace>
 #
 # Alternative: Create the secret using kubectl command:
-#   kubectl create secret generic bifrost-semantic-cache \
+#   kubectl create secret generic bifrost-local-cache \
 #     --from-literal=openai-key=sk-YOUR_OPENAI_API_KEY \
 #     -n <namespace>
 
 apiVersion: v1
 kind: Secret
 metadata:
-  name: bifrost-semantic-cache
+  name: bifrost-local-cache
   namespace: default  # Change this to your target namespace
   labels:
     app.kubernetes.io/name: bifrost
-    app.kubernetes.io/component: semantic-cache
+    app.kubernetes.io/component: local-cache
 type: Opaque
 stringData:
   # Replace with your actual OpenAI API key
   openai-key: "sk-YOUR_OPENAI_API_KEY"
-
diff --git a/helm-charts/bifrost/values-examples/postgres-qdrant.yaml b/helm-charts/bifrost/values-examples/postgres-qdrant.yaml
index 16faf96745..e1b736c67e 100644
--- a/helm-charts/bifrost/values-examples/postgres-qdrant.yaml
+++ b/helm-charts/bifrost/values-examples/postgres-qdrant.yaml
@@ -58,25 +58,25 @@ vectorStore:
 bifrost:
   client:
     enableLogging: true
+    # Loads the local cache plugin (direct + semantic) at boot.
+    enableLocalCache: true
   providers: {}
     # Add your provider keys here
 
-  # Enable semantic cache plugin to use Qdrant vector store
-  plugins:
-    semanticCache:
-      enabled: true
-      # OPTION 1 (Recommended): Reference to external Kubernetes Secret for OpenAI API key
-      # Create the secret with: kubectl create secret generic bifrost-semantic-cache --from-literal=openai-key=sk-YOUR_OPENAI_KEY
-      secretRef:
-        name: "bifrost-semantic-cache"
-        key: "openai-key"
-      # OPTION 2 (Not recommended): Or uncomment to provide keys directly (not secure)
-      # Remove secretRef above and uncomment the keys below:
-      config:
-        provider: "openai"
-        # keys:
-        #   - "REPLACE_WITH_OPENAI_API_KEY"  # Not recommended: use secretRef instead
-        embedding_model: "text-embedding-3-small"
-        dimension: 1536
-        threshold: 0.8
-        ttl: "5m"
+# Local cache plugin configuration (sibling of bifrost / vectorStore)
+localCache:
+  # OPTION 1 (Recommended): Reference to external Kubernetes Secret for OpenAI API key
+  # Create the secret with: kubectl create secret generic bifrost-local-cache --from-literal=openai-key=sk-YOUR_OPENAI_KEY
+  secretRef:
+    name: "bifrost-local-cache"
+    key: "openai-key"
+  # OPTION 2 (Not recommended): Or uncomment to provide keys directly (not secure)
+  # Remove secretRef above and uncomment the keys below:
+  # keys:
+  #   - "REPLACE_WITH_OPENAI_API_KEY"  # Not recommended: use secretRef instead
+  config:
+    provider: "openai"
+    embedding_model: "text-embedding-3-small"
+    dimension: 1536
+    threshold: 0.8
+    ttl: "5m"
diff --git a/helm-charts/bifrost/values-examples/postgres-redis.yaml b/helm-charts/bifrost/values-examples/postgres-redis.yaml
index 6e98662b27..24c88f85c0 100644
--- a/helm-charts/bifrost/values-examples/postgres-redis.yaml
+++ b/helm-charts/bifrost/values-examples/postgres-redis.yaml
@@ -53,23 +53,23 @@ vectorStore:
 bifrost:
   client:
     enableLogging: true
+    # Loads the local cache plugin (direct + semantic) at boot.
+    enableLocalCache: true
   providers: {}
     # Add your provider keys here
-  
-  # Enable semantic cache plugin to use Redis vector store
-  plugins:
-    semanticCache:
-      enabled: true
-      # Reference to external Kubernetes Secret for OpenAI API key
-      # Create the secret with: kubectl create secret generic bifrost-semantic-cache --from-literal=openai-key=sk-YOUR_OPENAI_KEY
-      secretRef:
-        name: "bifrost-semantic-cache"
-        key: "openai-key"
-      config:
-        provider: "openai"
-        # keys are injected from the secret via environment variable
-        embedding_model: "text-embedding-3-small"
-        dimension: 1536
-        threshold: 0.8
-        ttl: "5m"
+
+# Local cache plugin configuration (sibling of bifrost / vectorStore)
+localCache:
+  # Reference to external Kubernetes Secret for OpenAI API key
+  # Create the secret with: kubectl create secret generic bifrost-local-cache --from-literal=openai-key=sk-YOUR_OPENAI_KEY
+  secretRef:
+    name: "bifrost-local-cache"
+    key: "openai-key"
+  config:
+    provider: "openai"
+    # API key comes from localCache.secretRef above, exposed to the pod as LOCAL_CACHE_API_KEY
+    embedding_model: "text-embedding-3-small"
+    dimension: 1536
+    threshold: 0.8
+    ttl: "5m"
 
diff --git a/helm-charts/bifrost/values-examples/postgres-weaviate.yaml b/helm-charts/bifrost/values-examples/postgres-weaviate.yaml
index bcd44ffa17..ec809f7fea 100644
--- a/helm-charts/bifrost/values-examples/postgres-weaviate.yaml
+++ b/helm-charts/bifrost/values-examples/postgres-weaviate.yaml
@@ -50,23 +50,23 @@ vectorStore:
 bifrost:
   client:
     enableLogging: true
+    # Loads the local cache plugin (direct + semantic) at boot.
+    enableLocalCache: true
   providers: {}
     # Add your provider keys here
-  
-  # Enable semantic cache plugin to use vector store
-  plugins:
-    semanticCache:
-      enabled: true
-      # Reference to external Kubernetes Secret for OpenAI API key
-      # Create the secret with: kubectl create secret generic bifrost-semantic-cache --from-literal=openai-key=sk-YOUR_OPENAI_KEY
-      secretRef:
-        name: "bifrost-semantic-cache"
-        key: "openai-key"
-      config:
-        provider: "openai"
-        # keys are injected from the secret via environment variable
-        embedding_model: "text-embedding-3-small"
-        dimension: 1536
-        threshold: 0.8
-        ttl: "5m"
+
+# Local cache plugin configuration (sibling of bifrost / vectorStore)
+localCache:
+  # Reference to external Kubernetes Secret for OpenAI API key
+  # Create the secret with: kubectl create secret generic bifrost-local-cache --from-literal=openai-key=sk-YOUR_OPENAI_KEY
+  secretRef:
+    name: "bifrost-local-cache"
+    key: "openai-key"
+  config:
+    provider: "openai"
+    # API key comes from localCache.secretRef above, exposed to the pod as LOCAL_CACHE_API_KEY
+    embedding_model: "text-embedding-3-small"
+    dimension: 1536
+    threshold: 0.8
+    ttl: "5m"
 
diff --git a/helm-charts/bifrost/values-examples/production-ha.yaml b/helm-charts/bifrost/values-examples/production-ha.yaml
index 720cc4dd67..064f93099d 100644
--- a/helm-charts/bifrost/values-examples/production-ha.yaml
+++ b/helm-charts/bifrost/values-examples/production-ha.yaml
@@ -67,7 +67,7 @@ postgresql:
         cpu: 1000m
         memory: 2Gi
 
-# Weaviate for semantic caching
+# Weaviate for the local cache
 vectorStore:
   enabled: true
   type: weaviate
@@ -99,6 +99,8 @@ bifrost:
       - "https://yourdomain.com"
       - "https://app.yourdomain.com"
     enableLogging: true
+    # Loads the local cache plugin (direct + semantic) at boot.
+    enableLocalCache: true
     maxRequestBodySizeMb: 100
   
   providers: {}
@@ -113,21 +115,21 @@ bifrost:
       enabled: true
       config: {}
     
-    semanticCache:
-      enabled: true
-      # Reference to external Kubernetes Secret for OpenAI API key
-      # Create the secret with: kubectl create secret generic bifrost-semantic-cache --from-literal=openai-key=sk-YOUR_OPENAI_KEY
-      secretRef:
-        name: "bifrost-semantic-cache"
-        key: "openai-key"
-      config:
-        provider: "openai"
-        # keys are injected from the secret via environment variable
-        embedding_model: "text-embedding-3-small"
-        dimension: 1536
-        threshold: 0.85
-        ttl: "1h"
-        conversation_history_threshold: 5
+# Local cache plugin configuration (sibling of bifrost / vectorStore)
+localCache:
+  # Reference to external Kubernetes Secret for OpenAI API key
+  # Create the secret with: kubectl create secret generic bifrost-local-cache --from-literal=openai-key=sk-YOUR_OPENAI_KEY
+  secretRef:
+    name: "bifrost-local-cache"
+    key: "openai-key"
+  config:
+    provider: "openai"
+    # API key comes from localCache.secretRef above, exposed to the pod as LOCAL_CACHE_API_KEY
+    embedding_model: "text-embedding-3-small"
+    dimension: 1536
+    threshold: 0.85
+    ttl: "1h"
+    conversation_history_threshold: 5
 
 # Pod affinity for better distribution
 affinity:
diff --git a/helm-charts/bifrost/values-examples/sqlite-qdrant.yaml b/helm-charts/bifrost/values-examples/sqlite-qdrant.yaml
index 53aafa052a..e14bc4521a 100644
--- a/helm-charts/bifrost/values-examples/sqlite-qdrant.yaml
+++ b/helm-charts/bifrost/values-examples/sqlite-qdrant.yaml
@@ -37,22 +37,22 @@ vectorStore:
 bifrost:
   client:
     enableLogging: true
+    # Loads the local cache plugin (direct + semantic) at boot.
+    enableLocalCache: true
   providers: {}
     # Add your provider keys here
 
-  # Enable semantic cache plugin to use vector store
-  plugins:
-    semanticCache:
-      enabled: true
-      # Reference to external Kubernetes Secret for OpenAI API key
-      # Create the secret with: kubectl create secret generic bifrost-semantic-cache --from-literal=openai-key=sk-YOUR_OPENAI_KEY
-      secretRef:
-        name: "bifrost-semantic-cache"
-        key: "openai-key"
-      config:
-        provider: "openai"
-        # keys are injected from the secret via environment variable
-        embedding_model: "text-embedding-3-small"
-        dimension: 1536
-        threshold: 0.8
-        ttl: "5m"
+# Local cache plugin configuration (sibling of bifrost / vectorStore)
+localCache:
+  # Reference to external Kubernetes Secret for OpenAI API key
+  # Create the secret with: kubectl create secret generic bifrost-local-cache --from-literal=openai-key=sk-YOUR_OPENAI_KEY
+  secretRef:
+    name: "bifrost-local-cache"
+    key: "openai-key"
+  config:
+    provider: "openai"
+    # API key comes from localCache.secretRef above, exposed to the pod as LOCAL_CACHE_API_KEY
+    embedding_model: "text-embedding-3-small"
+    dimension: 1536
+    threshold: 0.8
+    ttl: "5m"
diff --git a/helm-charts/bifrost/values-examples/sqlite-redis.yaml b/helm-charts/bifrost/values-examples/sqlite-redis.yaml
index 03a8aa6712..ca24f6b0e2 100644
--- a/helm-charts/bifrost/values-examples/sqlite-redis.yaml
+++ b/helm-charts/bifrost/values-examples/sqlite-redis.yaml
@@ -51,26 +51,26 @@ vectorStore:
 bifrost:
   client:
     enableLogging: true
+    # Loads the local cache plugin (direct + semantic) at boot.
+    enableLocalCache: true
   providers: {}
     # Add your provider keys here
-  
-  # Enable semantic cache plugin to use Redis vector store
-  plugins:
-    semanticCache:
-      enabled: true
-      # OPTION 1 (Recommended): Reference to external Kubernetes Secret for OpenAI API key
-      # Create the secret with: kubectl create secret generic bifrost-semantic-cache --from-literal=openai-key=sk-YOUR_OPENAI_KEY
-      secretRef:
-        name: "bifrost-semantic-cache"
-        key: "openai-key"
-      # OPTION 2 (Not recommended): Or uncomment to provide keys directly (not secure)
-      # Remove secretRef above and uncomment the keys below:
-      config:
-        provider: "openai"
-        # keys:
-        #   - "REPLACE_WITH_OPENAI_API_KEY"  # Not recommended: use secretRef instead
-        embedding_model: "text-embedding-3-small"
-        dimension: 1536
-        threshold: 0.8
-        ttl: "5m"
+
+# Local cache plugin configuration (sibling of bifrost / vectorStore)
+localCache:
+  # OPTION 1 (Recommended): Reference to external Kubernetes Secret for OpenAI API key
+  # Create the secret with: kubectl create secret generic bifrost-local-cache --from-literal=openai-key=sk-YOUR_OPENAI_KEY
+  secretRef:
+    name: "bifrost-local-cache"
+    key: "openai-key"
+  # OPTION 2 (Not recommended): Or uncomment to provide keys directly (not secure)
+  # Remove secretRef above and uncomment the keys below:
+  # keys:
+  #   - "REPLACE_WITH_OPENAI_API_KEY"  # Not recommended: use secretRef instead
+  config:
+    provider: "openai"
+    embedding_model: "text-embedding-3-small"
+    dimension: 1536
+    threshold: 0.8
+    ttl: "5m"
 
diff --git a/helm-charts/bifrost/values-examples/sqlite-weaviate.yaml b/helm-charts/bifrost/values-examples/sqlite-weaviate.yaml
index 0a6b50d2a2..7583141356 100644
--- a/helm-charts/bifrost/values-examples/sqlite-weaviate.yaml
+++ b/helm-charts/bifrost/values-examples/sqlite-weaviate.yaml
@@ -38,23 +38,23 @@ vectorStore:
 bifrost:
   client:
     enableLogging: true
+    # Loads the local cache plugin (direct + semantic) at boot.
+    enableLocalCache: true
   providers: {}
     # Add your provider keys here
-  
-  # Enable semantic cache plugin to use vector store
-  plugins:
-    semanticCache:
-      enabled: true
-      # Reference to external Kubernetes Secret for OpenAI API key
-      # Create the secret with: kubectl create secret generic bifrost-semantic-cache --from-literal=openai-key=sk-YOUR_OPENAI_KEY
-      secretRef:
-        name: "bifrost-semantic-cache"
-        key: "openai-key"
-      config:
-        provider: "openai"
-        # keys are injected from the secret via environment variable
-        embedding_model: "text-embedding-3-small"
-        dimension: 1536
-        threshold: 0.8
-        ttl: "5m"
+
+# Local cache plugin configuration (sibling of bifrost / vectorStore)
+localCache:
+  # Reference to external Kubernetes Secret for OpenAI API key
+  # Create the secret with: kubectl create secret generic bifrost-local-cache --from-literal=openai-key=sk-YOUR_OPENAI_KEY
+  secretRef:
+    name: "bifrost-local-cache"
+    key: "openai-key"
+  config:
+    provider: "openai"
+    # API key comes from localCache.secretRef above, exposed to the pod as LOCAL_CACHE_API_KEY
+    embedding_model: "text-embedding-3-small"
+    dimension: 1536
+    threshold: 0.8
+    ttl: "5m"
 
diff --git a/helm-charts/bifrost/values.schema.json b/helm-charts/bifrost/values.schema.json
index 749890cd46..3da1fac47b 100644
--- a/helm-charts/bifrost/values.schema.json
+++ b/helm-charts/bifrost/values.schema.json
@@ -287,6 +287,10 @@
               "type": "boolean",
               "description": "Enable request/response logging"
             },
+            "enableLocalCache": {
+              "type": "boolean",
+              "description": "Enable the local cache plugin (direct + semantic). Requires the top-level localCache block and a configured vectorStore."
+            },
             "disableContentLogging": {
               "type": "boolean",
               "description": "Disable logging of sensitive content (inputs, outputs, embeddings, etc.)"
@@ -747,109 +751,6 @@
                 ]
               }
             },
-            "semanticCache": {
-              "type": "object",
-              "properties": {
-                "enabled": {
-                  "type": "boolean"
-                },
-                "config": {
-                  "type": "object",
-                  "properties": {
-                    "provider": {
-                      "type": "string",
-                      "enum": [
-                        "openai",
-                        "anthropic",
-                        "gemini",
-                        "bedrock",
-                        "azure",
-                        "cohere",
-                        "mistral",
-                        "groq",
-                        "ollama",
-                        "openrouter",
-                        "vertex",
-                        "cerebras",
-                        "parasail",
-                        "perplexity",
-                        "sgl",
-                        "huggingface"
-                      ]
-                    },
-                    "keys": {
-                      "type": "array",
-                      "items": {
-                        "type": "string"
-                      },
-                      "description": "API keys for embedding provider (required for semantic caching, not needed for direct caching with dimension: 1)"
-                    },
-                    "embedding_model": {
-                      "type": "string"
-                    },
-                    "dimension": {
-                      "type": "integer",
-                      "minimum": 1
-                    },
-                    "threshold": {
-                      "type": "number",
-                      "minimum": 0,
-                      "maximum": 1
-                    },
-                    "ttl": {
-                      "description": "Time-to-live for cached responses (supports duration strings like '5m', '1h' or seconds as number, default: 5min)",
-                      "oneOf": [
-                        {
-                          "type": "string",
-                          "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$"
-                        },
-                        {
-                          "type": "integer",
-                          "minimum": 0
-                        }
-                      ]
-                    },
-                    "conversation_history_threshold": {
-                      "type": "integer",
-                      "minimum": 0
-                    },
-                    "cache_by_model": {
-                      "type": "boolean"
-                    },
-                    "cache_by_provider": {
-                      "type": "boolean"
-                    },
-                    "exclude_system_prompt": {
-                      "type": "boolean"
-                    },
-                    "cleanup_on_shutdown": {
-                      "type": "boolean"
-                    },
-                    "vector_store_namespace": {
-                      "type": "string"
-                    },
-                    "default_cache_key": {
-                      "type": "string",
-                      "description": "Default cache key for semantic cache lookups"
-                    }
-                  }
-                }
-              },
-              "if": {
-                "properties": {
-                  "enabled": {
-                    "const": true
-                  }
-                }
-              },
-              "then": {
-                "properties": {
-                  "config": {
-                    "required": ["dimension"]
-                  }
-                }
-              }
-            },
             "otel": {
               "type": "object",
               "properties": {
@@ -2538,6 +2439,93 @@
         }
       }
     },
+    "localCache": {
+      "type": "object",
+      "description": "Local cache plugin configuration (top-level, sibling of bifrost / vectorStore). Loaded only when bifrost.client.enableLocalCache is true.",
+      "properties": {
+        "version": {
+          "type": "integer",
+          "minimum": 1,
+          "maximum": 32767
+        },
+        "keys": {
+          "type": "array",
+          "items": { "type": "string" },
+          "description": "API keys for the embedding provider (required for semantic mode, not needed for direct-only mode with dimension: 1). Use secretRef instead in production."
+        },
+        "secretRef": {
+          "type": "object",
+          "properties": {
+            "name": { "type": "string" },
+            "key": { "type": "string" }
+          }
+        },
+        "config": {
+          "type": "object",
+          "properties": {
+            "provider": {
+              "type": "string",
+              "enum": [
+                "",
+                "openai",
+                "anthropic",
+                "gemini",
+                "bedrock",
+                "azure",
+                "cohere",
+                "mistral",
+                "groq",
+                "ollama",
+                "openrouter",
+                "vertex",
+                "cerebras",
+                "parasail",
+                "perplexity",
+                "sgl",
+                "huggingface"
+              ]
+            },
+            "embedding_model": { "type": "string" },
+            "dimension": {
+              "type": "integer",
+              "minimum": 1
+            },
+            "threshold": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 1
+            },
+            "ttl": {
+              "description": "Time-to-live for cached responses (duration string like '5m' or integer seconds).",
+              "oneOf": [
+                {
+                  "type": "string",
+                  "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$"
+                },
+                {
+                  "type": "integer",
+                  "minimum": 0
+                }
+              ]
+            },
+            "conversation_history_threshold": {
+              "type": "integer",
+              "minimum": 0
+            },
+            "cache_by_model": { "type": "boolean" },
+            "cache_by_provider": { "type": "boolean" },
+            "exclude_system_prompt": { "type": "boolean" },
+            "cleanup_on_shutdown": { "type": "boolean" },
+            "vector_store_namespace": { "type": "string" },
+            "default_cache_key": {
+              "type": "string",
+              "description": "Cache key used when no per-request key is supplied."
+            }
+          },
+          "required": ["dimension"]
+        }
+      }
+    },
     "vectorStore": {
       "type": "object",
       "properties": {
diff --git a/helm-charts/bifrost/values.yaml b/helm-charts/bifrost/values.yaml
index fd6a6712d7..2417d60b5a 100644
--- a/helm-charts/bifrost/values.yaml
+++ b/helm-charts/bifrost/values.yaml
@@ -199,6 +199,7 @@ bifrost:
     allowedOrigins:
       - "*"
     enableLogging: true
+    enableLocalCache: false
     disableContentLogging: false
     disableDbPingsInHealth: false
     logRetentionDays: 365
@@ -405,25 +406,6 @@ bifrost:
         name: ""
         key: "api-key"
 
-    semanticCache:
-      enabled: false
-      version: 1
-      config:
-        # Semantic caching mode (dimension > 1): requires provider, keys, and embedding_model
-        # Direct caching mode (dimension: 1): hash-based exact matching, no embedding provider needed
-        provider: "openai"
-        keys: []
-        embedding_model: "text-embedding-3-small"
-        dimension: 1536
-        threshold: 0.8
-        ttl: "5m"
-        conversation_history_threshold: 3
-        cache_by_model: true
-        cache_by_provider: true
-        exclude_system_prompt: false
-        cleanup_on_shutdown: false
-        vector_store_namespace: ""
-
     otel:
       enabled: false
       version: 1
@@ -803,9 +785,37 @@ postgresql:
   metrics:
     enabled: false
 
+# Local cache plugin configuration. Loaded only when bifrost.client.enableLocalCache is true.
+# Pure config changes mutate the live plugin in place via PUT /api/local-cache/config —
+# no plugin reload, no restart. Provider-backed semantic mode requires `provider`, `embedding_model`,
+# and either `keys` or `secretRef.name`; direct-only mode is `dimension: 1` with no provider.
+localCache:
+  version: 1
+  # Embedding provider keys for the local cache (used in semantic mode only).
+  # Format: ["env.OPENAI_API_KEY"] or literal values.
+  keys: []
+  # Use existing Kubernetes secret for the embedding API key (takes precedence over keys).
+  secretRef:
+    name: ""
+    key: "api-key"
+  config:
+    # Semantic mode (dimension > 1): requires provider + embedding_model, plus localCache.keys or localCache.secretRef.name.
+    # Direct-only mode (dimension: 1): no embedding provider needed.
+    provider: "openai"
+    embedding_model: "text-embedding-3-small"
+    dimension: 1536
+    threshold: 0.8
+    ttl: "5m"
+    conversation_history_threshold: 3
+    cache_by_model: true
+    cache_by_provider: true
+    exclude_system_prompt: false
+    cleanup_on_shutdown: false
+    vector_store_namespace: ""
+
 # Vector store configuration
 vectorStore:
-  # Enable vector store for semantic caching
+  # Enable vector store for the local cache plugin
   enabled: false
   type: none # Options: none, weaviate, redis, qdrant
 
diff --git a/nix/packages/bifrost-http.nix b/nix/packages/bifrost-http.nix
index 0d05dd1e59..4c41850892 100644
--- a/nix/packages/bifrost-http.nix
+++ b/nix/packages/bifrost-http.nix
@@ -21,10 +21,10 @@ let
     replace github.com/maximhq/bifrost/framework => ../framework
     replace github.com/maximhq/bifrost/plugins/governance => ../plugins/governance
     replace github.com/maximhq/bifrost/plugins/compat => ../plugins/compat
+    replace github.com/maximhq/bifrost/plugins/localcache => ../plugins/localcache
     replace github.com/maximhq/bifrost/plugins/logging => ../plugins/logging
     replace github.com/maximhq/bifrost/plugins/maxim => ../plugins/maxim
     replace github.com/maximhq/bifrost/plugins/otel => ../plugins/otel
-    replace github.com/maximhq/bifrost/plugins/semanticcache => ../plugins/semanticcache
     replace github.com/maximhq/bifrost/plugins/telemetry => ../plugins/telemetry
     EOF
     fi
diff --git a/plugins/semanticcache/changelog.md b/plugins/localcache/changelog.md
similarity index 100%
rename from plugins/semanticcache/changelog.md
rename to plugins/localcache/changelog.md
diff --git a/plugins/semanticcache/config_unmarshal_test.go b/plugins/localcache/config_unmarshal_test.go
similarity index 99%
rename from plugins/semanticcache/config_unmarshal_test.go
rename to plugins/localcache/config_unmarshal_test.go
index d38ad31fc6..1885585247 100644
--- a/plugins/semanticcache/config_unmarshal_test.go
+++ b/plugins/localcache/config_unmarshal_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"encoding/json"
diff --git a/plugins/semanticcache/go.mod b/plugins/localcache/go.mod
similarity index 73%
rename from plugins/semanticcache/go.mod
rename to plugins/localcache/go.mod
index c3186d6783..37bf7d9336 100644
--- a/plugins/semanticcache/go.mod
+++ b/plugins/localcache/go.mod
@@ -1,4 +1,4 @@
-module github.com/maximhq/bifrost/plugins/semanticcache
+module github.com/maximhq/bifrost/plugins/localcache
 
 go 1.26.2
 
@@ -11,12 +11,21 @@ require (
 )
 
 require (
+	cel.dev/expr v0.25.1 // indirect
 	cloud.google.com/go v0.123.0 // indirect
+	cloud.google.com/go/auth v0.18.2 // indirect
+	cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect
 	cloud.google.com/go/compute/metadata v0.9.0 // indirect
+	cloud.google.com/go/iam v1.5.3 // indirect
+	cloud.google.com/go/monitoring v1.24.3 // indirect
+	cloud.google.com/go/storage v1.61.3 // indirect
 	github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 // indirect
 	github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 // indirect
 	github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect
 	github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect
+	github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0 // indirect
+	github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.55.0 // indirect
+	github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.55.0 // indirect
 	github.com/andybalholm/brotli v1.2.0 // indirect
 	github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
 	github.com/aws/aws-sdk-go-v2 v1.41.5 // indirect
@@ -44,8 +53,13 @@ require (
 	github.com/bytedance/sonic v1.15.0 // indirect
 	github.com/bytedance/sonic/loader v0.5.0 // indirect
 	github.com/cloudwego/base64x v0.1.6 // indirect
+	github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 // indirect
 	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
 	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
+	github.com/envoyproxy/go-control-plane/envoy v1.36.0 // indirect
+	github.com/envoyproxy/protoc-gen-validate v1.3.0 // indirect
+	github.com/felixge/httpsnoop v1.0.4 // indirect
+	github.com/go-jose/go-jose/v4 v4.1.4 // indirect
 	github.com/go-logr/logr v1.4.3 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/go-openapi/analysis v0.24.2 // indirect
@@ -71,8 +85,17 @@ require (
 	github.com/go-openapi/validate v0.25.1 // indirect
 	github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
 	github.com/golang-jwt/jwt/v5 v5.3.0 // indirect
+	github.com/google/s2a-go v0.1.9 // indirect
+	github.com/googleapis/enterprise-certificate-proxy v0.3.14 // indirect
+	github.com/googleapis/gax-go/v2 v2.19.0 // indirect
 	github.com/invopop/jsonschema v0.13.0 // indirect
+	github.com/jackc/pgpassfile v1.0.0 // indirect
+	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
+	github.com/jackc/pgx/v5 v5.9.1 // indirect
+	github.com/jackc/puddle/v2 v2.2.2 // indirect
 	github.com/jaswdr/faker/v2 v2.8.0 // indirect
+	github.com/jinzhu/inflection v1.0.0 // indirect
+	github.com/jinzhu/now v1.1.5 // indirect
 	github.com/klauspost/compress v1.18.2 // indirect
 	github.com/klauspost/cpuid/v2 v2.3.0 // indirect
 	github.com/kylelemons/godebug v1.1.0 // indirect
@@ -80,16 +103,19 @@ require (
 	github.com/mark3labs/mcp-go v0.43.2 // indirect
 	github.com/mattn/go-colorable v0.1.14 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/mattn/go-sqlite3 v1.14.32 // indirect
 	github.com/oapi-codegen/runtime v1.1.1 // indirect
 	github.com/oklog/ulid v1.3.1 // indirect
 	github.com/pinecone-io/go-pinecone/v5 v5.3.0 // indirect
 	github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
 	github.com/pkg/errors v0.9.1 // indirect
+	github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
 	github.com/qdrant/go-client v1.16.2 // indirect
 	github.com/redis/go-redis/v9 v9.17.2 // indirect
 	github.com/rs/zerolog v1.34.0 // indirect
 	github.com/spf13/cast v1.10.0 // indirect
+	github.com/spiffe/go-spiffe/v2 v2.6.0 // indirect
 	github.com/stretchr/testify v1.11.1 // indirect
 	github.com/tidwall/gjson v1.18.0 // indirect
 	github.com/tidwall/match v1.1.1 // indirect
@@ -104,8 +130,13 @@ require (
 	github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
 	go.mongodb.org/mongo-driver v1.17.6 // indirect
 	go.opentelemetry.io/auto/sdk v1.2.1 // indirect
+	go.opentelemetry.io/contrib/detectors/gcp v1.40.0 // indirect
+	go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 // indirect
+	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect
 	go.opentelemetry.io/otel v1.43.0 // indirect
 	go.opentelemetry.io/otel/metric v1.43.0 // indirect
+	go.opentelemetry.io/otel/sdk v1.43.0 // indirect
+	go.opentelemetry.io/otel/sdk/metric v1.43.0 // indirect
 	go.opentelemetry.io/otel/trace v1.43.0 // indirect
 	go.starlark.net v0.0.0-20260102030733-3fee463870c9 // indirect
 	go.yaml.in/yaml/v3 v3.0.4 // indirect
@@ -116,9 +147,15 @@ require (
 	golang.org/x/sync v0.20.0 // indirect
 	golang.org/x/sys v0.42.0 // indirect
 	golang.org/x/text v0.35.0 // indirect
+	golang.org/x/time v0.15.0 // indirect
+	google.golang.org/api v0.274.0 // indirect
+	google.golang.org/genproto v0.0.0-20260316180232-0b37fe3546d5 // indirect
 	google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect
 	google.golang.org/grpc v1.80.0 // indirect
 	google.golang.org/protobuf v1.36.11 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
+	gorm.io/driver/postgres v1.6.0 // indirect
+	gorm.io/driver/sqlite v1.6.0 // indirect
+	gorm.io/gorm v1.31.1 // indirect
 )
diff --git a/plugins/semanticcache/go.sum b/plugins/localcache/go.sum
similarity index 77%
rename from plugins/semanticcache/go.sum
rename to plugins/localcache/go.sum
index 6e3b7b4926..55ce9818a6 100644
--- a/plugins/semanticcache/go.sum
+++ b/plugins/localcache/go.sum
@@ -1,7 +1,25 @@
+cel.dev/expr v0.25.1 h1:1KrZg61W6TWSxuNZ37Xy49ps13NUovb66QLprthtwi4=
+cel.dev/expr v0.25.1/go.mod h1:hrXvqGP6G6gyx8UAHSHJ5RGk//1Oj5nXQ2NI02Nrsg4=
 cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE=
 cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU=
+cloud.google.com/go/auth v0.18.2 h1:+Nbt5Ev0xEqxlNjd6c+yYUeosQ5TtEUaNcN/3FozlaM=
+cloud.google.com/go/auth v0.18.2/go.mod h1:xD+oY7gcahcu7G2SG2DsBerfFxgPAJz17zz2joOFF3M=
+cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc=
+cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c=
 cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs=
 cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10=
+cloud.google.com/go/iam v1.5.3 h1:+vMINPiDF2ognBJ97ABAYYwRgsaqxPbQDlMnbHMjolc=
+cloud.google.com/go/iam v1.5.3/go.mod h1:MR3v9oLkZCTlaqljW6Eb2d3HGDGK5/bDv93jhfISFvU=
+cloud.google.com/go/logging v1.13.2 h1:qqlHCBvieJT9Cdq4QqYx1KPadCQ2noD4FK02eNqHAjA=
+cloud.google.com/go/logging v1.13.2/go.mod h1:zaybliM3yun1J8mU2dVQ1/qDzjbOqEijZCn6hSBtKak=
+cloud.google.com/go/longrunning v0.8.0 h1:LiKK77J3bx5gDLi4SMViHixjD2ohlkwBi+mKA7EhfW8=
+cloud.google.com/go/longrunning v0.8.0/go.mod h1:UmErU2Onzi+fKDg2gR7dusz11Pe26aknR4kHmJJqIfk=
+cloud.google.com/go/monitoring v1.24.3 h1:dde+gMNc0UhPZD1Azu6at2e79bfdztVDS5lvhOdsgaE=
+cloud.google.com/go/monitoring v1.24.3/go.mod h1:nYP6W0tm3N9H/bOw8am7t62YTzZY+zUeQ+Bi6+2eonI=
+cloud.google.com/go/storage v1.61.3 h1:VS//ZfBuPGDvakfD9xyPW1RGF1Vy3BWUoVZXgW1KMOg=
+cloud.google.com/go/storage v1.61.3/go.mod h1:JtqK8BBB7TWv0HVGHubtUdzYYrakOQIsMLffZ2Z/HWk=
+cloud.google.com/go/trace v1.11.7 h1:kDNDX8JkaAG3R2nq1lIdkb7FCSi1rCmsEtKVsty7p+U=
+cloud.google.com/go/trace v1.11.7/go.mod h1:TNn9d5V3fQVf6s4SCveVMIBS2LJUqo73GACmq/Tky0s=
 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 h1:JXg2dwJUmPB9JmtVmdEB16APJ7jurfbY5jnfXpJoRMc=
 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0/go.mod h1:YD5h/ldMsG0XiIw7PdyNhLxaM317eFh5yNLccNfGdyw=
 github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 h1:Hk5QBxZQC1jb2Fwj6mpzme37xbCDdNTxU7O9eb5+LB4=
@@ -14,6 +32,14 @@ github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJ
 github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE=
 github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 h1:XRzhVemXdgvJqCH0sFfrBUTnUJSBrBf7++ypk+twtRs=
 github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk=
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0 h1:DHa2U07rk8syqvCge0QIGMCE1WxGj9njT44GH7zNJLQ=
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0=
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.55.0 h1:UnDZ/zFfG1JhH/DqxIZYU/1CUAlTUScoXD/LcM2Ykk8=
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.55.0/go.mod h1:IA1C1U7jO/ENqm/vhi7V9YYpBsp+IMyqNrEN94N7tVc=
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.55.0 h1:7t/qx5Ost0s0wbA/VDrByOooURhp+ikYwv20i9Y07TQ=
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.55.0/go.mod h1:vB2GH9GAYYJTO3mEn8oYwzEdhlayZIdQz6zdzgUIRvA=
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.55.0 h1:0s6TxfCu2KHkkZPnBfsQ2y5qia0jl3MMrmBhu3nCOYk=
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.55.0/go.mod h1:Mf6O40IAyB9zR/1J8nGDDPirZQQPbYJni8Yisy7NTMc=
 github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
 github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
 github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
@@ -76,6 +102,8 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M=
 github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU=
+github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 h1:6xNmx7iTtyBRev0+D/Tv1FZd4SCg8axKApyNyRsAt/w=
+github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/BuvyMxRnYBlmVaq4OLiKW6iRQfvC62cvdkI=
 github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -83,10 +111,22 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
 github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
+github.com/envoyproxy/go-control-plane v0.14.0 h1:hbG2kr4RuFj222B6+7T83thSPqLjwBIfQawTkC++2HA=
+github.com/envoyproxy/go-control-plane v0.14.0/go.mod h1:NcS5X47pLl/hfqxU70yPwL9ZMkUlwlKxtAohpi2wBEU=
+github.com/envoyproxy/go-control-plane/envoy v1.36.0 h1:yg/JjO5E7ubRyKX3m07GF3reDNEnfOboJ0QySbH736g=
+github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98=
+github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI=
+github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4=
+github.com/envoyproxy/protoc-gen-validate v1.3.0 h1:TvGH1wof4H33rezVKWSpqKz5NXWg5VPuZ0uONDT6eb4=
+github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA=
 github.com/fasthttp/websocket v1.5.12 h1:e4RGPpWW2HTbL3zV0Y/t7g0ub294LkiuXXUuTOUInlE=
 github.com/fasthttp/websocket v1.5.12/go.mod h1:I+liyL7/4moHojiOgUOIKEWm9EIxHqxZChS+aMFltyg=
+github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
+github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
 github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
 github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
+github.com/go-jose/go-jose/v4 v4.1.4 h1:moDMcTHmvE6Groj34emNPLs/qtYXRVcd6S7NHbHz3kA=
+github.com/go-jose/go-jose/v4 v4.1.4/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
 github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
@@ -149,14 +189,34 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek
 github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
 github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc=
+github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0=
+github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
+github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/googleapis/enterprise-certificate-proxy v0.3.14 h1:yh8ncqsbUY4shRD5dA6RlzjJaT4hi3kII+zYw8wmLb8=
+github.com/googleapis/enterprise-certificate-proxy v0.3.14/go.mod h1:vqVt9yG9480NtzREnTlmGSBmFrA+bzb0yl0TxoBQXOg=
+github.com/googleapis/gax-go/v2 v2.19.0 h1:fYQaUOiGwll0cGj7jmHT/0nPlcrZDFPrZRhTsoCr8hE=
+github.com/googleapis/gax-go/v2 v2.19.0/go.mod h1:w2ROXVdfGEVFXzmlciUU4EdjHgWvB5h2n6x/8XSTTJA=
 github.com/hajimehoshi/go-mp3 v0.3.4 h1:NUP7pBYH8OguP4diaTZ9wJbUbk3tC0KlfzsEpWmYj68=
 github.com/hajimehoshi/go-mp3 v0.3.4/go.mod h1:fRtZraRFcWb0pu7ok0LqyFhCUrPeMsGRSVop0eemFmo=
 github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
 github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0=
+github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
+github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
+github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
+github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
+github.com/jackc/pgx/v5 v5.9.1 h1:uwrxJXBnx76nyISkhr33kQLlUqjv7et7b9FjCen/tdc=
+github.com/jackc/pgx/v5 v5.9.1/go.mod h1:mal1tBGAFfLHvZzaYh77YS/eC6IX9OWbRV1QIIM0Jn4=
+github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
+github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
 github.com/jaswdr/faker/v2 v2.8.0 h1:3AxdXW9U7dJmWckh/P0YgRbNlCcVsTyrUNUnLVP9b3Q=
 github.com/jaswdr/faker/v2 v2.8.0/go.mod h1:jZq+qzNQr8/P+5fHd9t3txe2GNPnthrTfohtnJ7B+68=
+github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
+github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
+github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
+github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
 github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
 github.com/keybase/go-keychain v0.0.1 h1:way+bWYa6lDppZoZcgMbYsvC7GxljxrskdNInRtuthU=
 github.com/keybase/go-keychain v0.0.1/go.mod h1:PdEILRW3i9D8JcdM+FmY6RwkHGnhHxXwkPPMeUgOK1k=
@@ -181,6 +241,8 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/
 github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs=
+github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
 github.com/maximhq/bifrost/core v1.5.7 h1:XZYHpg/JjOVuBWuJmpupqZvbVNZw7boc81QQBSLoIBo=
 github.com/maximhq/bifrost/core v1.5.7/go.mod h1:z1/vOalbDAD7v7sYbXQsqR+2qIFP0jKOSIStw6Q4P4U=
 github.com/maximhq/bifrost/framework v1.3.7 h1:KOsXYoiFgB4F2gugybz2h+z17wKqQNeS+pHdEzBOHAc=
@@ -197,6 +259,8 @@ github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmd
 github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
+github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
@@ -213,6 +277,8 @@ github.com/savsgio/gotils v0.0.0-20250408102913-196191ec6287 h1:qIQ0tWF9vxGtkJa2
 github.com/savsgio/gotils v0.0.0-20250408102913-196191ec6287/go.mod h1:sM7Mt7uEoCeFSCBM+qBrqvEo+/9vdmj19wzp3yzUhmg=
 github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY=
 github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo=
+github.com/spiffe/go-spiffe/v2 v2.6.0 h1:l+DolpxNWYgruGQVV0xsfeya3CsC7m8iBzDnMpsbLuo=
+github.com/spiffe/go-spiffe/v2 v2.6.0/go.mod h1:gm2SeUoMZEtpnzPNs2Csc0D/gX33k1xIx7lEzqblHEs=
 github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
@@ -221,6 +287,7 @@ github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/
 github.com/stretchr/objx v0.5.3 h1:jmXUvGomnU1o3W/V5h2VEradbpJDwGrzugQQvL0POH4=
 github.com/stretchr/objx v0.5.3/go.mod h1:rDQraq+vQZU7Fde9LOZLr8Tax6zZvy4kuNKF+QYS+U0=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
@@ -256,8 +323,16 @@ go.mongodb.org/mongo-driver v1.17.6 h1:87JUG1wZfWsr6rIz3ZmpH90rL5tea7O3IHuSwHUps
 go.mongodb.org/mongo-driver v1.17.6/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ=
 go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
 go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
+go.opentelemetry.io/contrib/detectors/gcp v1.40.0 h1:Awaf8gmW99tZTOWqkLCOl6aw1/rxAWVlHsHIZ3fT2sA=
+go.opentelemetry.io/contrib/detectors/gcp v1.40.0/go.mod h1:99OY9ZCqyLkzJLTh5XhECpLRSxcZl+ZDKBEO+jMBFR4=
+go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 h1:YH4g8lQroajqUwWbq/tr2QX1JFmEXaDLgG+ew9bLMWo=
+go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0/go.mod h1:fvPi2qXDqFs8M4B4fmJhE92TyQs9Ydjlg3RvfUp+NbQ=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg=
 go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I=
 go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0=
+go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.40.0 h1:ZrPRak/kS4xI3AVXy8F7pipuDXmDsrO8Lg+yQjBLjw0=
+go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.40.0/go.mod h1:3y6kQCWztq6hyW8Z9YxQDDm0Je9AJoFar2G0yDcmhRk=
 go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM=
 go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY=
 go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg=
@@ -288,8 +363,14 @@ golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
 golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
 golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
 golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
+golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
+golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
 gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4=
 gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E=
+google.golang.org/api v0.274.0 h1:aYhycS5QQCwxHLwfEHRRLf9yNsfvp1JadKKWBE54RFA=
+google.golang.org/api v0.274.0/go.mod h1:JbAt7mF+XVmWu6xNP8/+CTiGH30ofmCmk9nM8d8fHew=
+google.golang.org/genproto v0.0.0-20260316180232-0b37fe3546d5 h1:JNfk58HZ8lfmXbYK2vx/UvsqIL59TzByCxPIX4TDmsE=
+google.golang.org/genproto v0.0.0-20260316180232-0b37fe3546d5/go.mod h1:x5julN69+ED4PcFk/XWayw35O0lf/nGa4aNgODCmNmw=
 google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA=
 google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M=
 google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg=
@@ -304,3 +385,9 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EV
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gorm.io/driver/postgres v1.6.0 h1:2dxzU8xJ+ivvqTRph34QX+WrRaJlmfyPqXmoGVjMBa4=
+gorm.io/driver/postgres v1.6.0/go.mod h1:vUw0mrGgrTK+uPHEhAdV4sfFELrByKVGnaVRkXDhtWo=
+gorm.io/driver/sqlite v1.6.0 h1:WHRRrIiulaPiPFmDcod6prc4l2VGVWHz80KspNsxSfQ=
+gorm.io/driver/sqlite v1.6.0/go.mod h1:AO9V1qIQddBESngQUKWL9yoH93HIeA1X6V633rBwyT8=
+gorm.io/gorm v1.31.1 h1:7CA8FTFz/gRfgqgpeKIBcervUn3xSyPUmr6B2WXJ7kg=
+gorm.io/gorm v1.31.1/go.mod h1:XyQVbO2k6YkOis7C2437jSit3SsDK72s7n7rsSHd+Gs=
diff --git a/plugins/semanticcache/main.go b/plugins/localcache/main.go
similarity index 82%
rename from plugins/semanticcache/main.go
rename to plugins/localcache/main.go
index e54f753a39..918de4d3ac 100644
--- a/plugins/semanticcache/main.go
+++ b/plugins/localcache/main.go
@@ -1,91 +1,33 @@
-// Package semanticcache provides semantic caching integration for Bifrost plugin.
-// This plugin caches responses using both direct hash matching (xxhash) and semantic similarity search (embeddings).
-// It supports configurable caching behavior via the VectorStore abstraction, with TTL management and streaming response handling.
-package semanticcache
+// Package localcache provides local caching integration for Bifrost.
+// The plugin caches responses via two complementary lookup paths: a direct
+// hash match (xxhash) for exact replays, and embedding-based similarity
+// search for semantically related content. It uses the VectorStore
+// abstraction for storage, with TTL management and streaming response
+// handling.
+//
+// The plugin holds a *configstore.LocalCacheConfig that it shares with the
+// framework. PUT /api/local-cache/config mutates that struct in place, so
+// the plugin picks up the new values on the next request without needing a
+// restart.
+package localcache
 
 import (
 	"context"
-	"encoding/json"
 	"fmt"
 	"sync"
 	"time"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
+	"github.com/maximhq/bifrost/framework/configstore"
 	"github.com/maximhq/bifrost/framework/vectorstore"
 )
 
-// Config contains configuration for the semantic cache plugin.
-// The VectorStore abstraction handles the underlying storage implementation and its defaults.
-// Only specify values you want to override from the semantic cache defaults.
-//
-// Modes:
-//   - Semantic mode: set Provider + EmbeddingModel + Dimension > 0. Both direct
-//     hash matching and embedding-based similarity search are enabled.
-//   - Direct-only mode: set Provider="" and Dimension=1. The plugin disables
-//     semantic search entirely; cache lookups go through the deterministic
-//     direct hash path. Dimension=1 keeps stores that require a vector happy.
-type Config struct {
-	// Embedding Model settings - REQUIRED for semantic caching
-	Provider       schemas.ModelProvider `json:"provider"`
-	EmbeddingModel string                `json:"embedding_model,omitempty"` // Model to use for generating embeddings (optional)
-
-	// Plugin behavior settings
-	CleanUpOnShutdown    bool          `json:"cleanup_on_shutdown,omitempty"`    // Clean up cache on shutdown (default: false)
-	TTL                  time.Duration `json:"ttl,omitempty"`                    // Time-to-live for cached responses (default: 5min)
-	Threshold            float64       `json:"threshold,omitempty"`              // Cosine similarity threshold for semantic matching (0 = unset → default 0.8)
-	VectorStoreNamespace string        `json:"vector_store_namespace,omitempty"` // Namespace for vector store (optional)
-	Dimension            int           `json:"dimension"`                        // Dimension for vector store (must be > 0 when Provider is set; use 1 for direct-only mode)
-
-	// Advanced caching behavior
-	DefaultCacheKey              string `json:"default_cache_key,omitempty"`              // Default cache key used when no per-request key is provided (optional, caching is disabled when empty and no per-request key is set)
-	ConversationHistoryThreshold int    `json:"conversation_history_threshold,omitempty"` // Skip caching for requests with more than this number of messages in the conversation history (default: 3)
-	CacheByModel                 *bool  `json:"cache_by_model,omitempty"`                 // Include model in cache key (default: true)
-	CacheByProvider              *bool  `json:"cache_by_provider,omitempty"`              // Include provider in cache key (default: true)
-	ExcludeSystemPrompt          *bool  `json:"exclude_system_prompt,omitempty"`          // Exclude system prompt in cache key (default: false)
-}
-
-// UnmarshalJSON implements custom JSON unmarshaling for Config so TTL accepts
-// either a duration string ("1m", "1h") or a JSON number (seconds). All other
-// fields decode through the default path via a type alias, so adding a new
-// field on Config does not require touching this method.
-func (c *Config) UnmarshalJSON(data []byte) error {
-	// alias suppresses Config's UnmarshalJSON to avoid infinite recursion.
-	// The outer TTL (json.RawMessage) shadows alias.TTL because the json
-	// package picks the shallower field on a name conflict.
-	type alias Config
-	aux := &struct {
-		TTL json.RawMessage `json:"ttl,omitempty"`
-		*alias
-	}{alias: (*alias)(c)}
-	if err := json.Unmarshal(data, aux); err != nil {
-		return fmt.Errorf("failed to unmarshal config: %w", err)
-	}
-
-	if len(aux.TTL) == 0 || string(aux.TTL) == "null" {
-		return nil
-	}
-
-	// Try string first ("1m"); fall back to a JSON number (seconds).
-	var s string
-	if err := json.Unmarshal(aux.TTL, &s); err == nil {
-		d, err := time.ParseDuration(s)
-		if err != nil {
-			return fmt.Errorf("failed to parse TTL duration string '%s': %w", s, err)
-		}
-		c.TTL = d
-	} else {
-		var seconds float64
-		if err := json.Unmarshal(aux.TTL, &seconds); err != nil {
-			return fmt.Errorf("unsupported TTL value: %s", string(aux.TTL))
-		}
-		c.TTL = time.Duration(seconds * float64(time.Second))
-	}
-	if c.TTL < 0 {
-		return fmt.Errorf("TTL must be non-negative, got %v", c.TTL)
-	}
-	return nil
-}
+// Config is an alias for configstore.LocalCacheConfig. The framework owns
+// the canonical type so the REST handler can live-mutate a single shared
+// pointer without going through plugin Reload. An alias denotes the same
+// type, so its methods (including the custom TTL UnmarshalJSON) apply here.
+type Config = configstore.LocalCacheConfig
 
 // StreamChunk is one chunk from a streaming response, retained until the
 // stream completes so it can be persisted as part of the cache entry.
@@ -162,8 +104,8 @@ type Plugin struct {
 
 // Plugin constants
 const (
-	PluginName                          string        = "semantic_cache"
-	DefaultVectorStoreNamespace         string        = "BifrostSemanticCachePlugin"
+	PluginName                          string        = "local_cache"
+	DefaultVectorStoreNamespace         string        = "BifrostLocalCachePlugin"
 	CacheConnectionTimeout              time.Duration = 5 * time.Second
 	CreateNamespaceTimeout              time.Duration = 30 * time.Second
 	CacheSetTimeout                     time.Duration = 30 * time.Second
@@ -173,7 +115,7 @@ const (
 )
 
 // SelectFields enumerates the properties projected back from the vector store
-// on a cache hit. params_hash and from_bifrost_semantic_cache_plugin are
+// on a cache hit. params_hash and from_bifrost_local_cache_plugin are
 // filter-only (used in WHERE-style queries to narrow matches) and intentionally
 // omitted from this projection — keep them defined in VectorStoreProperties
 // below so the store creates the columns/indexes, but don't fetch them.
@@ -208,9 +150,9 @@ var VectorStoreProperties = map[string]vectorstore.VectorStoreProperties{
 		DataType:    vectorstore.VectorStorePropertyTypeString,
 		Description: "The hash of the parameters used for the request",
 	},
-	"from_bifrost_semantic_cache_plugin": {
+	"from_bifrost_local_cache_plugin": {
 		DataType:    vectorstore.VectorStorePropertyTypeBoolean,
-		Description: "Whether the cache entry was created by the BifrostSemanticCachePlugin",
+		Description: "Whether the cache entry was created by the BifrostLocalCachePlugin",
 	},
 }
 
@@ -218,11 +160,11 @@ var VectorStoreProperties = map[string]vectorstore.VectorStoreProperties{
 // request enters Bifrost; the plugin reads them in Pre/PostLLMHook. CacheKey
 // (or Config.DefaultCacheKey) is the only one required for caching to engage.
 const (
-	CacheKey          schemas.BifrostContextKey = "semantic_cache-key"        // String. Required (or DefaultCacheKey) — bucket entries under a tenant/feature scope.
-	CacheTTLKey       schemas.BifrostContextKey = "semantic_cache-ttl"        // time.Duration. Per-request override of Config.TTL.
-	CacheThresholdKey schemas.BifrostContextKey = "semantic_cache-threshold"  // float64. Per-request override of the semantic similarity threshold.
-	CacheTypeKey      schemas.BifrostContextKey = "semantic_cache-cache_type" // CacheType. Narrow lookup to a single path (direct or semantic).
-	CacheNoStoreKey   schemas.BifrostContextKey = "semantic_cache-no_store"   // bool. Skip writing the response to cache (still served from cache on hit).
+	CacheKey          schemas.BifrostContextKey = "local_cache-key"        // string. Required unless Config.DefaultCacheKey is set; scopes entries to a tenant/feature bucket.
+	CacheTTLKey       schemas.BifrostContextKey = "local_cache-ttl"        // time.Duration. Overrides Config.TTL for this request only.
+	CacheThresholdKey schemas.BifrostContextKey = "local_cache-threshold"  // float64. Overrides the semantic similarity threshold for this request only.
+	CacheTypeKey      schemas.BifrostContextKey = "local_cache-cache_type" // CacheType. Restricts the lookup to a single path (direct or semantic).
+	CacheNoStoreKey   schemas.BifrostContextKey = "local_cache-no_store"   // bool. Skips writing the response to the cache; cache hits are still served.
 )
 
 type CacheType string
@@ -295,7 +237,7 @@ func Init(ctx context.Context, config *Config, logger schemas.Logger, store vect
 	createCtx, cancel := context.WithTimeout(ctx, CreateNamespaceTimeout)
 	defer cancel()
 	if err := store.CreateNamespace(createCtx, config.VectorStoreNamespace, config.Dimension, VectorStoreProperties); err != nil {
-		return nil, fmt.Errorf("failed to create namespace for semantic cache: %w", err)
+		return nil, fmt.Errorf("failed to create namespace for local cache: %w", err)
 	}
 
 	plugin.cleanupWg.Add(1)
@@ -307,17 +249,34 @@ func Init(ctx context.Context, config *Config, logger schemas.Logger, store vect
 	return plugin, nil
 }
 
+// EnsureNamespace creates the vector-store namespace named by the current
+// shared config (DefaultVectorStoreNamespace when unset); callers rely on the
+// store treating an existing namespace as a no-op. The REST handler invokes it
+// after a config change; old namespaces and their entries are left in place.
+func (plugin *Plugin) EnsureNamespace(ctx context.Context) error {
+	namespace := plugin.config.VectorStoreNamespace
+	if namespace == "" {
+		namespace = DefaultVectorStoreNamespace
+	}
+	createCtx, cancel := context.WithTimeout(ctx, CreateNamespaceTimeout)
+	defer cancel()
+	if err := plugin.store.CreateNamespace(createCtx, namespace, plugin.config.Dimension, VectorStoreProperties); err != nil {
+		return fmt.Errorf("failed to create namespace %q for local cache: %w", namespace, err)
+	}
+	return nil
+}
+
 // GetName returns the canonical name used for plugin identification and logging.
 func (plugin *Plugin) GetName() string {
 	return PluginName
 }
 
-// HTTPTransportPreHook is not used by the semantic cache plugin.
+// HTTPTransportPreHook is not used by the local cache plugin.
 func (plugin *Plugin) HTTPTransportPreHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest) (*schemas.HTTPResponse, error) {
 	return nil, nil
 }
 
-// HTTPTransportPostHook is not used by the semantic cache plugin.
+// HTTPTransportPostHook is not used by the local cache plugin.
 func (plugin *Plugin) HTTPTransportPostHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest, resp *schemas.HTTPResponse) error {
 	return nil
 }
@@ -347,7 +306,7 @@ func (plugin *Plugin) PreLLMHook(ctx *schemas.BifrostContext, req *schemas.Bifro
 		return req, nil, nil
 	}
 
-	if !isSemanticCacheSupportedRequestType(req.RequestType) {
+	if !isLocalCacheSupportedRequestType(req.RequestType) {
 		return req, nil, nil
 	}
 
@@ -660,7 +619,7 @@ func (plugin *Plugin) WaitForPendingOperations() {
 
 // Cleanup signals the background loops to stop and waits for in-flight cache
 // writes to drain before returning. When CleanUpOnShutdown is true, it then
-// deletes every entry tagged from_bifrost_semantic_cache_plugin and drops
+// deletes every entry tagged from_bifrost_local_cache_plugin and drops
 // the namespace — useful for ephemeral test environments. The default is to
 // leave entries in place so they can serve subsequent process restarts.
 func (plugin *Plugin) Cleanup() error {
@@ -688,7 +647,7 @@ func (plugin *Plugin) Cleanup() error {
 	// Delete all cache entries created by this plugin
 	queries := []vectorstore.Query{
 		{
-			Field:    "from_bifrost_semantic_cache_plugin",
+			Field:    "from_bifrost_local_cache_plugin",
 			Operator: vectorstore.QueryOperatorEqual,
 			Value:    true,
 		},
@@ -731,7 +690,7 @@ func (plugin *Plugin) ClearCacheForKey(cacheKey string) error {
 			Value:    cacheKey,
 		},
 		{
-			Field:    "from_bifrost_semantic_cache_plugin",
+			Field:    "from_bifrost_local_cache_plugin",
 			Operator: vectorstore.QueryOperatorEqual,
 			Value:    true,
 		},
diff --git a/plugins/semanticcache/main_test.go b/plugins/localcache/main_test.go
similarity index 98%
rename from plugins/semanticcache/main_test.go
rename to plugins/localcache/main_test.go
index 0924fa726b..85a9f3f642 100644
--- a/plugins/semanticcache/main_test.go
+++ b/plugins/localcache/main_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"context"
diff --git a/plugins/semanticcache/plugin_api_test.go b/plugins/localcache/plugin_api_test.go
similarity index 98%
rename from plugins/semanticcache/plugin_api_test.go
rename to plugins/localcache/plugin_api_test.go
index 908e88149d..1ceb7472c9 100644
--- a/plugins/semanticcache/plugin_api_test.go
+++ b/plugins/localcache/plugin_api_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"context"
@@ -145,7 +145,7 @@ func TestClearCacheForKey_FiltersByCacheKeyAndPluginMarker(t *testing.T) {
 		if q.Field == "cache_key" && q.Value == "session-42" && q.Operator == vectorstore.QueryOperatorEqual {
 			gotKey = true
 		}
-		if q.Field == "from_bifrost_semantic_cache_plugin" && q.Value == true {
+		if q.Field == "from_bifrost_local_cache_plugin" && q.Value == true {
 			gotMarker = true
 		}
 	}
@@ -153,7 +153,7 @@ func TestClearCacheForKey_FiltersByCacheKeyAndPluginMarker(t *testing.T) {
 		t.Errorf("expected cache_key=session-42 filter, got %+v", queries)
 	}
 	if !gotMarker {
-		t.Errorf("expected from_bifrost_semantic_cache_plugin=true filter, got %+v", queries)
+		t.Errorf("expected from_bifrost_local_cache_plugin=true filter, got %+v", queries)
 	}
 }
 
diff --git a/plugins/semanticcache/plugin_cache_type_test.go b/plugins/localcache/plugin_cache_type_test.go
similarity index 99%
rename from plugins/semanticcache/plugin_cache_type_test.go
rename to plugins/localcache/plugin_cache_type_test.go
index 9d8d655a1d..dd17b219fe 100644
--- a/plugins/semanticcache/plugin_cache_type_test.go
+++ b/plugins/localcache/plugin_cache_type_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"context"
diff --git a/plugins/semanticcache/plugin_conversation_config_test.go b/plugins/localcache/plugin_conversation_config_test.go
similarity index 99%
rename from plugins/semanticcache/plugin_conversation_config_test.go
rename to plugins/localcache/plugin_conversation_config_test.go
index c8c80c2db0..a3a85a8d9b 100644
--- a/plugins/semanticcache/plugin_conversation_config_test.go
+++ b/plugins/localcache/plugin_conversation_config_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"strconv"
diff --git a/plugins/semanticcache/plugin_core_test.go b/plugins/localcache/plugin_core_test.go
similarity index 97%
rename from plugins/semanticcache/plugin_core_test.go
rename to plugins/localcache/plugin_core_test.go
index b14c543720..70e97e97d5 100644
--- a/plugins/semanticcache/plugin_core_test.go
+++ b/plugins/localcache/plugin_core_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"context"
@@ -10,13 +10,13 @@ import (
 	"github.com/maximhq/bifrost/framework/vectorstore"
 )
 
-// TestSemanticCacheBasicFunctionality tests the core caching functionality.
+// TestLocalCacheBasicFunctionality tests the core caching functionality.
 //
 // Intentionally NOT parallel: the assertions at the bottom of this function
 // enforce wall-clock comparisons (cache must be faster than upstream, with at
 // least 1.5× speedup). Running this in parallel with other integration tests
 // causes CPU/network contention that flakes those ratios.
-func TestSemanticCacheBasicFunctionality(t *testing.T) {
+func TestLocalCacheBasicFunctionality(t *testing.T) {
 	setup := NewTestSetup(t)
 	defer setup.Cleanup()
 
@@ -200,7 +200,7 @@ func TestSemanticSearch(t *testing.T) {
 	}
 
 	// Performance comparison
-	t.Logf("Semantic Cache Performance:")
+	t.Logf("Local Cache Performance:")
 	t.Logf("First request (OpenAI):     %v", duration1)
 	t.Logf("Second request (Semantic):  %v", duration2)
 
@@ -530,11 +530,11 @@ func (m *MockUnsupportedStore) DeleteAll(ctx context.Context, namespace string,
 	return nil, vectorstore.ErrNotSupported
 }
 
-func (m *MockUnsupportedStore) SearchSemanticCache(ctx context.Context, queryEmbedding []float32, metadata map[string]interface{}, threshold float64, limit int64) ([]vectorstore.SearchResult, error) {
+func (m *MockUnsupportedStore) SearchLocalCache(ctx context.Context, queryEmbedding []float32, metadata map[string]interface{}, threshold float64, limit int64) ([]vectorstore.SearchResult, error) {
 	return nil, vectorstore.ErrNotSupported
 }
 
-func (m *MockUnsupportedStore) AddSemanticCache(ctx context.Context, key string, embedding []float32, metadata map[string]interface{}, ttl time.Duration) error {
+func (m *MockUnsupportedStore) AddLocalCache(ctx context.Context, key string, embedding []float32, metadata map[string]interface{}, ttl time.Duration) error {
 	return vectorstore.ErrNotSupported
 }
 
diff --git a/plugins/semanticcache/plugin_cross_cache_test.go b/plugins/localcache/plugin_cross_cache_test.go
similarity index 99%
rename from plugins/semanticcache/plugin_cross_cache_test.go
rename to plugins/localcache/plugin_cross_cache_test.go
index 00f1085443..99853e5ee1 100644
--- a/plugins/semanticcache/plugin_cross_cache_test.go
+++ b/plugins/localcache/plugin_cross_cache_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"testing"
diff --git a/plugins/semanticcache/plugin_default_cache_key_test.go b/plugins/localcache/plugin_default_cache_key_test.go
similarity index 99%
rename from plugins/semanticcache/plugin_default_cache_key_test.go
rename to plugins/localcache/plugin_default_cache_key_test.go
index db8e78443a..ec744cde69 100644
--- a/plugins/semanticcache/plugin_default_cache_key_test.go
+++ b/plugins/localcache/plugin_default_cache_key_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"testing"
diff --git a/plugins/semanticcache/plugin_edge_cases_test.go b/plugins/localcache/plugin_edge_cases_test.go
similarity index 99%
rename from plugins/semanticcache/plugin_edge_cases_test.go
rename to plugins/localcache/plugin_edge_cases_test.go
index 946daca1a9..0c3a5ce747 100644
--- a/plugins/semanticcache/plugin_edge_cases_test.go
+++ b/plugins/localcache/plugin_edge_cases_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"strings"
diff --git a/plugins/semanticcache/plugin_embedding_test.go b/plugins/localcache/plugin_embedding_test.go
similarity index 99%
rename from plugins/semanticcache/plugin_embedding_test.go
rename to plugins/localcache/plugin_embedding_test.go
index e42f71c63c..defe274233 100644
--- a/plugins/semanticcache/plugin_embedding_test.go
+++ b/plugins/localcache/plugin_embedding_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"testing"
diff --git a/plugins/semanticcache/plugin_image_generation_test.go b/plugins/localcache/plugin_image_generation_test.go
similarity index 99%
rename from plugins/semanticcache/plugin_image_generation_test.go
rename to plugins/localcache/plugin_image_generation_test.go
index c6dee8d347..761f9c97ba 100644
--- a/plugins/semanticcache/plugin_image_generation_test.go
+++ b/plugins/localcache/plugin_image_generation_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"os"
@@ -221,7 +221,7 @@ func TestImageGenerationSemanticSearch(t *testing.T) {
 	}
 
 	// Performance comparison
-	t.Logf("Semantic Cache Performance:")
+	t.Logf("Local Cache Performance:")
 	t.Logf("First request (OpenAI):     %v", duration1)
 	t.Logf("Second request (Semantic):  %v", duration2)
 
diff --git a/plugins/semanticcache/plugin_integration_test.go b/plugins/localcache/plugin_integration_test.go
similarity index 95%
rename from plugins/semanticcache/plugin_integration_test.go
rename to plugins/localcache/plugin_integration_test.go
index c153928972..6137838b91 100644
--- a/plugins/semanticcache/plugin_integration_test.go
+++ b/plugins/localcache/plugin_integration_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"strings"
@@ -10,8 +10,8 @@ import (
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
-// TestSemanticCacheBasicFlow tests the complete semantic cache flow
-func TestSemanticCacheBasicFlow(t *testing.T) {
+// TestLocalCacheBasicFlow tests the complete local cache flow
+func TestLocalCacheBasicFlow(t *testing.T) {
 	t.Parallel()
 	setup := NewTestSetup(t)
 	defer setup.Cleanup()
@@ -153,11 +153,11 @@ func TestSemanticCacheBasicFlow(t *testing.T) {
 	}
 
 	t.Log("✅ Content verification passed - original and cached responses match")
-	t.Log("🎉 Basic semantic cache flow test passed!")
+	t.Log("🎉 Basic local cache flow test passed!")
 }
 
-// TestSemanticCacheStrictFiltering tests that the cache respects parameter differences
-func TestSemanticCacheStrictFiltering(t *testing.T) {
+// TestLocalCacheStrictFiltering tests that the cache respects parameter differences
+func TestLocalCacheStrictFiltering(t *testing.T) {
 	t.Parallel()
 	setup := NewTestSetup(t)
 	defer setup.Cleanup()
@@ -305,8 +305,8 @@ func TestSemanticCacheStrictFiltering(t *testing.T) {
 	t.Log("🎉 Strict filtering test passed!")
 }
 
-// TestSemanticCacheStreamingFlow tests streaming response caching
-func TestSemanticCacheStreamingFlow(t *testing.T) {
+// TestLocalCacheStreamingFlow tests streaming response caching
+func TestLocalCacheStreamingFlow(t *testing.T) {
 	t.Parallel()
 	setup := NewTestSetup(t)
 	defer setup.Cleanup()
@@ -442,8 +442,8 @@ func TestSemanticCacheStreamingFlow(t *testing.T) {
 	t.Log("🎉 Streaming cache test passed!")
 }
 
-// TestSemanticCache_NoCacheWhenKeyMissing verifies cache is disabled when cache key is missing from context
-func TestSemanticCache_NoCacheWhenKeyMissing(t *testing.T) {
+// TestLocalCache_NoCacheWhenKeyMissing verifies cache is disabled when cache key is missing from context
+func TestLocalCache_NoCacheWhenKeyMissing(t *testing.T) {
 	t.Parallel()
 	t.Log("Testing cache behavior when cache key is missing...")
 
@@ -482,8 +482,8 @@ func TestSemanticCache_NoCacheWhenKeyMissing(t *testing.T) {
 	t.Log("🎉 No cache key test passed!")
 }
 
-// TestSemanticCache_CustomTTLHandling verifies cache respects custom TTL values from context
-func TestSemanticCache_CustomTTLHandling(t *testing.T) {
+// TestLocalCache_CustomTTLHandling verifies cache respects custom TTL values from context
+func TestLocalCache_CustomTTLHandling(t *testing.T) {
 	t.Parallel()
 	setup := NewTestSetup(t)
 	defer setup.Cleanup()
@@ -570,8 +570,8 @@ func TestSemanticCache_CustomTTLHandling(t *testing.T) {
 	t.Log("✅ Custom TTL configuration test passed (entry written and retrievable)")
 }
 
-// TestSemanticCache_CustomThresholdHandling verifies cache respects custom similarity threshold from context
-func TestSemanticCache_CustomThresholdHandling(t *testing.T) {
+// TestLocalCache_CustomThresholdHandling verifies cache respects custom similarity threshold from context
+func TestLocalCache_CustomThresholdHandling(t *testing.T) {
 	t.Parallel()
 	setup := NewTestSetup(t)
 	defer setup.Cleanup()
@@ -644,8 +644,8 @@ func TestSemanticCache_CustomThresholdHandling(t *testing.T) {
 	t.Log("✅ Custom threshold override tracked through PreLLMHook without breaking direct path")
 }
 
-// TestSemanticCache_ProviderModelCachingFlags verifies cache behavior with provider/model caching flags
-func TestSemanticCache_ProviderModelCachingFlags(t *testing.T) {
+// TestLocalCache_ProviderModelCachingFlags verifies cache behavior with provider/model caching flags
+func TestLocalCache_ProviderModelCachingFlags(t *testing.T) {
 	t.Parallel()
 	setup := NewTestSetup(t)
 	defer setup.Cleanup()
@@ -752,8 +752,8 @@ func TestSemanticCache_ProviderModelCachingFlags(t *testing.T) {
 	t.Log("✅ CacheByProvider=false + CacheByModel=false correctly shares entries across providers/models")
 }
 
-// TestSemanticCache_ConfigurationEdgeCases verifies edge cases in configuration handling
-func TestSemanticCache_ConfigurationEdgeCases(t *testing.T) {
+// TestLocalCache_ConfigurationEdgeCases verifies edge cases in configuration handling
+func TestLocalCache_ConfigurationEdgeCases(t *testing.T) {
 	t.Parallel()
 	setup := NewTestSetup(t)
 	defer setup.Cleanup()
diff --git a/plugins/semanticcache/plugin_nil_content_test.go b/plugins/localcache/plugin_nil_content_test.go
similarity index 99%
rename from plugins/semanticcache/plugin_nil_content_test.go
rename to plugins/localcache/plugin_nil_content_test.go
index 8337beb943..bc289ae42b 100644
--- a/plugins/semanticcache/plugin_nil_content_test.go
+++ b/plugins/localcache/plugin_nil_content_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"strings"
diff --git a/plugins/semanticcache/plugin_no_mutation_test.go b/plugins/localcache/plugin_no_mutation_test.go
similarity index 98%
rename from plugins/semanticcache/plugin_no_mutation_test.go
rename to plugins/localcache/plugin_no_mutation_test.go
index 340b4fdd9a..d00a454046 100644
--- a/plugins/semanticcache/plugin_no_mutation_test.go
+++ b/plugins/localcache/plugin_no_mutation_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"context"
@@ -14,7 +14,7 @@ import (
 )
 
 // requestCapturer is an LLMPlugin that records the request it sees in
-// PreLLMHook. Placed AFTER semantic_cache in the plugin chain it observes
+// PreLLMHook. Placed AFTER local_cache in the plugin chain it observes
 // the request post-cache-plugin-mutation; we then assert that nothing
 // landed in the request that originated from cache-side normalization
 // (lowercase, whitespace-trim, system-prompt filtering, etc.).
diff --git a/plugins/semanticcache/plugin_no_store_test.go b/plugins/localcache/plugin_no_store_test.go
similarity index 99%
rename from plugins/semanticcache/plugin_no_store_test.go
rename to plugins/localcache/plugin_no_store_test.go
index aef75171ff..a7f9174000 100644
--- a/plugins/semanticcache/plugin_no_store_test.go
+++ b/plugins/localcache/plugin_no_store_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"testing"
diff --git a/plugins/semanticcache/plugin_normalization_test.go b/plugins/localcache/plugin_normalization_test.go
similarity index 98%
rename from plugins/semanticcache/plugin_normalization_test.go
rename to plugins/localcache/plugin_normalization_test.go
index bb7e8c5144..d13a96fdcf 100644
--- a/plugins/semanticcache/plugin_normalization_test.go
+++ b/plugins/localcache/plugin_normalization_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"testing"
@@ -277,8 +277,8 @@ func TestChatCompletionContentBlocksNormalization(t *testing.T) {
 	}
 }
 
-// TestNormalizationWithSemanticCache tests that normalization works with semantic cache as well
-func TestNormalizationWithSemanticCache(t *testing.T) {
+// TestNormalizationWithLocalCache tests that normalization works with semantic cache as well
+func TestNormalizationWithLocalCache(t *testing.T) {
 	t.Parallel()
 	setup := NewTestSetup(t)
 	defer setup.Cleanup()
diff --git a/plugins/semanticcache/plugin_paths_test.go b/plugins/localcache/plugin_paths_test.go
similarity index 99%
rename from plugins/semanticcache/plugin_paths_test.go
rename to plugins/localcache/plugin_paths_test.go
index 5ca1ac8c7a..fead92b7da 100644
--- a/plugins/semanticcache/plugin_paths_test.go
+++ b/plugins/localcache/plugin_paths_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"context"
@@ -252,8 +252,8 @@ func TestExpiredEntry_DetectedAndDeleted(t *testing.T) {
 // WebSocketResponsesRequest support
 // -----------------------------------------------------------------------------
 
-func TestIsSemanticCacheSupportedRequestType_WebSocket(t *testing.T) {
-	if !isSemanticCacheSupportedRequestType(schemas.WebSocketResponsesRequest) {
+func TestIsLocalCacheSupportedRequestType_WebSocket(t *testing.T) {
+	if !isLocalCacheSupportedRequestType(schemas.WebSocketResponsesRequest) {
 		t.Fatal("WebSocketResponsesRequest should be supported")
 	}
 }
diff --git a/plugins/semanticcache/plugin_responses_test.go b/plugins/localcache/plugin_responses_test.go
similarity index 99%
rename from plugins/semanticcache/plugin_responses_test.go
rename to plugins/localcache/plugin_responses_test.go
index 2474ea88c1..013e26f591 100644
--- a/plugins/semanticcache/plugin_responses_test.go
+++ b/plugins/localcache/plugin_responses_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"testing"
diff --git a/plugins/semanticcache/plugin_streaming_test.go b/plugins/localcache/plugin_streaming_test.go
similarity index 99%
rename from plugins/semanticcache/plugin_streaming_test.go
rename to plugins/localcache/plugin_streaming_test.go
index 7a85717c7f..14c0e4c896 100644
--- a/plugins/semanticcache/plugin_streaming_test.go
+++ b/plugins/localcache/plugin_streaming_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"testing"
diff --git a/plugins/semanticcache/plugin_vectorstore_test.go b/plugins/localcache/plugin_vectorstore_test.go
similarity index 94%
rename from plugins/semanticcache/plugin_vectorstore_test.go
rename to plugins/localcache/plugin_vectorstore_test.go
index 6d29f08c8b..b061c09401 100644
--- a/plugins/semanticcache/plugin_vectorstore_test.go
+++ b/plugins/localcache/plugin_vectorstore_test.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"os"
@@ -60,8 +60,8 @@ func getDefaultTestConfig() *Config {
 	}
 }
 
-// TestSemanticCache_AllVectorStores_BasicFlow tests the basic cache flow across all vector stores
-func TestSemanticCache_AllVectorStores_BasicFlow(t *testing.T) {
+// TestLocalCache_AllVectorStores_BasicFlow tests the basic cache flow across all vector stores
+func TestLocalCache_AllVectorStores_BasicFlow(t *testing.T) {
 	t.Parallel()
 	for _, tc := range getVectorStoreTestCases() {
 		t.Run(tc.Name, func(t *testing.T) {
@@ -171,8 +171,8 @@ func TestSemanticCache_AllVectorStores_BasicFlow(t *testing.T) {
 	}
 }
 
-// TestSemanticCache_AllVectorStores_DirectHashMatch tests direct hash matching across all vector stores
-func TestSemanticCache_AllVectorStores_DirectHashMatch(t *testing.T) {
+// TestLocalCache_AllVectorStores_DirectHashMatch tests direct hash matching across all vector stores
+func TestLocalCache_AllVectorStores_DirectHashMatch(t *testing.T) {
 	t.Parallel()
 	for _, tc := range getVectorStoreTestCases() {
 		t.Run(tc.Name, func(t *testing.T) {
@@ -214,8 +214,8 @@ func TestSemanticCache_AllVectorStores_DirectHashMatch(t *testing.T) {
 	}
 }
 
-// TestSemanticCache_AllVectorStores_NamespaceIsolation tests that different cache keys are isolated
-func TestSemanticCache_AllVectorStores_NamespaceIsolation(t *testing.T) {
+// TestLocalCache_AllVectorStores_NamespaceIsolation tests that different cache keys are isolated
+func TestLocalCache_AllVectorStores_NamespaceIsolation(t *testing.T) {
 	t.Parallel()
 	for _, tc := range getVectorStoreTestCases() {
 		t.Run(tc.Name, func(t *testing.T) {
@@ -270,8 +270,8 @@ func TestSemanticCache_AllVectorStores_NamespaceIsolation(t *testing.T) {
 	}
 }
 
-// TestSemanticCache_AllVectorStores_ParameterFiltering tests that different parameters don't share cache
-func TestSemanticCache_AllVectorStores_ParameterFiltering(t *testing.T) {
+// TestLocalCache_AllVectorStores_ParameterFiltering tests that different parameters don't share cache
+func TestLocalCache_AllVectorStores_ParameterFiltering(t *testing.T) {
 	t.Parallel()
 	for _, tc := range getVectorStoreTestCases() {
 		t.Run(tc.Name, func(t *testing.T) {
@@ -385,8 +385,8 @@ func TestSemanticCache_AllVectorStores_ParameterFiltering(t *testing.T) {
 	}
 }
 
-// TestSemanticCache_AllVectorStores_EmbeddingRequest tests embedding request caching across all vector stores
-func TestSemanticCache_AllVectorStores_EmbeddingRequest(t *testing.T) {
+// TestLocalCache_AllVectorStores_EmbeddingRequest tests embedding request caching across all vector stores
+func TestLocalCache_AllVectorStores_EmbeddingRequest(t *testing.T) {
 	t.Parallel()
 	for _, tc := range getVectorStoreTestCases() {
 		t.Run(tc.Name, func(t *testing.T) {
diff --git a/plugins/semanticcache/search.go b/plugins/localcache/search.go
similarity index 98%
rename from plugins/semanticcache/search.go
rename to plugins/localcache/search.go
index dfd15ed7f8..568725acb3 100644
--- a/plugins/semanticcache/search.go
+++ b/plugins/localcache/search.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"context"
@@ -80,7 +80,7 @@ func (plugin *Plugin) performSemanticSearch(ctx *schemas.BifrostContext, state *
 	strictFilters := []vectorstore.Query{
 		{Field: "cache_key", Operator: vectorstore.QueryOperatorEqual, Value: cacheKey},
 		{Field: "params_hash", Operator: vectorstore.QueryOperatorEqual, Value: paramsHash},
-		{Field: "from_bifrost_semantic_cache_plugin", Operator: vectorstore.QueryOperatorEqual, Value: true},
+		{Field: "from_bifrost_local_cache_plugin", Operator: vectorstore.QueryOperatorEqual, Value: true},
 	}
 	if plugin.config.CacheByProvider != nil && *plugin.config.CacheByProvider {
 		strictFilters = append(strictFilters, vectorstore.Query{Field: "provider", Operator: vectorstore.QueryOperatorEqual, Value: string(provider)})
@@ -92,7 +92,7 @@ func (plugin *Plugin) performSemanticSearch(ctx *schemas.BifrostContext, state *
 	selectFields := selectFieldsForRequest(req.RequestType)
 	results, err := plugin.store.GetNearest(ctx, plugin.config.VectorStoreNamespace, embedding, strictFilters, selectFields, cacheThreshold, 1)
 	if err != nil {
-		return nil, fmt.Errorf("failed to search semantic cache: %w", err)
+		return nil, fmt.Errorf("failed to search local cache: %w", err)
 	}
 	if len(results) == 0 {
 		return nil, nil
diff --git a/plugins/semanticcache/state.go b/plugins/localcache/state.go
similarity index 97%
rename from plugins/semanticcache/state.go
rename to plugins/localcache/state.go
index 489c329076..a3feefbf6f 100644
--- a/plugins/semanticcache/state.go
+++ b/plugins/localcache/state.go
@@ -1,10 +1,10 @@
-package semanticcache
+package localcache
 
 import (
 	"time"
 )
 
-// cacheState holds per-request state for the semantic cache plugin. It's
+// cacheState holds per-request state for the local cache plugin. It's
 // keyed by the request ID and lives between PreLLMHook (where it's populated)
 // and PostLLMHook (where it's consumed and cleared).
 //
diff --git a/plugins/semanticcache/stream.go b/plugins/localcache/stream.go
similarity index 99%
rename from plugins/semanticcache/stream.go
rename to plugins/localcache/stream.go
index f8c3fd7b3a..86ca75ab75 100644
--- a/plugins/semanticcache/stream.go
+++ b/plugins/localcache/stream.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"context"
diff --git a/plugins/semanticcache/test_utils.go b/plugins/localcache/test_utils.go
similarity index 98%
rename from plugins/semanticcache/test_utils.go
rename to plugins/localcache/test_utils.go
index 5bfbbcffbd..1790677078 100644
--- a/plugins/semanticcache/test_utils.go
+++ b/plugins/localcache/test_utils.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"context"
@@ -179,7 +179,7 @@ func (baseAccount *BaseAccount) GetConfigForProvider(providerKey schemas.ModelPr
 	}, nil
 }
 
-// getMockRules returns a list of mock rules for the semantic cache tests
+// getMockRules returns a list of mock rules for the local cache tests
 func getMockRules() []mocker.MockRule {
 	return []mocker.MockRule{
 		// Core test prompts
@@ -366,7 +366,7 @@ func getMockRules() []mocker.MockRule {
 }
 
 // getMockedBifrostClient creates a Bifrost client with a mocker plugin for testing
-func getMockedBifrostClient(t *testing.T, ctx *schemas.BifrostContext, logger schemas.Logger, semanticCachePlugin schemas.LLMPlugin) *bifrost.Bifrost {
+func getMockedBifrostClient(t *testing.T, ctx *schemas.BifrostContext, logger schemas.Logger, localCachePlugin schemas.LLMPlugin) *bifrost.Bifrost {
 	mockerCfg := mocker.MockerConfig{
 		Enabled: true,
 		Rules:   getMockRules(),
@@ -380,7 +380,7 @@ func getMockedBifrostClient(t *testing.T, ctx *schemas.BifrostContext, logger sc
 	account := &BaseAccount{}
 	client, err := bifrost.Init(ctx, schemas.BifrostConfig{
 		Account:    account,
-		LLMPlugins: []schemas.LLMPlugin{semanticCachePlugin, mockerPlugin},
+		LLMPlugins: []schemas.LLMPlugin{localCachePlugin, mockerPlugin},
 		Logger:     logger,
 	})
 	if err != nil {
@@ -419,7 +419,7 @@ func NewTestSetupWithConfig(t *testing.T, config *Config) *TestSetup {
 // Mirrors production: many concurrent requests hit one namespace, isolated
 // by per-test cache_keys (see keyForTest). Distinct from the plugin's
 // production default so test runs can't collide with a real cache.
-const SharedTestNamespace = "BifrostSemanticCachePluginTest"
+const SharedTestNamespace = "BifrostLocalCachePluginTest"
 
 var (
 	sharedTestNamespaceOnce sync.Once
diff --git a/plugins/semanticcache/utils.go b/plugins/localcache/utils.go
similarity index 99%
rename from plugins/semanticcache/utils.go
rename to plugins/localcache/utils.go
index 29f15fc825..98b945c505 100644
--- a/plugins/semanticcache/utils.go
+++ b/plugins/localcache/utils.go
@@ -1,4 +1,4 @@
-package semanticcache
+package localcache
 
 import (
 	"context"
@@ -21,12 +21,12 @@ import (
 // same (cache_key, request_hash, params_hash) tuple maps to the same ID.
 var directCacheNamespace = uuid.MustParse("b1f3c2d4-e5a6-7890-abcd-ef1234567890")
 
-// isSemanticCacheSupportedRequestType reports whether semantic cache supports
+// isLocalCacheSupportedRequestType reports whether the local cache supports
 // this request type for cache lookup and storage. Unsupported types are skipped.
 //
 // IMPORTANT: this list must stay in sync with the switch in buildRequestMetadataForCaching.
 // When adding a new case there, add it here too.
-func isSemanticCacheSupportedRequestType(requestType schemas.RequestType) bool {
+func isLocalCacheSupportedRequestType(requestType schemas.RequestType) bool {
 	switch requestType {
 	case schemas.TextCompletionRequest,
 		schemas.TextCompletionStreamRequest,
@@ -123,7 +123,7 @@ func normalizeText(text string) string {
 	return strings.ToLower(strings.TrimSpace(text))
 }
 
-// float64ToFloat32Embedding converts a []float64 to a []float32. The semantic cache
+// float64ToFloat32Embedding converts a []float64 to a []float32. The local cache
 // keeps vector payloads as float32 even though the embedding APIs now
 // preserve full float64 precision — the cosine similarity used at query
 // time is well within float32 range.
@@ -444,7 +444,7 @@ func (plugin *Plugin) buildUnifiedMetadata(provider schemas.ModelProvider, model
 	unifiedMetadata["provider"] = string(provider)
 	unifiedMetadata["model"] = model
 	unifiedMetadata["cache_key"] = cacheKey
-	unifiedMetadata["from_bifrost_semantic_cache_plugin"] = true
+	unifiedMetadata["from_bifrost_local_cache_plugin"] = true
 	unifiedMetadata["expires_at"] = time.Now().Add(ttl).Unix()
 	if paramsHash != "" {
 		unifiedMetadata["params_hash"] = paramsHash
diff --git a/plugins/semanticcache/version b/plugins/localcache/version
similarity index 100%
rename from plugins/semanticcache/version
rename to plugins/localcache/version
diff --git a/plugins/logging/main.go b/plugins/logging/main.go
index 2216bc9879..1d5a67f92c 100644
--- a/plugins/logging/main.go
+++ b/plugins/logging/main.go
@@ -228,7 +228,7 @@ type LogMessage struct {
 	Timestamp          time.Time                          // Of the preHook/postHook call
 	Latency            int64                              // For latency updates
 	InitialData        *InitialLogData                    // For create operations
-	SemanticCacheDebug *schemas.BifrostCacheDebug         // For semantic cache operations
+	LocalCacheDebug    *schemas.BifrostCacheDebug         // For local cache operations
 	UpdateData         *UpdateLogData                     // For update operations
 	StreamResponse     *streaming.ProcessedStreamResponse // For streaming delta updates
 	RoutingEngineLogs  string                             // Formatted routing engine decision logs
@@ -830,7 +830,7 @@ func (p *LoggerPlugin) PostLLMHook(ctx *schemas.BifrostContext, result *schemas.
 	// Build the complete log entry with input (from PreLLMHook) + output (from PostLLMHook)
 	entry := buildCompleteLogEntryFromPending(pending)
 	// Apply common output fields. For cache hits, prefer the cache-serve
-	// latency stamped by the semantic cache plugin over the original provider
+	// latency stamped by the local cache plugin over the original provider
 	// latency preserved in the cached response.
 	var latency int64
 	if result != nil {
diff --git a/plugins/mocker/main.go b/plugins/mocker/main.go
index 53ce896615..8362156b7f 100644
--- a/plugins/mocker/main.go
+++ b/plugins/mocker/main.go
@@ -570,7 +570,7 @@ func (p *MockerPlugin) PreLLMHook(ctx *schemas.BifrostContext, req *schemas.Bifr
 	}
 
 	// For streaming requests with a short-circuit response, mark the stream as complete
-	// This is required for plugins like semantic cache that need to know when the stream ends
+	// This is required for plugins like local cache that need to know when the stream ends
 	if shortCircuit != nil && shortCircuit.Response != nil && bifrost.IsStreamRequestType(req.RequestType) {
 		ctx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true)
 	}
diff --git a/terraform/modules/bifrost/variables.tf b/terraform/modules/bifrost/variables.tf
index dbd7103f27..8f0951ad59 100644
--- a/terraform/modules/bifrost/variables.tf
+++ b/terraform/modules/bifrost/variables.tf
@@ -121,7 +121,7 @@ variable "guardrails_config" {
 }
 
 variable "plugins" {
-  description = "Plugins configuration. Array of plugin objects (telemetry, logging, governance, maxim, semantic_cache, otel, datadog)."
+  description = "Plugins configuration. Array of plugin objects (telemetry, localcache, logging, governance, maxim, otel, datadog)."
   type        = any
   default     = null
 }
diff --git a/transports/Dockerfile.local b/transports/Dockerfile.local
index 2d3f03edb3..5e6e67130f 100644
--- a/transports/Dockerfile.local
+++ b/transports/Dockerfile.local
@@ -39,12 +39,12 @@
     go work use ./plugins/compat && \
     go work use ./plugins/governance && \
     go work use ./plugins/jsonparser && \
+    go work use ./plugins/localcache && \
     go work use ./plugins/logging && \
     go work use ./plugins/maxim && \
     go work use ./plugins/mocker && \
     go work use ./plugins/otel && \
     go work use ./plugins/prompts && \
-    go work use ./plugins/semanticcache && \
     go work use ./plugins/telemetry && \
     go work use ./transports
 
diff --git a/transports/bifrost-http/handlers/cache.go b/transports/bifrost-http/handlers/cache.go
index 1f173f9679..e596d1638c 100644
--- a/transports/bifrost-http/handlers/cache.go
+++ b/transports/bifrost-http/handlers/cache.go
@@ -3,7 +3,7 @@ package handlers
 import (
 	"github.com/fasthttp/router"
 	"github.com/maximhq/bifrost/core/schemas"
-	"github.com/maximhq/bifrost/plugins/semanticcache"
+	"github.com/maximhq/bifrost/plugins/localcache"
 	"github.com/maximhq/bifrost/transports/bifrost-http/lib"
 	"github.com/valyala/fasthttp"
 )
@@ -21,13 +21,13 @@ type CacheHandler struct {
 }
 
 func NewCacheHandler(plugin schemas.LLMPlugin) *CacheHandler {
-	semanticCachePlugin, ok := plugin.(*semanticcache.Plugin)
+	localCachePlugin, ok := plugin.(*localcache.Plugin)
 	if !ok {
-		logger.Fatal("Cache handler requires a semantic cache plugin")
+		logger.Fatal("Cache handler requires a local cache plugin")
 	}
 
 	return &CacheHandler{
-		plugin: semanticCachePlugin,
+		plugin: localCachePlugin,
 	}
 }
 
diff --git a/transports/bifrost-http/handlers/cache_test.go b/transports/bifrost-http/handlers/cache_test.go
index a27e763c9b..9f1b5a04f4 100644
--- a/transports/bifrost-http/handlers/cache_test.go
+++ b/transports/bifrost-http/handlers/cache_test.go
@@ -9,7 +9,7 @@ import (
 )
 
 // fakeCacheClearer records calls and returns configured errors so the handler
-// branches can be exercised without a real semantic cache plugin.
+// branches can be exercised without a real local cache plugin.
 type fakeCacheClearer struct {
 	clearByID    func(string) error
 	clearByKey   func(string) error
diff --git a/transports/bifrost-http/handlers/config.go b/transports/bifrost-http/handlers/config.go
index 636900670f..7bc929ff27 100644
--- a/transports/bifrost-http/handlers/config.go
+++ b/transports/bifrost-http/handlers/config.go
@@ -20,6 +20,7 @@ import (
 	"github.com/maximhq/bifrost/framework/encrypt"
 	"github.com/maximhq/bifrost/framework/modelcatalog"
 	"github.com/maximhq/bifrost/plugins/compat"
+	"github.com/maximhq/bifrost/plugins/localcache"
 	"github.com/maximhq/bifrost/transports/bifrost-http/lib"
 	"github.com/valyala/fasthttp"
 )
@@ -44,6 +45,7 @@ var securityHeaders = []string{
 type ConfigManager interface {
 	UpdateAuthConfig(ctx context.Context, authConfig *configstore.AuthConfig) error
 	ReloadClientConfigFromConfigStore(ctx context.Context) error
+	ReloadLocalCacheConfigFromConfigStore(ctx context.Context) error
 	UpdateSyncConfig(ctx context.Context) error
 	ForceReloadPricing(ctx context.Context) error
 	UpdateDropExcessRequests(ctx context.Context, value bool)
@@ -384,6 +386,40 @@ func (h *ConfigHandler) updateConfig(ctx *fasthttp.RequestCtx) {
 		}
 	}
 	updatedConfig.Compat = newCompat
+
+	// Handle local cache plugin toggle. EnableLocalCache transitions
+	// (false→true / true→false) drive ReloadPlugin/RemovePlugin so the
+	// plugin is loaded/unloaded without a server restart. Pure config
+	// changes (TTL, threshold, etc.) flow through PUT /api/local-cache/config
+	// and mutate the shared *LocalCacheConfig pointer in place — no plugin
+	// reload involved.
+	if payload.ClientConfig.EnableLocalCache != nil {
+		newEnable := *payload.ClientConfig.EnableLocalCache
+		oldEnable := currentConfig.EnableLocalCache != nil && *currentConfig.EnableLocalCache
+		if newEnable != oldEnable {
+			if newEnable {
+				if h.store.LocalCacheConfig == nil {
+					logger.Warn("cannot enable local cache plugin: no local cache config persisted yet — call PUT /api/local-cache/config first")
+					SendError(ctx, fasthttp.StatusBadRequest, "cannot enable local cache plugin: no local cache config persisted yet — call PUT /api/local-cache/config first")
+					return
+				}
+				if err := h.configManager.ReloadPlugin(ctx, localcache.PluginName, nil, nil, nil, nil); err != nil {
+					logger.Warn("failed to load local cache plugin: %v", err)
+					SendError(ctx, fasthttp.StatusBadRequest, "Failed to load local cache plugin")
+					return
+				}
+			} else {
+				disabledCtx := context.WithValue(ctx, PluginDisabledKey, true)
+				if err := h.configManager.RemovePlugin(disabledCtx, localcache.PluginName); err != nil {
+					logger.Warn("failed to remove local cache plugin: %v", err)
+					SendError(ctx, fasthttp.StatusBadRequest, "Failed to remove local cache plugin")
+					return
+				}
+			}
+		}
+		updatedConfig.EnableLocalCache = payload.ClientConfig.EnableLocalCache
+	}
+
 	// Only update MCP fields if explicitly provided (non-zero) to avoid clearing stored values
 	if payload.ClientConfig.MCPAgentDepth > 0 {
 		updatedConfig.MCPAgentDepth = payload.ClientConfig.MCPAgentDepth
diff --git a/transports/bifrost-http/handlers/local_cache.go b/transports/bifrost-http/handlers/local_cache.go
new file mode 100644
index 0000000000..4b86ee6f36
--- /dev/null
+++ b/transports/bifrost-http/handlers/local_cache.go
@@ -0,0 +1,155 @@
+package handlers
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	"github.com/fasthttp/router"
+	"github.com/maximhq/bifrost/core/schemas"
+	"github.com/maximhq/bifrost/framework/configstore"
+	"github.com/maximhq/bifrost/plugins/localcache"
+	"github.com/maximhq/bifrost/transports/bifrost-http/lib"
+	"github.com/valyala/fasthttp"
+)
+
+// namespaceEnsurer is the narrow view of the local cache plugin that this
+// handler depends on: after VectorStoreNamespace or Dimension changes,
+// updateConfig looks the plugin up through this interface and calls
+// EnsureNamespace. Keeping it this small lets tests substitute a stub.
+type namespaceEnsurer interface {
+	EnsureNamespace(ctx context.Context) error
+}
+
+// LocalCacheHandler serves the dedicated config endpoints for the local
+// cache plugin (GET/PUT /api/local-cache/config). store exposes the shared
+// *configstore.LocalCacheConfig and the ConfigStore used for persistence;
+// configManager is asked to reload that config after a successful write.
+// The enable/disable toggle (ClientConfig.EnableLocalCache) is not handled
+// here — it is processed by ConfigHandler.updateConfig.
+type LocalCacheHandler struct {
+	store         *lib.Config
+	configManager ConfigManager
+}
+
+// NewLocalCacheHandler wires a LocalCacheHandler to its config manager and store.
+func NewLocalCacheHandler(configManager ConfigManager, store *lib.Config) *LocalCacheHandler {
+	return &LocalCacheHandler{store: store, configManager: configManager}
+}
+
+// RegisterRoutes mounts GET and PUT /api/local-cache/config behind the given middlewares.
+func (h *LocalCacheHandler) RegisterRoutes(r *router.Router, middlewares ...schemas.BifrostHTTPMiddleware) {
+	const configPath = "/api/local-cache/config"
+	r.GET(configPath, lib.ChainMiddlewares(h.getConfig, middlewares...))
+	r.PUT(configPath, lib.ChainMiddlewares(h.updateConfig, middlewares...))
+}
+
+// getConfig responds with the local cache configuration. The in-memory
+// config on the store is preferred; when it is nil the handler reads the
+// persisted row instead, and when no row exists either it returns a
+// zero-valued config so the UI can render an unconfigured form.
+func (h *LocalCacheHandler) getConfig(ctx *fasthttp.RequestCtx) {
+	if live := h.store.LocalCacheConfig; live != nil {
+		SendJSON(ctx, *live)
+		return
+	}
+	configStore := h.store.ConfigStore
+	if configStore == nil {
+		SendError(ctx, fasthttp.StatusServiceUnavailable, "config store not available")
+		return
+	}
+	persisted, err := configStore.GetLocalCacheConfig(ctx)
+	switch {
+	case err != nil:
+		SendError(ctx, fasthttp.StatusInternalServerError, fmt.Sprintf("failed to fetch local cache config: %v", err))
+	case persisted == nil:
+		SendJSON(ctx, configstore.LocalCacheConfig{})
+	default:
+		SendJSON(ctx, *persisted)
+	}
+}
+
+// updateConfig handles PUT /api/local-cache/config: it decodes and validates
+// the payload, stamps it with a fresh config hash, persists it, and asks the
+// config manager to reload the local cache config from the store. When
+// VectorStoreNamespace or Dimension differs from the config that was live
+// before the write, EnsureNamespace is called on the plugin if it is loaded.
+func (h *LocalCacheHandler) updateConfig(ctx *fasthttp.RequestCtx) {
+	if h.store.ConfigStore == nil {
+		SendError(ctx, fasthttp.StatusServiceUnavailable, "config store not initialized")
+		return
+	}
+	var payload configstore.LocalCacheConfig
+	if err := json.Unmarshal(ctx.PostBody(), &payload); err != nil {
+		SendError(ctx, fasthttp.StatusBadRequest, fmt.Sprintf("Invalid request format: %v", err))
+		return
+	}
+	if err := validateLocalCachePayload(&payload); err != nil {
+		SendError(ctx, fasthttp.StatusBadRequest, err.Error())
+		return
+	}
+	// Snapshot the structural fields of the currently live config; they are
+	// compared with the payload once the write and reload have succeeded.
+	oldNamespace := ""
+	oldDimension := 0
+	if h.store.LocalCacheConfig != nil {
+		oldNamespace = h.store.LocalCacheConfig.VectorStoreNamespace
+		oldDimension = h.store.LocalCacheConfig.Dimension
+	}
+
+	// Refresh hash so config-sync sees this as the current state. A hashing
+	// failure is logged rather than dropped silently; the write still proceeds.
+	if hash, hashErr := payload.GenerateLocalCacheConfigHash(); hashErr != nil {
+		logger.Warn("failed to generate local cache config hash: %v", hashErr)
+	} else {
+		payload.ConfigHash = hash
+	}
+	if err := h.store.ConfigStore.UpdateLocalCacheConfig(ctx, &payload); err != nil {
+		SendError(ctx, fasthttp.StatusInternalServerError, fmt.Sprintf("failed to persist local cache config: %v", err))
+		return
+	}
+	// No plugin reload happens here — only the on/off toggle (PUT /api/config
+	// with an EnableLocalCache flip) loads or unloads the plugin.
+	if err := h.configManager.ReloadLocalCacheConfigFromConfigStore(ctx); err != nil {
+		SendError(ctx, fasthttp.StatusInternalServerError, fmt.Sprintf("failed to reload local cache config: %v", err))
+		return
+	}
+	// Structural change: have the loaded plugin ensure its namespace. Data in
+	// the previous namespace is left untouched by this handler.
+	if payload.VectorStoreNamespace != oldNamespace || payload.Dimension != oldDimension {
+		if plugin, err := lib.FindPluginAs[namespaceEnsurer](h.store, localcache.PluginName); err == nil && plugin != nil {
+			if ensureErr := plugin.EnsureNamespace(context.Background()); ensureErr != nil {
+				logger.Warn("local cache config persisted but EnsureNamespace failed: %v", ensureErr)
+				SendError(ctx, fasthttp.StatusInternalServerError, fmt.Sprintf("config saved but namespace setup failed: %v", ensureErr))
+				return
+			}
+		}
+		// A failed lookup or nil plugin is treated as "plugin not loaded";
+		// nothing is done for the namespace in that case.
+	}
+	SendJSON(ctx, payload)
+}
+
+// validateLocalCachePayload checks a LocalCacheConfig received over the REST
+// API and returns the first violated rule as an error, or nil when valid.
+// Rules: dimension >= 1; dimension 1 is direct-only mode and takes no
+// provider, while dimension > 1 requires a provider and embedding_model (the
+// pairing the removed ValidateSemanticCacheConfig enforced); ttl >= 0; and
+// threshold within [0,1].
+func validateLocalCachePayload(p *configstore.LocalCacheConfig) error {
+	switch {
+	case p.Dimension < 1:
+		return fmt.Errorf("local_cache 'dimension' must be >= 1, got %d", p.Dimension)
+	case p.Provider == "" && p.Dimension > 1:
+		return fmt.Errorf("local_cache 'provider' is required when 'dimension' > 1; use dimension=1 for direct-only mode without a provider")
+	case p.Provider != "" && p.Dimension <= 1:
+		return fmt.Errorf("local_cache 'dimension' must be > 1 when 'provider' is set; use dimension=1 for direct-only mode without a provider")
+	case p.Provider != "" && p.EmbeddingModel == "":
+		return fmt.Errorf("local_cache 'embedding_model' is required when 'provider' is set")
+	case p.TTL < 0:
+		return fmt.Errorf("local_cache 'ttl' must be non-negative")
+	case p.Threshold < 0 || p.Threshold > 1:
+		return fmt.Errorf("local_cache 'threshold' must be in [0,1]")
+	}
+	return nil
+}
diff --git a/transports/bifrost-http/lib/config.go b/transports/bifrost-http/lib/config.go
index fac5a42dde..4fd98787fd 100644
--- a/transports/bifrost-http/lib/config.go
+++ b/transports/bifrost-http/lib/config.go
@@ -42,7 +42,7 @@ import (
 	"github.com/maximhq/bifrost/plugins/maxim"
 	"github.com/maximhq/bifrost/plugins/otel"
 	"github.com/maximhq/bifrost/plugins/prompts"
-	"github.com/maximhq/bifrost/plugins/semanticcache"
+	"github.com/maximhq/bifrost/plugins/localcache"
 	"github.com/maximhq/bifrost/plugins/telemetry"
 	"gorm.io/gorm"
 )
@@ -121,7 +121,7 @@ func IsBuiltinPlugin(name string) bool {
 		name == governance.PluginName ||
 		name == compat.PluginName ||
 		name == maxim.PluginName ||
-		name == semanticcache.PluginName ||
+		name == localcache.PluginName ||
 		name == otel.PluginName
 }
 
@@ -151,6 +151,7 @@ type ConfigData struct {
 	VectorStoreConfig *vectorstore.Config                   `json:"vector_store,omitempty"`
 	ConfigStoreConfig *configstore.Config                   `json:"config_store,omitempty"`
 	LogsStoreConfig   *logstore.Config                      `json:"logs_store,omitempty"`
+	LocalCache        *configstore.LocalCacheConfig         `json:"local_cache,omitempty"`
 	Plugins           []*schemas.PluginConfig               `json:"plugins,omitempty"`
 	WebSocket         *schemas.WebSocketConfig              `json:"websocket,omitempty"`
 }
@@ -172,6 +173,7 @@ func (cd *ConfigData) UnmarshalJSON(data []byte) error {
 		VectorStoreConfig json.RawMessage                       `json:"vector_store,omitempty"`
 		ConfigStoreConfig json.RawMessage                       `json:"config_store,omitempty"`
 		LogsStoreConfig   json.RawMessage                       `json:"logs_store,omitempty"`
+		LocalCache        *configstore.LocalCacheConfig         `json:"local_cache,omitempty"`
 		Plugins           []*schemas.PluginConfig               `json:"plugins,omitempty"`
 		WebSocket         *schemas.WebSocketConfig              `json:"websocket,omitempty"`
 	}
@@ -189,6 +191,7 @@ func (cd *ConfigData) UnmarshalJSON(data []byte) error {
 	cd.Providers = temp.Providers
 	cd.MCP = temp.MCP
 	cd.Governance = temp.Governance
+	cd.LocalCache = temp.LocalCache
 	cd.Plugins = temp.Plugins
 	cd.WebSocket = temp.WebSocket
 	// Initialize providers map if nil
@@ -262,6 +265,11 @@ type Config struct {
 	Providers        map[schemas.ModelProvider]configstore.ProviderConfig
 	MCPConfig        *schemas.MCPConfig
 	GovernanceConfig *configstore.GovernanceConfig
+	// LocalCacheConfig is the live, mutable configuration for the local cache
+	// plugin. The plugin holds the same pointer; PUT /api/local-cache/config
+	// rewrites *LocalCacheConfig in place so the plugin sees fresh values
+	// on the next request without needing a Reload.
+	LocalCacheConfig *configstore.LocalCacheConfig
 	FrameworkConfig  *framework.FrameworkConfig
 	ProxyConfig      *configstoreTables.GlobalProxyConfig
 
@@ -529,7 +537,11 @@ func LoadConfig(ctx context.Context, configDirPath string) (*Config, error) {
 	loadGovernanceConfig(ctx, config, &configData)
 	// 8. Auth config
 	loadAuthConfig(ctx, config, &configData)
-	// 9. Plugins
+	// 9. Local cache config and plugins. loadLocalCacheConfig must run
+	// before loadPlugins so the boot migration from
+	// config_plugins[semantic_cache] can land before the plugin layer
+	// reads its config.
+	loadLocalCacheConfig(ctx, config, &configData)
 	loadPlugins(ctx, config, &configData)
 	// 10. Framework config and pricing manager
 	initFrameworkConfig(ctx, config, &configData)
@@ -767,6 +779,128 @@ func loadClientConfig(ctx context.Context, config *Config, configData *ConfigDat
 	}
 }
 
+// loadLocalCacheConfig resolves the effective local cache config from the
+// config store and config.json and stores it on config.LocalCacheConfig.
+// It first attempts the legacy semantic_cache plugin-row migration, then:
+//   - no DB row, no file section: leaves config.LocalCacheConfig untouched;
+//   - no DB row, file section: uses the file config and seeds the store;
+//   - DB row, no file section: uses the DB row;
+//   - both: compares the stored hash with the file hash; on mismatch the
+//     file config wins and is written back, otherwise the DB row is kept.
+// Store and hash failures are logged as warnings and never abort loading.
+func loadLocalCacheConfig(ctx context.Context, config *Config, configData *ConfigData) {
+	var (
+		dbConfig *configstore.LocalCacheConfig
+		err      error
+	)
+	if config.ConfigStore != nil {
+		// Migrate before reading so a converted legacy row is picked up by
+		// the lookup that follows.
+		if migrateErr := migrateLegacySemanticCachePluginRow(ctx, config); migrateErr != nil {
+			logger.Warn("failed to migrate legacy semantic_cache plugin row: %v", migrateErr)
+		}
+		if dbConfig, err = config.ConfigStore.GetLocalCacheConfig(ctx); err != nil {
+			logger.Warn("failed to get local cache config from store: %v", err)
+		}
+	}
+	fileConfig := configData.LocalCache
+
+	if dbConfig == nil {
+		// No stored row: fall back to the file section when there is one.
+		if fileConfig == nil {
+			logger.Debug("local cache config not found in store or file; plugin will be disabled")
+			return
+		}
+		logger.Debug("seeding local cache config from file")
+		config.LocalCacheConfig = fileConfig
+		fileHash, hashErr := fileConfig.GenerateLocalCacheConfigHash()
+		if hashErr == nil {
+			config.LocalCacheConfig.ConfigHash = fileHash
+		} else {
+			logger.Warn("failed to generate local cache config hash: %v", hashErr)
+		}
+		if config.ConfigStore != nil {
+			if err = config.ConfigStore.UpdateLocalCacheConfig(ctx, config.LocalCacheConfig); err != nil {
+				logger.Warn("failed to seed local cache config in store: %v", err)
+			}
+		}
+		return
+	}
+
+	// A stored row exists and is the default unless the file disagrees.
+	config.LocalCacheConfig = dbConfig
+	if fileConfig == nil {
+		logger.Debug("no local cache config in file, using DB config")
+		return
+	}
+	fileHash, hashErr := fileConfig.GenerateLocalCacheConfigHash()
+	if hashErr != nil {
+		logger.Warn("failed to generate local cache config hash from file: %v", hashErr)
+		return
+	}
+	if dbConfig.ConfigHash == fileHash {
+		logger.Debug("local cache config hash matches, keeping DB config")
+		return
+	}
+	logger.Info("local cache config was updated in config.json, syncing. Note: file config takes precedence.")
+	config.LocalCacheConfig = fileConfig
+	config.LocalCacheConfig.ConfigHash = fileHash
+	if config.ConfigStore != nil {
+		if err = config.ConfigStore.UpdateLocalCacheConfig(ctx, config.LocalCacheConfig); err != nil {
+			logger.Warn("failed to update local cache config in store from file: %v", err)
+		}
+	}
+}
+
+// migrateLegacySemanticCachePluginRow moves a pre-rename "semantic_cache"
+// plugin row into the dedicated local cache config, copies its Enabled flag
+// onto ClientConfig.EnableLocalCache, and deletes the legacy row. It is a
+// no-op when the store is nil, a local cache config already exists, or the
+// legacy row cannot be fetched; decode and write failures are returned.
+func migrateLegacySemanticCachePluginRow(ctx context.Context, config *Config) error {
+	configStore := config.ConfigStore
+	if configStore == nil {
+		return nil
+	}
+	// A populated new table means the migration already ran, or the local
+	// cache was configured through the new path; never overwrite it.
+	if existing, err := configStore.GetLocalCacheConfig(ctx); err == nil && existing != nil {
+		return nil
+	}
+	legacy, err := configStore.GetPlugin(ctx, "semantic_cache")
+	if err != nil || legacy == nil {
+		return nil
+	}
+	logger.Info("migrating legacy semantic_cache plugin row to config_local_cache table")
+	// Decode the legacy JSON directly; an empty ConfigJSON yields a zero config.
+	var migrated configstore.LocalCacheConfig
+	if legacy.ConfigJSON != "" {
+		if err := json.Unmarshal([]byte(legacy.ConfigJSON), &migrated); err != nil {
+			return fmt.Errorf("failed to decode legacy semantic_cache config json: %w", err)
+		}
+	}
+	if hash, hashErr := migrated.GenerateLocalCacheConfigHash(); hashErr != nil {
+		logger.Warn("failed to generate hash for migrated local cache config: %v", hashErr)
+	} else {
+		migrated.ConfigHash = hash
+	}
+	if err := configStore.UpdateLocalCacheConfig(ctx, &migrated); err != nil {
+		return fmt.Errorf("failed to write migrated local cache config: %w", err)
+	}
+	// Carry the legacy enabled flag over so the toggle keeps the user's choice.
+	if config.ClientConfig != nil {
+		enabled := legacy.Enabled
+		config.ClientConfig.EnableLocalCache = &enabled
+		if err := configStore.UpdateClientConfig(ctx, config.ClientConfig); err != nil {
+			logger.Warn("failed to persist EnableLocalCache flag during migration: %v", err)
+		}
+	}
+	if err := configStore.DeletePlugin(ctx, "semantic_cache"); err != nil {
+		logger.Warn("failed to delete legacy semantic_cache plugin row: %v", err)
+	}
+	return nil
+}
+
 // loadProviders loads and merges providers from file with store using hash reconciliation
 func loadProviders(ctx context.Context, config *Config, configData *ConfigData) error {
 	var providersInConfigStore map[schemas.ModelProvider]configstore.ProviderConfig
@@ -2631,11 +2765,6 @@ func loadPlugins(ctx context.Context, config *Config, configData *ConfigData) {
 					Placement: plugin.Placement,
 					Order:     plugin.Order,
 				}
-				if plugin.Name == semanticcache.PluginName {
-					if err := config.ValidateSemanticCacheConfig(pluginConfig); err != nil {
-						logger.Warn("failed to validate semantic cache config: %v", err)
-					}
-				}
 				config.PluginConfigs[i] = pluginConfig
 			}
 		}
@@ -4968,126 +5097,6 @@ func ValidateCustomProviderUpdate(newConfig, existingConfig configstore.Provider
 	return nil
 }
 
-func (c *Config) ValidateSemanticCacheConfig(config *schemas.PluginConfig) error {
-	if config.Name != semanticcache.PluginName {
-		return nil
-	}
-
-	// Check if config.Config exists
-	if config.Config == nil {
-		return fmt.Errorf("semantic_cache plugin config is nil")
-	}
-
-	// Type assert config.Config to map[string]interface{}
-	configMap, ok := config.Config.(map[string]interface{})
-	if !ok {
-		return fmt.Errorf("semantic_cache plugin config must be a map, got %T", config.Config)
-	}
-
-	dimension, hasDimension, err := semanticCacheConfigDimension(configMap)
-	if err != nil {
-		return err
-	}
-
-	// Check if provider key exists and is a string
-	providerVal, exists := configMap["provider"]
-	if !exists {
-		if hasDimension && dimension == 1 {
-			delete(configMap, "keys")
-			delete(configMap, "embedding_model")
-			return nil
-		}
-		return fmt.Errorf("semantic_cache plugin requires 'provider' for semantic mode (dimension > 1). For direct-only mode, set dimension: 1 and omit provider")
-	}
-
-	provider, ok := providerVal.(string)
-	if !ok {
-		return fmt.Errorf("semantic_cache plugin 'provider' field must be a string, got %T", providerVal)
-	}
-	provider = strings.TrimSpace(provider)
-	configMap["provider"] = provider
-
-	if provider == "" {
-		if hasDimension && dimension == 1 {
-			delete(configMap, "provider")
-			delete(configMap, "keys")
-			delete(configMap, "embedding_model")
-			return nil
-		}
-		return fmt.Errorf("semantic_cache plugin requires a non-empty 'provider' for semantic mode (dimension > 1). For direct-only mode, set dimension: 1 and omit provider")
-	}
-	if !hasDimension {
-		return fmt.Errorf("semantic_cache plugin requires 'dimension' for provider-backed semantic mode. For direct-only mode, set dimension: 1 and omit provider")
-	}
-	if dimension <= 1 {
-		return fmt.Errorf("semantic_cache plugin requires 'dimension' > 1 when 'provider' is set. Use dimension: 1 only for direct-only mode without a provider")
-	}
-
-	embeddingModelVal, exists := configMap["embedding_model"]
-	if !exists {
-		return fmt.Errorf("semantic_cache plugin requires 'embedding_model' when 'provider' is set")
-	}
-	embeddingModel, ok := embeddingModelVal.(string)
-	if !ok {
-		return fmt.Errorf("semantic_cache plugin 'embedding_model' field must be a string, got %T", embeddingModelVal)
-	}
-	embeddingModel = strings.TrimSpace(embeddingModel)
-	if embeddingModel == "" {
-		return fmt.Errorf("semantic_cache plugin requires a non-empty 'embedding_model' when 'provider' is set")
-	}
-	configMap["embedding_model"] = embeddingModel
-
-	// Validate that the provider is configured in the global client (keys are inherited automatically).
-	if _, err := c.GetProviderConfigRaw(schemas.ModelProvider(provider)); err != nil {
-		return fmt.Errorf("failed to get provider config for %s: %w", provider, err)
-	}
-
-	return nil
-}
-
-func semanticCacheConfigDimension(configMap map[string]interface{}) (int, bool, error) {
-	dimensionVal, exists := configMap["dimension"]
-	if !exists {
-		return 0, false, nil
-	}
-
-	switch v := dimensionVal.(type) {
-	case int:
-		if v < 1 {
-			return 0, false, fmt.Errorf("semantic_cache plugin 'dimension' must be >= 1, got %d", v)
-		}
-		return v, true, nil
-	case int32:
-		if v < 1 {
-			return 0, false, fmt.Errorf("semantic_cache plugin 'dimension' must be >= 1, got %d", v)
-		}
-		return int(v), true, nil
-	case int64:
-		if v < 1 {
-			return 0, false, fmt.Errorf("semantic_cache plugin 'dimension' must be >= 1, got %d", v)
-		}
-		return int(v), true, nil
-	case float64:
-		if v != math.Trunc(v) {
-			return 0, false, fmt.Errorf("semantic_cache plugin 'dimension' field must be an integer, got %v", v)
-		}
-		if v < 1 {
-			return 0, false, fmt.Errorf("semantic_cache plugin 'dimension' must be >= 1, got %v", v)
-		}
-		return int(v), true, nil
-	case json.Number:
-		parsed, err := v.Int64()
-		if err != nil {
-			return 0, false, fmt.Errorf("semantic_cache plugin 'dimension' field must be an integer, got %q", v)
-		}
-		if parsed < 1 {
-			return 0, false, fmt.Errorf("semantic_cache plugin 'dimension' must be >= 1, got %d", parsed)
-		}
-		return int(parsed), true, nil
-	default:
-		return 0, false, fmt.Errorf("semantic_cache plugin 'dimension' field must be numeric, got %T", dimensionVal)
-	}
-}
 
 func DeepCopy[T any](in T) (T, error) {
 	var out T
diff --git a/transports/bifrost-http/lib/config_test.go b/transports/bifrost-http/lib/config_test.go
index 07dc2adeb6..800618447a 100644
--- a/transports/bifrost-http/lib/config_test.go
+++ b/transports/bifrost-http/lib/config_test.go
@@ -388,6 +388,7 @@ type MockConfigStore struct {
 	authConfig       *configstore.AuthConfig
 	frameworkConfig  *tables.TableFrameworkConfig
 	vectorConfig     *vectorstore.Config
+	localCacheConfig *configstore.LocalCacheConfig
 	logsConfig       *logstore.Config
 	plugins          []*tables.TablePlugin
 
@@ -879,6 +880,16 @@ func (m *MockConfigStore) GetVectorStoreConfig(ctx context.Context) (*vectorstor
 	return m.vectorConfig, nil
 }
 
+// Local cache config: both methods operate on the in-memory localCacheConfig field.
+func (m *MockConfigStore) GetLocalCacheConfig(ctx context.Context) (*configstore.LocalCacheConfig, error) {
+	return m.localCacheConfig, nil
+}
+
+func (m *MockConfigStore) UpdateLocalCacheConfig(ctx context.Context, config *configstore.LocalCacheConfig) error {
+	m.localCacheConfig = config
+	return nil
+}
+
 // Logs store config
 func (m *MockConfigStore) UpdateLogsStoreConfig(ctx context.Context, config *logstore.Config) error {
 	m.logsConfig = config
@@ -18189,3 +18200,4 @@ func TestVersionField_Version2_NoCompat(t *testing.T) {
 	require.Empty(t, anthropicCfg.Keys[0].Models,
 		"v2 semantics: empty models must NOT be normalised")
 }
+
diff --git a/transports/bifrost-http/lib/ctx.go b/transports/bifrost-http/lib/ctx.go
index e1069b6024..4bd9f6930c 100644
--- a/transports/bifrost-http/lib/ctx.go
+++ b/transports/bifrost-http/lib/ctx.go
@@ -19,7 +19,7 @@ import (
 	"github.com/maximhq/bifrost/core/schemas"
 	"github.com/maximhq/bifrost/plugins/governance"
+	"github.com/maximhq/bifrost/plugins/localcache"
 	"github.com/maximhq/bifrost/plugins/maxim"
-	"github.com/maximhq/bifrost/plugins/semanticcache"
 	"github.com/valyala/fasthttp"
 )
 
@@ -359,7 +359,7 @@ func ConvertToBifrostContext(ctx *fasthttp.RequestCtx, store HandlerStore) (*sch
 		}
 		// Handle cache key header (x-bf-cache-key)
 		if keyStr == "x-bf-cache-key" {
-			bifrostCtx.SetValue(semanticcache.CacheKey, string(value))
+			bifrostCtx.SetValue(localcache.CacheKey, string(value))
 			return true
 		}
 		// Handle cache TTL header (x-bf-cache-ttl)
@@ -378,7 +378,7 @@ func ConvertToBifrostContext(ctx *fasthttp.RequestCtx, store HandlerStore) (*sch
 			}
 
 			if err == nil {
-				bifrostCtx.SetValue(semanticcache.CacheTTLKey, ttlDuration)
+				bifrostCtx.SetValue(localcache.CacheTTLKey, ttlDuration)
 			}
 			// If both parsing attempts fail, we silently ignore the header and use default TTL
 			return true
@@ -393,20 +393,20 @@ func ConvertToBifrostContext(ctx *fasthttp.RequestCtx, store HandlerStore) (*sch
 				} else if threshold > 1.0 {
 					threshold = 1.0
 				}
-				bifrostCtx.SetValue(semanticcache.CacheThresholdKey, threshold)
+				bifrostCtx.SetValue(localcache.CacheThresholdKey, threshold)
 			}
 			// If parsing fails, silently ignore the header (no context value set)
 			return true
 		}
 		// Cache type header
 		if keyStr == "x-bf-cache-type" {
-			bifrostCtx.SetValue(semanticcache.CacheTypeKey, semanticcache.CacheType(string(value)))
+			bifrostCtx.SetValue(localcache.CacheTypeKey, localcache.CacheType(string(value)))
 			return true
 		}
 		// Cache no store header
 		if keyStr == "x-bf-cache-no-store" {
 			if valueStr := string(value); valueStr == "true" {
-				bifrostCtx.SetValue(semanticcache.CacheNoStoreKey, true)
+				bifrostCtx.SetValue(localcache.CacheNoStoreKey, true)
 			}
 			return true
 		}
diff --git a/transports/bifrost-http/lib/semantic_cache_config_test.go b/transports/bifrost-http/lib/semantic_cache_config_test.go
deleted file mode 100644
index 2d79bd9526..0000000000
--- a/transports/bifrost-http/lib/semantic_cache_config_test.go
+++ /dev/null
@@ -1,170 +0,0 @@
-package lib
-
-import (
-	"testing"
-
-	"github.com/maximhq/bifrost/core/schemas"
-	"github.com/maximhq/bifrost/framework/configstore"
-	"github.com/maximhq/bifrost/plugins/semanticcache"
-	"github.com/stretchr/testify/require"
-)
-
-func TestValidateSemanticCacheConfig_DirectOnlyMode(t *testing.T) {
-	config := &Config{}
-	pluginConfig := &schemas.PluginConfig{
-		Name: semanticcache.PluginName,
-		Config: map[string]interface{}{
-			"dimension": 1,
-			"ttl":       "5m",
-		},
-	}
-
-	err := config.ValidateSemanticCacheConfig(pluginConfig)
-	require.NoError(t, err)
-
-	configMap, ok := pluginConfig.Config.(map[string]interface{})
-	require.True(t, ok)
-	_, hasKeys := configMap["keys"]
-	require.False(t, hasKeys, "direct-only mode should not inject provider keys")
-}
-
-func TestValidateSemanticCacheConfig_DirectOnlyModeRemovesStaleProviderBackedFields(t *testing.T) {
-	config := &Config{}
-	pluginConfig := &schemas.PluginConfig{
-		Name: semanticcache.PluginName,
-		Config: map[string]interface{}{
-			"dimension":       1,
-			"keys":            []schemas.Key{{Name: "stale-key"}},
-			"embedding_model": "text-embedding-3-small",
-		},
-	}
-
-	err := config.ValidateSemanticCacheConfig(pluginConfig)
-	require.NoError(t, err)
-
-	configMap, ok := pluginConfig.Config.(map[string]interface{})
-	require.True(t, ok)
-	_, hasEmbeddingModel := configMap["embedding_model"]
-	require.False(t, hasEmbeddingModel, "direct-only mode should remove stale embedding_model")
-}
-
-func TestValidateSemanticCacheConfig_ProviderBackedModeValidationPasses(t *testing.T) {
-	config := &Config{
-		Providers: map[schemas.ModelProvider]configstore.ProviderConfig{
-			schemas.OpenAI: {
-				Keys: []schemas.Key{
-					{
-						Name:   "openai-key",
-						Value:  *schemas.NewEnvVar("sk-test"),
-						Weight: 1,
-					},
-				},
-			},
-		},
-	}
-	pluginConfig := &schemas.PluginConfig{
-		Name: semanticcache.PluginName,
-		Config: map[string]interface{}{
-			"provider":        "openai",
-			"embedding_model": "text-embedding-3-small",
-			"dimension":       1536,
-		},
-	}
-
-	err := config.ValidateSemanticCacheConfig(pluginConfig)
-	require.NoError(t, err)
-
-	configMap, ok := pluginConfig.Config.(map[string]interface{})
-	require.True(t, ok)
-	_, hasKeys := configMap["keys"]
-	require.False(t, hasKeys, "keys are inherited from global client; they must not be injected into the plugin config")
-	require.Equal(t, "openai", configMap["provider"])
-}
-
-func TestValidateSemanticCacheConfig_SemanticModeMissingProvider(t *testing.T) {
-	config := &Config{}
-	pluginConfig := &schemas.PluginConfig{
-		Name: semanticcache.PluginName,
-		Config: map[string]interface{}{
-			"dimension": 1536,
-		},
-	}
-
-	err := config.ValidateSemanticCacheConfig(pluginConfig)
-	require.Error(t, err)
-	require.Contains(t, err.Error(), "requires 'provider' for semantic mode")
-}
-
-func TestValidateSemanticCacheConfig_ProviderBackedModeMissingDimension(t *testing.T) {
-	config := &Config{}
-	pluginConfig := &schemas.PluginConfig{
-		Name: semanticcache.PluginName,
-		Config: map[string]interface{}{
-			"provider":        "openai",
-			"embedding_model": "text-embedding-3-small",
-		},
-	}
-
-	err := config.ValidateSemanticCacheConfig(pluginConfig)
-	require.Error(t, err)
-	require.Contains(t, err.Error(), "requires 'dimension' for provider-backed semantic mode")
-}
-
-func TestValidateSemanticCacheConfig_ProviderBackedModeDimensionOne(t *testing.T) {
-	config := &Config{}
-	pluginConfig := &schemas.PluginConfig{
-		Name: semanticcache.PluginName,
-		Config: map[string]interface{}{
-			"provider":        "openai",
-			"embedding_model": "text-embedding-3-small",
-			"dimension":       1,
-		},
-	}
-
-	err := config.ValidateSemanticCacheConfig(pluginConfig)
-	require.Error(t, err)
-	require.Contains(t, err.Error(), "requires 'dimension' > 1")
-}
-
-func TestValidateSemanticCacheConfig_ProviderBackedModeMissingEmbeddingModel(t *testing.T) {
-	config := &Config{}
-	pluginConfig := &schemas.PluginConfig{
-		Name: semanticcache.PluginName,
-		Config: map[string]interface{}{
-			"provider":  "openai",
-			"dimension": 1536,
-		},
-	}
-
-	err := config.ValidateSemanticCacheConfig(pluginConfig)
-	require.Error(t, err)
-	require.Contains(t, err.Error(), "requires 'embedding_model'")
-}
-
-func TestValidateSemanticCacheConfig_InvalidDimensionZero(t *testing.T) {
-	config := &Config{}
-	pluginConfig := &schemas.PluginConfig{
-		Name: semanticcache.PluginName,
-		Config: map[string]interface{}{
-			"dimension": 0,
-		},
-	}
-
-	err := config.ValidateSemanticCacheConfig(pluginConfig)
-	require.Error(t, err)
-	require.Contains(t, err.Error(), "'dimension' must be >= 1")
-}
-
-func TestValidateSemanticCacheConfig_InvalidDimensionNegative(t *testing.T) {
-	config := &Config{}
-	pluginConfig := &schemas.PluginConfig{
-		Name: semanticcache.PluginName,
-		Config: map[string]interface{}{
-			"dimension": -1,
-		},
-	}
-
-	err := config.ValidateSemanticCacheConfig(pluginConfig)
-	require.Error(t, err)
-	require.Contains(t, err.Error(), "'dimension' must be >= 1")
-}
diff --git a/transports/bifrost-http/lib/validator_test.go b/transports/bifrost-http/lib/validator_test.go
index 85094cbdc8..1655bc4274 100644
--- a/transports/bifrost-http/lib/validator_test.go
+++ b/transports/bifrost-http/lib/validator_test.go
@@ -1020,171 +1020,128 @@ func TestValidateConfigSchema_Plugin_MissingName(t *testing.T) {
 }
 
 // =============================================================================
-// Semantic Cache Plugin Config Required Fields Tests
+// Local Cache Top-Level Config Required Fields Tests
 // =============================================================================
 
-func TestValidateConfigSchema_SemanticCachePlugin_Valid(t *testing.T) {
-	// Valid semantic cache plugin with provider, embedding model, and dimension. Keys are injected at runtime.
+// LocalCacheConfig schema tests exercise the top-level "local_cache" block
+// added in the v1.5.0 rename. Plugin entries no longer carry local-cache
+// config — it lives next to "client", "vector_store", and "governance" at
+// the root of config.json, so these tests validate it at the root too.
+
+func TestValidateConfigSchema_LocalCacheConfig_Valid(t *testing.T) {
 	validConfig := `{
-		"plugins": [
-			{
-				"enabled": true,
-				"name": "semantic_cache",
-				"config": {
-					"provider": "openai",
-					"embedding_model": "text-embedding-3-small",
-					"dimension": 1536
-				}
-			}
-		]
+		"local_cache": {
+			"provider": "openai",
+			"embedding_model": "text-embedding-3-small",
+			"dimension": 1536
+		}
 	}`
 
 	err := ValidateConfigSchema([]byte(validConfig), loadLocalSchema(t))
 	if err != nil {
-		t.Errorf("expected valid semantic cache plugin config to pass validation, got error: %v", err)
+		t.Errorf("expected valid local cache config to pass validation, got error: %v", err)
 	}
 }
 
-func TestValidateConfigSchema_SemanticCachePlugin_MissingProvider(t *testing.T) {
-	// Missing required field: provider for semantic mode (dimension > 1)
+func TestValidateConfigSchema_LocalCacheConfig_MissingProvider(t *testing.T) {
+	// Provider+model are required for semantic mode (dimension > 1).
 	invalidConfig := `{
-		"plugins": [
-			{
-				"enabled": true,
-				"name": "semantic_cache",
-				"config": {
-					"dimension": 1536
-				}
-			}
-		]
+		"local_cache": {
+			"dimension": 1536
+		}
 	}`
 
 	err := ValidateConfigSchema([]byte(invalidConfig), loadLocalSchema(t))
 	if err == nil {
-		t.Error("expected config missing 'provider' in semantic cache plugin to fail validation")
+		t.Error("expected provider-backed local cache config missing 'provider' to fail validation")
 	}
 }
 
-func TestValidateConfigSchema_SemanticCachePlugin_ProviderWithoutKeys(t *testing.T) {
-	// Keys are not required at schema level for provider-backed config.
-	validConfig := `{
-		"plugins": [
-			{
-				"enabled": true,
-				"name": "semantic_cache",
-				"config": {
-					"provider": "openai",
-					"embedding_model": "text-embedding-3-small",
-					"dimension": 1536
-				}
-			}
-		]
-	}`
-
-	err := ValidateConfigSchema([]byte(validConfig), loadLocalSchema(t))
-	if err != nil {
-		t.Errorf("expected provider-backed semantic cache config without plugin keys to pass validation, got error: %v", err)
-	}
-}
-
-func TestValidateConfigSchema_SemanticCachePlugin_ProviderWithoutEmbeddingModel(t *testing.T) {
+func TestValidateConfigSchema_LocalCacheConfig_ProviderWithoutEmbeddingModel(t *testing.T) {
 	invalidConfig := `{
-		"plugins": [
-			{
-				"enabled": true,
-				"name": "semantic_cache",
-				"config": {
-					"provider": "openai",
-					"dimension": 1536
-				}
-			}
-		]
+		"local_cache": {
+			"provider": "openai",
+			"dimension": 1536
+		}
 	}`
 
 	err := ValidateConfigSchema([]byte(invalidConfig), loadLocalSchema(t))
 	if err == nil {
-		t.Error("expected provider-backed semantic cache config without embedding_model to fail validation")
+		t.Error("expected provider-backed local cache config without embedding_model to fail validation")
 	}
 }
 
-func TestValidateConfigSchema_SemanticCachePlugin_DirectModeValid(t *testing.T) {
+func TestValidateConfigSchema_LocalCacheConfig_DirectModeValid(t *testing.T) {
 	validConfig := `{
-		"plugins": [
-			{
-				"enabled": true,
-				"name": "semantic_cache",
-				"config": {
-					"dimension": 1
-				}
-			}
-		]
+		"local_cache": {
+			"dimension": 1
+		}
 	}`
 
 	err := ValidateConfigSchema([]byte(validConfig), loadLocalSchema(t))
 	if err != nil {
-		t.Errorf("expected direct-only semantic cache config to pass validation, got error: %v", err)
+		t.Errorf("expected direct-only local cache config to pass validation, got error: %v", err)
 	}
 }
 
-func TestValidateConfigSchema_SemanticCachePlugin_DirectModeWithEmbeddingModelInvalid(t *testing.T) {
+func TestValidateConfigSchema_LocalCacheConfig_DirectModeWithEmbeddingModelInvalid(t *testing.T) {
 	invalidConfig := `{
-		"plugins": [
-			{
-				"enabled": true,
-				"name": "semantic_cache",
-				"config": {
-					"dimension": 1,
-					"embedding_model": "text-embedding-3-small"
-				}
-			}
-		]
+		"local_cache": {
+			"dimension": 1,
+			"embedding_model": "text-embedding-3-small"
+		}
 	}`
 
 	err := ValidateConfigSchema([]byte(invalidConfig), loadLocalSchema(t))
 	if err == nil {
-		t.Error("expected direct-only semantic cache config with embedding_model to fail validation")
+		t.Error("expected direct-only local cache config with embedding_model to fail validation")
 	}
 }
 
-func TestValidateConfigSchema_SemanticCachePlugin_DimensionOneWithProviderInvalid(t *testing.T) {
+func TestValidateConfigSchema_LocalCacheConfig_DimensionOneWithProviderInvalid(t *testing.T) {
 	invalidConfig := `{
-		"plugins": [
-			{
-				"enabled": true,
-				"name": "semantic_cache",
-				"config": {
-					"provider": "openai",
-					"embedding_model": "text-embedding-3-small",
-					"dimension": 1
-				}
-			}
-		]
+		"local_cache": {
+			"provider": "openai",
+			"embedding_model": "text-embedding-3-small",
+			"dimension": 1
+		}
 	}`
 
 	err := ValidateConfigSchema([]byte(invalidConfig), loadLocalSchema(t))
 	if err == nil {
-		t.Error("expected dimension: 1 with provider in semantic cache plugin to fail validation")
+		t.Error("expected dimension: 1 with provider in local cache config to fail validation")
 	}
 }
 
-func TestValidateConfigSchema_SemanticCachePlugin_MissingDimension(t *testing.T) {
-	// Missing required field: dimension
+func TestValidateConfigSchema_LocalCacheConfig_MissingDimension(t *testing.T) {
 	invalidConfig := `{
-		"plugins": [
-			{
-				"enabled": true,
-				"name": "semantic_cache",
-				"config": {
-					"provider": "openai",
-					"embedding_model": "text-embedding-3-small"
-				}
-			}
-		]
+		"local_cache": {
+			"provider": "openai",
+			"embedding_model": "text-embedding-3-small"
+		}
 	}`
 
 	err := ValidateConfigSchema([]byte(invalidConfig), loadLocalSchema(t))
 	if err == nil {
-		t.Error("expected config missing 'dimension' in semantic cache plugin to fail validation")
+		t.Error("expected local cache config missing 'dimension' to fail validation")
+	}
+}
+
+func TestValidateConfigSchema_LocalCacheConfig_EnableFlagInClient(t *testing.T) {
+	// The toggle lives on client.enable_local_cache, not on the block
+	// itself — make sure the schema accepts it there.
+	validConfig := `{
+		"client": {
+			"enable_local_cache": true
+		},
+		"local_cache": {
+			"dimension": 1
+		}
+	}`
+
+	err := ValidateConfigSchema([]byte(validConfig), loadLocalSchema(t))
+	if err != nil {
+		t.Errorf("expected client.enable_local_cache + direct-only local_cache to pass validation, got error: %v", err)
 	}
 }
 
diff --git a/transports/bifrost-http/server/plugins.go b/transports/bifrost-http/server/plugins.go
index c0137d6e6b..2196689cd8 100644
--- a/transports/bifrost-http/server/plugins.go
+++ b/transports/bifrost-http/server/plugins.go
@@ -12,7 +12,7 @@ import (
 	"github.com/maximhq/bifrost/plugins/maxim"
 	"github.com/maximhq/bifrost/plugins/otel"
 	"github.com/maximhq/bifrost/plugins/prompts"
-	"github.com/maximhq/bifrost/plugins/semanticcache"
+	"github.com/maximhq/bifrost/plugins/localcache"
 	"github.com/maximhq/bifrost/plugins/telemetry"
 	"github.com/maximhq/bifrost/transports/bifrost-http/handlers"
 	"github.com/maximhq/bifrost/transports/bifrost-http/lib"
@@ -91,12 +91,16 @@ func loadBuiltinPlugin(ctx context.Context, name string, pluginConfig any, bifro
 		}
 		return maxim.Init(maximConfig, logger)
 
-	case semanticcache.PluginName:
-		semanticConfig, err := MarshalPluginConfig[semanticcache.Config](pluginConfig)
-		if err != nil {
-			return nil, fmt.Errorf("failed to marshal semantic cache plugin config: %w", err)
+	case localcache.PluginName:
+		// The local cache reads its live configuration from
+		// bifrostConfig.LocalCacheConfig (a shared pointer that PUT
+		// /api/local-cache/config mutates in place). pluginConfig is
+		// ignored — config flows through the dedicated config_local_cache
+		// table, not through config_plugins.
+		if bifrostConfig.LocalCacheConfig == nil {
+			return nil, fmt.Errorf("local cache config not loaded; cannot initialize plugin")
 		}
-		return semanticcache.Init(ctx, semanticConfig, logger, bifrostConfig.VectorStore)
+		return localcache.Init(ctx, bifrostConfig.LocalCacheConfig, logger, bifrostConfig.VectorStore)
 
 	case otel.PluginName:
 		otelConfig, err := MarshalPluginConfig[otel.Config](pluginConfig)
@@ -206,14 +210,16 @@ func (s *BifrostHTTPServer) loadBuiltinPlugins(ctx context.Context) error {
 	}
 	s.Config.SetPluginOrderInfo(otel.PluginName, builtinPlacement, schemas.Ptr(5))
 
-	// 6. Semantic Cache (if configured in PluginConfigs)
-	semanticCacheConfig := s.getPluginConfig(semanticcache.PluginName)
-	if semanticCacheConfig != nil && semanticCacheConfig.Enabled {
-		s.registerPluginWithStatus(ctx, semanticcache.PluginName, nil, semanticCacheConfig.Config, false)
+	// 6. Local Cache (only if ClientConfig.EnableLocalCache is set, the
+	// config_local_cache row is loaded, and a vector store is configured).
+	// pluginConfig is nil: the plugin reads bifrostConfig.LocalCacheConfig.
+	enableLocalCache := s.Config.ClientConfig.EnableLocalCache != nil && *s.Config.ClientConfig.EnableLocalCache
+	if enableLocalCache && s.Config.LocalCacheConfig != nil && s.Config.VectorStore != nil {
+		s.registerPluginWithStatus(ctx, localcache.PluginName, nil, nil, false)
 	} else {
-		s.markPluginDisabled(semanticcache.PluginName)
+		s.markPluginDisabled(localcache.PluginName)
 	}
-	s.Config.SetPluginOrderInfo(semanticcache.PluginName, builtinPlacement, schemas.Ptr(6))
+	s.Config.SetPluginOrderInfo(localcache.PluginName, builtinPlacement, schemas.Ptr(6))
 
 	// 7. Compat (if any compat feature is enabled in ClientConfig)
 	cc := s.Config.ClientConfig.Compat
diff --git a/transports/bifrost-http/server/server.go b/transports/bifrost-http/server/server.go
index bb64dce034..d95492bb74 100644
--- a/transports/bifrost-http/server/server.go
+++ b/transports/bifrost-http/server/server.go
@@ -26,7 +26,7 @@ import (
 	"github.com/maximhq/bifrost/plugins/governance"
 	"github.com/maximhq/bifrost/plugins/logging"
 	"github.com/maximhq/bifrost/plugins/prompts"
-	"github.com/maximhq/bifrost/plugins/semanticcache"
+	"github.com/maximhq/bifrost/plugins/localcache"
 	"github.com/maximhq/bifrost/plugins/telemetry"
 	"github.com/maximhq/bifrost/transports/bifrost-http/handlers"
 	"github.com/maximhq/bifrost/transports/bifrost-http/integrations"
@@ -59,6 +59,8 @@ type ServerCallbacks interface {
 	// Auth related callbacks
 	UpdateAuthConfig(ctx context.Context, authConfig *configstore.AuthConfig) error
 	ReloadClientConfigFromConfigStore(ctx context.Context) error
+	// Local cache related callbacks
+	ReloadLocalCacheConfigFromConfigStore(ctx context.Context) error
 	// Pricing related callbacks
 	UpdateSyncConfig(ctx context.Context) error
 	ForceReloadPricing(ctx context.Context) error
@@ -717,6 +719,34 @@ func (s *BifrostHTTPServer) RemoveRoutingRule(ctx context.Context, id string) er
 	return nil
 }
 
+// ReloadLocalCacheConfigFromConfigStore re-reads the local-cache config from
+// the config store and copies it over *s.Config.LocalCacheConfig in place, so
+// holders of that pointer see the new values without a plugin reload. When
+// the store returns no row the current config is left untouched; disabling
+// the cache is done via EnableLocalCache, not by zeroing this struct.
+// NOTE(review): the struct copy below is not synchronized — confirm that
+// concurrent readers of the shared pointer are safe.
+func (s *BifrostHTTPServer) ReloadLocalCacheConfigFromConfigStore(ctx context.Context) error {
+	if s.Config == nil || s.Config.ConfigStore == nil {
+		return fmt.Errorf("config store not found")
+	}
+	dbConfig, err := s.Config.ConfigStore.GetLocalCacheConfig(ctx)
+	if err != nil {
+		return fmt.Errorf("failed to get local cache config: %w", err)
+	}
+	if dbConfig == nil {
+		return nil
+	}
+	if s.Config.LocalCacheConfig == nil {
+		s.Config.LocalCacheConfig = dbConfig
+		return nil
+	}
+	// Overwrite the pointee rather than the pointer so existing holders of
+	// s.Config.LocalCacheConfig keep a valid reference to the fresh values.
+	*s.Config.LocalCacheConfig = *dbConfig
+	return nil
+}
+
 // ReloadClientConfigFromConfigStore reloads the client config from config store
 func (s *BifrostHTTPServer) ReloadClientConfigFromConfigStore(ctx context.Context) error {
 	if s.Config == nil || s.Config.ConfigStore == nil {
@@ -1007,8 +1037,8 @@ func (s *BifrostHTTPServer) ReloadPlugin(ctx context.Context, name string, path
 		return s.updatePluginErrorStatus(name, "loading", err)
 	}
 	// Wire the embedding executor on the new instance before syncing.
-	if semanticCachePlugin, ok := plugin.(*semanticcache.Plugin); ok {
-		semanticCachePlugin.SetEmbeddingRequestExecutor(s.Client.EmbeddingRequest)
+	if localCachePlugin, ok := plugin.(*localcache.Plugin); ok {
+		localCachePlugin.SetEmbeddingRequestExecutor(s.Client.EmbeddingRequest)
 	}
 	return s.SyncLoadedPlugin(ctx, name, plugin, placement, order)
 }
@@ -1103,9 +1133,9 @@ func (s *BifrostHTTPServer) RegisterAPIRoutes(ctx context.Context, callbacks Ser
 		}
 	}
 	var cacheHandler *handlers.CacheHandler
-	semanticCachePlugin, _ := lib.FindPluginAs[*semanticcache.Plugin](s.Config, semanticcache.PluginName)
-	if semanticCachePlugin != nil {
-		cacheHandler = handlers.NewCacheHandler(semanticCachePlugin)
+	localCachePlugin, _ := lib.FindPluginAs[*localcache.Plugin](s.Config, localcache.PluginName)
+	if localCachePlugin != nil {
+		cacheHandler = handlers.NewCacheHandler(localCachePlugin)
 	}
 	var promptsReloader handlers.PromptCacheReloader
 	if promptsPlugin, err := lib.FindPluginAs[handlers.PromptCacheReloader](s.Config, s.getPromptsPluginName()); err == nil && promptsPlugin != nil {
@@ -1126,6 +1156,7 @@ func (s *BifrostHTTPServer) RegisterAPIRoutes(ctx context.Context, callbacks Ser
 	oauthHandler := handlers.NewOAuthHandler(s.Config.OAuthProvider, s.Client, s.Config)
 	mcpHandler := handlers.NewMCPHandler(callbacks, callbacks, s.Client, s.Config, oauthHandler)
 	configHandler := handlers.NewConfigHandler(callbacks, s.Config)
+	localCacheHandler := handlers.NewLocalCacheHandler(callbacks, s.Config)
 	pluginsHandler := handlers.NewPluginsHandler(callbacks, s.Config.ConfigStore)
 	sessionHandler := handlers.NewSessionHandler(s.Config.ConfigStore, s.WSTicketStore)
 	promptsHandler := handlers.NewPromptsHandler(s.Config.ConfigStore, promptsReloader)
@@ -1134,6 +1165,7 @@ func (s *BifrostHTTPServer) RegisterAPIRoutes(ctx context.Context, callbacks Ser
 	providerHandler.RegisterRoutes(s.Router, middlewares...)
 	mcpHandler.RegisterRoutes(s.Router, middlewares...)
 	configHandler.RegisterRoutes(s.Router, middlewares...)
+	localCacheHandler.RegisterRoutes(s.Router, middlewares...)
 	oauthHandler.RegisterRoutes(s.Router, middlewares...)
 	// OAuth metadata + per-user OAuth endpoints (no auth middleware — must be publicly accessible)
 	oauthMetadataHandler := handlers.NewOAuthMetadataHandler(s.Config)
@@ -1434,10 +1466,10 @@ func (s *BifrostHTTPServer) Bootstrap(ctx context.Context) error {
 			apiMiddlewares = append(apiMiddlewares, s.AuthMiddleware.APIMiddleware())
 		}
 	}
-	// Add semantic cache plugin embedding request executor if it exists
-	semanticCachePlugin, err := lib.FindPluginAs[*semanticcache.Plugin](s.Config, semanticcache.PluginName)
-	if err == nil && semanticCachePlugin != nil {
-		semanticCachePlugin.SetEmbeddingRequestExecutor(s.Client.EmbeddingRequest)
+	// Add local cache plugin embedding request executor if it exists
+	localCachePlugin, err := lib.FindPluginAs[*localcache.Plugin](s.Config, localcache.PluginName)
+	if err == nil && localCachePlugin != nil {
+		localCachePlugin.SetEmbeddingRequestExecutor(s.Client.EmbeddingRequest)
 	}
 	// Register routes
 	err = s.RegisterAPIRoutes(s.Ctx, s, apiMiddlewares...)
diff --git a/transports/config.schema.json b/transports/config.schema.json
index 885a54001c..fc8143d935 100644
--- a/transports/config.schema.json
+++ b/transports/config.schema.json
@@ -64,6 +64,11 @@
           "type": "boolean",
           "description": "Enable request/response logging"
         },
+        "enable_local_cache": {
+          "type": "boolean",
+          "description": "Enable the local cache plugin (direct hash matching + semantic similarity search). Requires the top-level local_cache block and a configured vector store.",
+          "default": false
+        },
         "disable_content_logging": {
           "type": "boolean",
           "description": "Disable logging of sensitive content (inputs, outputs, embeddings, etc.)"
@@ -1168,6 +1173,128 @@
       },
       "additionalProperties": false
     },
+    "local_cache": {
+      "type": "object",
+      "description": "Local cache plugin configuration. Loaded only when client.enable_local_cache is true. Mutating this block at runtime via PUT /api/local-cache/config takes effect on the next request — the running plugin reads the same struct via a shared pointer.",
+      "properties": {
+        "provider": {
+          "type": "string",
+          "description": "Embedding provider for semantic similarity search. Required when dimension > 1; omit it (and set dimension: 1) for direct-only mode.",
+          "enum": [
+            "",
+            "openai",
+            "anthropic",
+            "gemini",
+            "bedrock",
+            "azure",
+            "cohere",
+            "mistral",
+            "groq",
+            "ollama",
+            "openrouter",
+            "vertex",
+            "cerebras",
+            "vllm",
+            "parasail",
+            "perplexity",
+            "replicate",
+            "sgl",
+            "huggingface"
+          ]
+        },
+        "embedding_model": {
+          "type": "string",
+          "description": "Embedding model name. Required when provider is set; not allowed in direct-only mode."
+        },
+        "cleanup_on_shutdown": {
+          "type": "boolean",
+          "description": "Delete every entry tagged from_bifrost_local_cache_plugin and drop the namespace on plugin Cleanup. Default false leaves entries in place for the next process."
+        },
+        "ttl": {
+          "description": "Time-to-live for cached responses. Accepts a duration string ('5m', '1h') or seconds as an integer. Default 5 minutes.",
+          "oneOf": [
+            {
+              "type": "string",
+              "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$"
+            },
+            {
+              "type": "integer",
+              "minimum": 0
+            }
+          ]
+        },
+        "threshold": {
+          "type": "number",
+          "description": "Cosine similarity threshold for semantic matches (0–1). Default 0.8.",
+          "minimum": 0,
+          "maximum": 1
+        },
+        "vector_store_namespace": {
+          "type": "string",
+          "description": "Namespace within the vector store to scope entries. Default BifrostLocalCachePlugin."
+        },
+        "dimension": {
+          "type": "integer",
+          "description": "Embedding dimension. Use 1 for direct-only mode (no semantic search). Must be > 1 when provider is set.",
+          "minimum": 1
+        },
+        "default_cache_key": {
+          "type": "string",
+          "description": "Cache key used when no per-request key is supplied. Caching is disabled when both this and the per-request key are empty."
+        },
+        "conversation_history_threshold": {
+          "type": "integer",
+          "description": "Skip caching for requests whose conversation history exceeds this many messages. Default 3.",
+          "minimum": 0
+        },
+        "cache_by_model": {
+          "type": "boolean",
+          "description": "Include the model in the cache key. Default true."
+        },
+        "cache_by_provider": {
+          "type": "boolean",
+          "description": "Include the provider in the cache key. Default true."
+        },
+        "exclude_system_prompt": {
+          "type": "boolean",
+          "description": "Exclude the system prompt from the cache key. Default false."
+        }
+      },
+      "required": ["dimension"],
+      "allOf": [
+        {
+          "if": {
+            "properties": {
+              "provider": {
+                "type": "string",
+                "minLength": 1
+              }
+            },
+            "required": ["provider"]
+          },
+          "then": {
+            "required": ["provider", "embedding_model"],
+            "properties": {
+              "dimension": {
+                "type": "integer",
+                "minimum": 2
+              }
+            }
+          },
+          "else": {
+            "not": {
+              "required": ["embedding_model"]
+            },
+            "properties": {
+              "dimension": {
+                "const": 1
+              }
+            }
+          }
+        }
+      ],
+      "additionalProperties": false
+    },
     "plugins": {
       "type": "array",
       "description": "Plugins configuration",
@@ -1181,7 +1308,7 @@
           },
           "name": {
             "type": "string",
-            "description": "Name of the plugin (built-in: telemetry, prompts, logging, governance, maxim, semantic_cache, otel, or custom plugin name)"
+            "description": "Name of the plugin (built-in: telemetry, prompts, logging, governance, maxim, otel, or custom plugin name)"
           },
           "config": {
             "type": "object",
@@ -1394,142 +1521,6 @@
               }
             }
           },
-          {
-            "if": {
-              "properties": {
-                "name": {
-                  "const": "semantic_cache"
-                }
-              }
-            },
-            "then": {
-              "required": ["config"],
-              "properties": {
-                "config": {
-                  "type": "object",
-                  "description": "Configuration for the semantic cache plugin",
-                  "properties": {
-                    "provider": {
-                      "type": "string",
-                      "minLength": 1,
-                      "description": "Provider to use for generating embeddings. Required for semantic search; omit it for direct hash mode with dimension: 1.",
-                      "enum": [
-                        "openai",
-                        "anthropic",
-                        "gemini",
-                        "bedrock",
-                        "azure",
-                        "cohere",
-                        "mistral",
-                        "groq",
-                        "ollama",
-                        "openrouter",
-                        "vertex",
-                        "cerebras",
-                        "vllm",
-                        "parasail",
-                        "perplexity",
-                        "replicate",
-                        "sgl",
-                        "huggingface"
-                      ]
-                    },
-                    "embedding_model": {
-                      "type": "string",
-                      "description": "Model to use for generating embeddings in provider-backed semantic caching. Required when provider is set and not allowed in direct-only mode."
-                    },
-                    "cleanup_on_shutdown": {
-                      "type": "boolean",
-                      "description": "Clean up cache on shutdown (default: false)"
-                    },
-                    "ttl": {
-                      "description": "Time-to-live for cached responses (supports duration strings like '5m', '1h' or seconds as number, default: 5min)",
-                      "oneOf": [
-                        {
-                          "type": "string",
-                          "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$"
-                        },
-                        {
-                          "type": "integer",
-                          "minimum": 0
-                        }
-                      ]
-                    },
-                    "threshold": {
-                      "type": "number",
-                      "description": "Cosine similarity threshold for semantic matching (default: 0.8)",
-                      "minimum": 0,
-                      "maximum": 1
-                    },
-                    "vector_store_namespace": {
-                      "type": "string",
-                      "description": "Namespace for vector store (optional)"
-                    },
-                    "dimension": {
-                      "type": "integer",
-                      "description": "Dimension for vector store embeddings. Use 1 for direct (hash-based) caching without an embedding provider.",
-                      "minimum": 1
-                    },
-                    "default_cache_key": {
-                      "type": "string",
-                      "description": "Default cache key used when no per-request key is provided. When set, all requests without an explicit x-bf-cache-key header will use this value and be cached automatically."
-                    },
-                    "conversation_history_threshold": {
-                      "type": "integer",
-                      "description": "Skip caching for requests with more than this number of messages in conversation history (default: 3)",
-                      "minimum": 0
-                    },
-                    "cache_by_model": {
-                      "type": "boolean",
-                      "description": "Include model in cache key (default: true)"
-                    },
-                    "cache_by_provider": {
-                      "type": "boolean",
-                      "description": "Include provider in cache key (default: true)"
-                    },
-                    "exclude_system_prompt": {
-                      "type": "boolean",
-                      "description": "Exclude system prompt in cache key (default: false)"
-                    }
-                  },
-                  "required": ["dimension"],
-                  "allOf": [
-                    {
-                      "if": {
-                        "properties": {
-                          "provider": {
-                            "type": "string",
-                            "minLength": 1
-                          }
-                        },
-                        "required": ["provider"]
-                      },
-                      "then": {
-                        "required": ["provider", "embedding_model"],
-                        "properties": {
-                          "dimension": {
-                            "type": "integer",
-                            "minimum": 2
-                          }
-                        }
-                      },
-                      "else": {
-                        "not": {
-                          "required": ["embedding_model"]
-                        },
-                        "properties": {
-                          "dimension": {
-                            "const": 1
-                          }
-                        }
-                      }
-                    }
-                  ],
-                  "additionalProperties": false
-                }
-              }
-            }
-          },
           {
             "if": {
               "properties": {
diff --git a/transports/go.mod b/transports/go.mod
index 4df3f5932e..ed4e5d3e04 100644
--- a/transports/go.mod
+++ b/transports/go.mod
@@ -16,11 +16,11 @@ require (
 	github.com/maximhq/bifrost/framework v1.3.7
 	github.com/maximhq/bifrost/plugins/compat v0.1.6
 	github.com/maximhq/bifrost/plugins/governance v1.5.7
+	github.com/maximhq/bifrost/plugins/localcache v1.5.7
 	github.com/maximhq/bifrost/plugins/logging v1.5.7
 	github.com/maximhq/bifrost/plugins/maxim v1.6.7
 	github.com/maximhq/bifrost/plugins/otel v1.2.7
 	github.com/maximhq/bifrost/plugins/prompts v1.0.7
-	github.com/maximhq/bifrost/plugins/semanticcache v1.5.7
 	github.com/maximhq/bifrost/plugins/telemetry v1.5.7
 	github.com/pion/rtcp v1.2.16
 	github.com/pion/webrtc/v4 v4.2.9
@@ -208,3 +208,5 @@ require (
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 	gorm.io/driver/postgres v1.6.0 // indirect
 )
+
+replace github.com/maximhq/bifrost/plugins/localcache => ../plugins/localcache
diff --git a/transports/go.sum b/transports/go.sum
index 51793571ec..c6862d1240 100644
--- a/transports/go.sum
+++ b/transports/go.sum
@@ -277,8 +277,6 @@ github.com/maximhq/bifrost/plugins/otel v1.2.7 h1:l9saTMOtNoPCHYZcwCbi8k31MFGELB
 github.com/maximhq/bifrost/plugins/otel v1.2.7/go.mod h1:5v9ciYxjmsOZR0xsFPOp+9VQ2NlXQ96PepwTGKuMUsA=
 github.com/maximhq/bifrost/plugins/prompts v1.0.7 h1:H8vf0Az1uSz2Voek3sBjrzLSVtP+I8DJTgPHzMu06eQ=
 github.com/maximhq/bifrost/plugins/prompts v1.0.7/go.mod h1:Uj2J0yMjfjFg7etd3D/vLnagWeyTZKw9xIm+6gdJ7dM=
-github.com/maximhq/bifrost/plugins/semanticcache v1.5.7 h1:2io/bSnwb5rfbhPZ/1LaD9wrFM7/B12qq1+U1ribZqY=
-github.com/maximhq/bifrost/plugins/semanticcache v1.5.7/go.mod h1:ExoKzIN06LXZA+eKxWVTaGVdnZGPy6FLRGcY/f3mS3k=
 github.com/maximhq/bifrost/plugins/telemetry v1.5.7 h1:EIHEvcU1x+tDaOYO1ddC7ljF8AGB85TboMks5vxb+Tk=
 github.com/maximhq/bifrost/plugins/telemetry v1.5.7/go.mod h1:0nf7Dpmabnyvd3SaubYCXe4IR7YbwBF9tV34oFaH3Ns=
 github.com/maximhq/maxim-go v0.2.1 h1:hCp8dQ4HsyyNC+y5HCUuY/HFD0sOnGkjL5MdYCHkgEQ=
diff --git a/ui/README.md b/ui/README.md
index b66a736511..3d5fc0bdc5 100644
--- a/ui/README.md
+++ b/ui/README.md
@@ -99,7 +99,7 @@ Extend Bifrost with powerful plugins for observability, testing, caching, and cu
 
 - [Maxim Logger](https://docs.getbifrost.ai/features/observability/maxim) - Advanced LLM observability
 - [Response Mocker](https://docs.getbifrost.ai/features/plugins/mocker) - Mock responses for testing
-- [Semantic Cache](https://docs.getbifrost.ai/features/semantic-caching) - Intelligent response caching
+- [Local Cache](https://docs.getbifrost.ai/features/local-caching) - Direct + semantic response caching
 - [OpenTelemetry](https://docs.getbifrost.ai/features/observability/otel) - Distributed tracing
 
 **[Plugin Development Guide →](https://docs.getbifrost.ai/plugins/getting-started)**
diff --git a/ui/app/workspace/config/views/cachingView.tsx b/ui/app/workspace/config/views/cachingView.tsx
index 4c00456cdc..e7f074da75 100644
--- a/ui/app/workspace/config/views/cachingView.tsx
+++ b/ui/app/workspace/config/views/cachingView.tsx
@@ -1,22 +1,235 @@
-import { getErrorMessage, useGetCoreConfigQuery } from "@/lib/store";
-import PluginsForm from "./pluginsForm";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Label } from "@/components/ui/label";
+import { ModelMultiselect } from "@/components/ui/modelMultiselect";
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
+import { Switch } from "@/components/ui/switch";
+import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
+import { ProviderIconType, RenderProviderIcon } from "@/lib/constants/icons";
+import { EmbeddingSupportedProviders, getProviderLabel } from "@/lib/constants/logs";
+import {
+	getErrorMessage,
+	useGetCoreConfigQuery,
+	useGetLocalCacheConfigQuery,
+	useGetProvidersQuery,
+	useUpdateCoreConfigMutation,
+	useUpdateLocalCacheConfigMutation,
+} from "@/lib/store";
+import { BifrostConfig, CacheConfig, EditorCacheConfig, ModelProvider, ModelProviderName } from "@/lib/types/config";
+import { cn } from "@/lib/utils";
+import { Loader2 } from "lucide-react";
+import { useEffect, useMemo, useState } from "react";
+import { toast } from "sonner";
+
+// The local cache plugin runs in one of two modes. Direct-only is purely
+// hash-based, no embedding provider needed; perfect for exact-replay
+// caching. Semantic adds vector similarity on top, requiring an
+// embedding-capable provider and the model's real dimension.
+type CacheMode = "direct" | "semantic";
+
+// Decides whether a provider may back semantic mode. A provider that
+// carries a custom_provider_config qualifies only when its
+// allowed_requests.embedding flag is exactly true; every other provider
+// must be named in EmbeddingSupportedProviders.
+const supportsEmbedding = (provider: ModelProvider): boolean => {
+	const customConfig = provider.custom_provider_config;
+	const builtIns = EmbeddingSupportedProviders as readonly string[];
+	return customConfig ? customConfig.allowed_requests?.embedding === true : builtIns.includes(provider.name);
+};
+
+const defaultDirectConfig: EditorCacheConfig = {
+	ttl: 300, // seconds (the TTL input is labelled "TTL (seconds)")
+	threshold: 0.8, // similarity threshold; only editable in semantic mode
+	dimension: 1, // 1 = direct-only mode; semantic mode needs a dimension > 1
+	conversation_history_threshold: 3,
+	exclude_system_prompt: false,
+	cache_by_model: true,
+	cache_by_provider: true,
+	cleanup_on_shutdown: false,
+};
+
+// Reports whether a config amounts to "nothing saved": either it is absent
+// altogether, or every value it carries is a zero value (undefined, null,
+// 0, "" or false). An object with no keys also counts as empty.
+const isEmptyConfig = (config: Partial<EditorCacheConfig> | undefined): boolean => {
+	const zeroValues: unknown[] = [undefined, null, 0, "", false];
+	const hasSavedValue = Object.values(config ?? {}).some((entry) => !zeroValues.includes(entry));
+	return !hasSavedValue;
+};
+
+const toEditorCacheConfig = (config?: Partial<EditorCacheConfig>): EditorCacheConfig => {
+	// An absent or all-zero payload contributes nothing, so the result is a
+	// fresh copy of the direct-mode defaults; otherwise saved values win.
+	const saved = !config || isEmptyConfig(config) ? undefined : config;
+	return { ...defaultDirectConfig, ...saved };
+};
+
+const inferMode = (config: EditorCacheConfig): CacheMode => {
+	const wantsSemantic = (config.dimension ?? 0) > 1 && Boolean(config.provider); // dimension > 1 plus a provider
+	return wantsSemantic ? "semantic" : "direct";
+};
+
+// Shapes the editor state into the payload that gets saved. Direct mode pins
+// dimension to 1 and leaves provider/embedding_model out (so the server
+// validator doesn't reject a stale provider choice); any other mode sends them.
+const buildPayload = (config: EditorCacheConfig, mode: CacheMode): CacheConfig => {
+	const { ttl, threshold, provider, embedding_model, dimension, vector_store_namespace, default_cache_key } = config;
+	// Blank or whitespace-only text fields collapse to undefined instead of "".
+	const textOrUndefined = (text?: string) => text?.trim() || undefined;
+	const shared = {
+		ttl: ttl ?? 0,
+		threshold: threshold ?? 0,
+		conversation_history_threshold: config.conversation_history_threshold,
+		exclude_system_prompt: config.exclude_system_prompt,
+		cache_by_model: config.cache_by_model,
+		cache_by_provider: config.cache_by_provider,
+		cleanup_on_shutdown: config.cleanup_on_shutdown,
+		vector_store_namespace: textOrUndefined(vector_store_namespace),
+		default_cache_key: textOrUndefined(default_cache_key),
+	};
+	const modeFields =
+		mode === "direct"
+			? { dimension: 1 }
+			: { provider: provider as ModelProviderName, embedding_model: embedding_model ?? "", dimension: dimension ?? 0 };
+	return { ...shared, ...modeFields } as CacheConfig;
+};
+
+// Returns the first problem blocking a save, or null when the config is
+// valid. Provider/model/dimension are only checked in semantic mode.
+const validateForSave = (config: EditorCacheConfig, mode: CacheMode): string | null => {
+	const { provider, embedding_model, dimension, ttl, threshold } = config;
+	const semantic = mode === "semantic";
+	const checks: [boolean, string][] = [
+		[semantic && !provider, "Pick an embedding provider for semantic mode, or switch to Direct only."],
+		[semantic && !embedding_model?.trim(), "Pick an embedding model for semantic mode."],
+		[semantic && (!dimension || dimension <= 1), "Semantic mode requires the embedding model's real dimension (must be > 1)."],
+		[ttl !== undefined && ttl < 0, "TTL must be non-negative."],
+		[threshold !== undefined && (threshold < 0 || threshold > 1), "Similarity threshold must be between 0 and 1."],
+	];
+	return checks.find(([failed]) => failed)?.[1] ?? null;
+};
 
 export default function CachingView() {
-	const { data: bifrostConfig, isLoading, error: configError } = useGetCoreConfigQuery({ fromDB: true });
+	const { data: bifrostConfig, isLoading: configLoading, error: configError } = useGetCoreConfigQuery({ fromDB: true });
+	const isVectorStoreEnabled = bifrostConfig?.is_cache_connected ?? false;
+	const enabledOnServer = bifrostConfig?.client_config?.enable_local_cache ?? false;
+
+	const { data: serverConfig, isLoading: localCacheLoading } = useGetLocalCacheConfigQuery();
+	const { data: providersData, error: providersError, isLoading: providersLoading } = useGetProvidersQuery();
+	const providers = useMemo(() => providersData || [], [providersData]);
+	const embeddingProviders = useMemo(() => providers.filter(supportsEmbedding), [providers]);
+
+	const [updateLocalCacheConfig, { isLoading: isSaving }] = useUpdateLocalCacheConfigMutation();
+	const [updateCoreConfig, { isLoading: isToggling }] = useUpdateCoreConfigMutation();
+
+	const [cacheConfig, setCacheConfig] = useState<EditorCacheConfig>(defaultDirectConfig);
+	const [serverCacheConfig, setServerCacheConfig] = useState<EditorCacheConfig>(defaultDirectConfig);
+	const [mode, setMode] = useState<CacheMode>("direct");
+
+	// Hydrate from the server-side config row when it lands.
+	useEffect(() => {
+		if (serverConfig === undefined) return;
+		const editorConfig = toEditorCacheConfig(serverConfig);
+		setCacheConfig(editorConfig);
+		setServerCacheConfig(editorConfig);
+		setMode(inferMode(editorConfig));
+	}, [serverConfig]);
+
+	useEffect(() => {
+		if (providersError) {
+			toast.error(`Failed to load providers: ${getErrorMessage(providersError as any)}`);
+		}
+	}, [providersError]);
+
+	// Surface validation problems inline rather than only on Save click.
+	const validationError = useMemo(() => validateForSave(cacheConfig, mode), [cacheConfig, mode]);
+
+	// Only show the dimension/namespace heads-up when the user has actually
+	// touched a structural field. Showing it permanently in semantic mode
+	// trains users to ignore it; showing it on diff makes it land.
+	const hasStructuralChange = useMemo(() => {
+		return (
+			cacheConfig.provider !== serverCacheConfig.provider ||
+			cacheConfig.embedding_model !== serverCacheConfig.embedding_model ||
+			cacheConfig.dimension !== serverCacheConfig.dimension
+		);
+	}, [cacheConfig, serverCacheConfig]);
+
+	const hasUnsavedConfigChanges = useMemo(() => {
+		const fields: (keyof EditorCacheConfig)[] = [
+			"provider",
+			"embedding_model",
+			"dimension",
+			"ttl",
+			"threshold",
+			"conversation_history_threshold",
+			"exclude_system_prompt",
+			"cache_by_model",
+			"cache_by_provider",
+			"cleanup_on_shutdown",
+			"vector_store_namespace",
+			"default_cache_key",
+		];
+		const changed = fields.some((k) => (cacheConfig[k] ?? "") !== (serverCacheConfig[k] ?? ""));
+		const modeChanged = inferMode(serverCacheConfig) !== mode;
+		return changed || modeChanged;
+	}, [cacheConfig, serverCacheConfig, mode]);
+
+	const updateLocal = (updates: Partial<EditorCacheConfig>) => {
+		setCacheConfig((prev) => ({ ...prev, ...updates }));
+	};
+
+	// Toggle handler. Hits the global config endpoint so the server-side
+	// compat-shim can ReloadPlugin/RemovePlugin transparently. We don't gate
+	// on hasConfigSaved here because Bifrost rejects the enable-true PUT
+	// when no local-cache row exists; we surface that as a toast.
+	const handleToggle = async (checked: boolean) => {
+		if (!bifrostConfig) return;
+		try {
+			const next: BifrostConfig = {
+				...bifrostConfig,
+				client_config: { ...bifrostConfig.client_config, enable_local_cache: checked },
+			};
+			await updateCoreConfig(next).unwrap();
+			toast.success(checked ? "Local cache enabled" : "Local cache disabled");
+		} catch (error) {
+			toast.error(`Failed to ${checked ? "enable" : "disable"} local cache: ${getErrorMessage(error)}`);
+		}
+	};
+
+	const handleSave = async () => {
+		const err = validateForSave(cacheConfig, mode);
+		if (err) {
+			toast.error(err);
+			return;
+		}
+		const payload = buildPayload(cacheConfig, mode);
+		try {
+			const updated = await updateLocalCacheConfig(payload).unwrap();
+			const editor = toEditorCacheConfig(updated);
+			setCacheConfig(editor);
+			setServerCacheConfig(editor);
+			setMode(inferMode(editor));
+			toast.success("Cache configuration updated");
+		} catch (error) {
+			toast.error(`Failed to update cache configuration: ${getErrorMessage(error)}`);
+		}
+	};
+
+	const cachingActive = enabledOnServer && isVectorStoreEnabled;
+	const isLoading = configLoading || localCacheLoading;
 
 	return (
-		<div className="mx-auto w-full max-w-4xl space-y-4">
+		<div className="mx-auto w-full max-w-4xl space-y-6">
 			<div>
-				<h2 className="text-lg font-semibold tracking-tight">Caching</h2>
-				<p className="text-muted-foreground text-sm">Configure semantic caching for requests.</p>
+				<h2 className="text-lg font-semibold tracking-tight">Local Cache</h2>
+				<p className="text-muted-foreground text-sm">
+					Cache responses locally with two complementary lookup paths: <b>direct</b> hash matching for exact replays, and{" "}
+					<b>semantic</b> similarity search for related content. Send the <b>x-bf-cache-key</b> header to scope cached
+					responses to a tenant or feature.
+				</p>
 			</div>
 
-			{isLoading && (
-				<div className="flex items-center justify-center py-8">
-					<p className="text-muted-foreground">Loading configuration...</p>
-				</div>
-			)}
-
 			{configError !== undefined && (
 				<div className="border-destructive/50 bg-destructive/10 rounded-lg border p-4">
 					<p className="text-destructive text-sm font-medium">Failed to load configuration</p>
@@ -26,7 +239,413 @@ export default function CachingView() {
 				</div>
 			)}
 
-			{!isLoading && !configError && <PluginsForm isVectorStoreEnabled={bifrostConfig?.is_cache_connected ?? false} />}
+			{isLoading && (
+				<div className="flex items-center justify-center py-8">
+					<Loader2 className="text-muted-foreground h-4 w-4 animate-spin" />
+				</div>
+			)}
+
+			{!isLoading && !configError && (
+				<div className="space-y-4">
+					{/* Enable toggle row hits PUT /api/config with
+					    client.enable_local_cache. The server-side compat-shim
+					    handles ReloadPlugin / RemovePlugin transparently. */}
+					<div className="flex items-center justify-between space-x-2">
+						<div className="space-y-0.5">
+							<label htmlFor="enable-caching" className="text-sm font-medium">
+								Enable Local Cache
+							</label>
+							<p className="text-muted-foreground text-sm">
+								Loads (or unloads) the plugin without a server restart. Configuration changes you make below mutate the live
+								plugin in place, no redeploy needed.{" "}
+								{!isVectorStoreEnabled && (
+									<span className="text-destructive font-medium">
+										Requires a vector store to be configured and enabled in <code>config.json</code>.
+									</span>
+								)}
+							</p>
+						</div>
+						<Switch
+							id="enable-caching"
+							size="md"
+							checked={cachingActive}
+							disabled={!isVectorStoreEnabled || isToggling}
+							onCheckedChange={handleToggle}
+						/>
+					</div>
+
+					{providersLoading ? (
+						<div className="flex items-center justify-center py-4">
+							<Loader2 className="text-muted-foreground h-4 w-4 animate-spin" />
+						</div>
+					) : (
+						<>
+							<div
+								className={cn("space-y-4", !cachingActive && "pointer-events-none opacity-50")}
+								aria-disabled={!cachingActive}
+							>
+								{/* Mode picker. Direct-only is first-class. */}
+								<div className="space-y-2">
+									<Label className="text-sm font-medium">Cache Mode</Label>
+									<Tabs value={mode} onValueChange={(v) => setMode(v as CacheMode)}>
+										<TabsList className="grid w-full grid-cols-2">
+											<TabsTrigger value="direct">Direct only</TabsTrigger>
+											<TabsTrigger
+												value="semantic"
+												disabled={embeddingProviders.length === 0}
+												title={
+													embeddingProviders.length === 0
+														? "Configure an embedding-capable provider to enable semantic mode."
+														: undefined
+												}
+											>
+												Direct + Semantic
+											</TabsTrigger>
+										</TabsList>
+									</Tabs>
+									<p className="text-muted-foreground text-xs">
+										{mode === "direct" ? (
+											<>
+												Direct-only mode hashes each request and replays an exact match. No embeddings, no provider needed.
+												Cheapest path, perfect for stable prompts.
+											</>
+										) : (
+											<>
+												Direct + semantic mode adds vector similarity search on top of direct hash matching. Requires an
+												embedding-capable provider and the model&apos;s real dimension. Direct hits are still served first;
+												semantic search runs only when the direct lookup misses.
+											</>
+										)}
+									</p>
+								</div>
+
+								{validationError && (
+									<div className="border-destructive/40 bg-destructive/10 text-destructive rounded-md border p-3 text-xs">
+										{validationError}
+									</div>
+								)}
+
+								{/* Provider/model/dimension only appear in semantic mode. */}
+								{mode === "semantic" && (
+									<>
+										{hasStructuralChange && (
+											<div className="rounded-md border border-amber-200 bg-amber-50 p-3 text-xs text-amber-900">
+												<b>Heads up:</b> a vector store namespace can only hold vectors of <em>one</em> dimension. Whenever you
+												change the embedding <b>provider</b>, <b>model</b>, or <b>dimension</b>, make sure the <b>dimension</b>{" "}
+												still matches what the model produces, otherwise writes to the existing namespace will fail and reads
+												will silently miss. The namespace is <em>not</em> recreated automatically; either use a fresh namespace
+												or drop the existing class/index in your vector store before saving.
+											</div>
+										)}
+
+										<div className="space-y-4">
+											<h3 className="text-sm font-medium">Embedding Provider &amp; Model</h3>
+											<div className="grid grid-cols-2 gap-4">
+												<div className="space-y-2">
+													<Label htmlFor="provider">Configured Providers</Label>
+													<Select
+														value={cacheConfig.provider}
+														onValueChange={(value: ModelProviderName) =>
+															updateLocal({
+																provider: value,
+																embedding_model: value === cacheConfig.provider ? cacheConfig.embedding_model : "",
+															})
+														}
+													>
+														<SelectTrigger className="w-full">
+															<SelectValue placeholder="Select provider" />
+														</SelectTrigger>
+														<SelectContent>
+															{embeddingProviders
+																.filter((provider) => provider.name)
+																.map((provider) => (
+																	<SelectItem key={provider.name} value={provider.name}>
+																		<div className="flex items-center gap-2">
+																			<RenderProviderIcon
+																				provider={provider.name as ProviderIconType}
+																				size="sm"
+																				className="h-4 w-4"
+																			/>
+																			<span>{getProviderLabel(provider.name)}</span>
+																		</div>
+																	</SelectItem>
+																))}
+														</SelectContent>
+													</Select>
+												</div>
+												<div className="space-y-2">
+													<Label htmlFor="embedding_model">Embedding Model*</Label>
+													<ModelMultiselect
+														inputId="embedding_model"
+														isSingleSelect
+														provider={cacheConfig.provider || undefined}
+														value={cacheConfig.embedding_model ?? ""}
+														onChange={(model) => updateLocal({ embedding_model: model })}
+														placeholder={cacheConfig.provider ? "Search or type an embedding model..." : "Select a provider first"}
+														disabled={!cacheConfig.provider}
+													/>
+												</div>
+											</div>
+											<p className="text-muted-foreground text-xs">
+												API keys are inherited from the embedding provider&apos;s main configuration, you don&apos;t need to
+												add them again here.
+											</p>
+											<div className="space-y-2">
+												<Label htmlFor="dimension">Dimension</Label>
+												<Input
+													id="dimension"
+													type="number"
+													min="2"
+													value={
+														cacheConfig.dimension === undefined || Number.isNaN(cacheConfig.dimension) ? "" : cacheConfig.dimension
+													}
+													onChange={(e) => {
+														const value = e.target.value;
+														if (value === "") {
+															updateLocal({ dimension: undefined });
+															return;
+														}
+														const parsed = parseInt(value);
+														if (!Number.isNaN(parsed)) {
+															updateLocal({ dimension: parsed });
+														}
+													}}
+												/>
+												<p className="text-muted-foreground text-xs">
+													Vector size produced by the embedding model. Must match the model exactly (e.g. <code>1536</code>{" "}
+													for OpenAI <code>text-embedding-3-small</code>, <code>3072</code> for{" "}
+													<code>text-embedding-3-large</code>, <code>768</code> for many Cohere/Voyage models).
+												</p>
+											</div>
+										</div>
+									</>
+								)}
+
+								{/* Cache settings shared across modes. */}
+								<div className="space-y-4">
+									<h3 className="text-sm font-medium">Cache Settings</h3>
+									<div className={cn("grid gap-4", mode === "semantic" ? "grid-cols-2" : "grid-cols-1")}>
+										<div className="space-y-2">
+											<Label htmlFor="ttl">TTL (seconds)</Label>
+											<Input
+												id="ttl"
+												type="number"
+												min="1"
+												value={cacheConfig.ttl === undefined || Number.isNaN(cacheConfig.ttl) ? "" : cacheConfig.ttl}
+												onChange={(e) => {
+													const value = e.target.value;
+													if (value === "") {
+														updateLocal({ ttl: undefined });
+														return;
+													}
+													const parsed = parseInt(value);
+													if (!Number.isNaN(parsed)) {
+														updateLocal({ ttl: parsed });
+													}
+												}}
+											/>
+											<p className="text-muted-foreground text-xs">
+												How long cached entries live before they expire. Override per-request via the <b>x-bf-cache-ttl</b> header.
+											</p>
+										</div>
+										{mode === "semantic" && (
+											<div className="space-y-2">
+												<Label htmlFor="threshold">Similarity Threshold</Label>
+												<Input
+													id="threshold"
+													type="number"
+													min="0"
+													max="1"
+													step="0.01"
+													value={
+														cacheConfig.threshold === undefined || Number.isNaN(cacheConfig.threshold) ? "" : cacheConfig.threshold
+													}
+													onChange={(e) => {
+														const value = e.target.value;
+														if (value === "") {
+															updateLocal({ threshold: undefined });
+															return;
+														}
+														const parsed = parseFloat(value);
+														if (!Number.isNaN(parsed)) {
+															updateLocal({ threshold: parsed });
+														}
+													}}
+												/>
+												<p className="text-muted-foreground text-xs">
+													Minimum cosine similarity for a semantic hit. Override per-request via{" "}
+													<b>x-bf-cache-threshold</b>.
+												</p>
+											</div>
+										)}
+									</div>
+								</div>
+
+								{/* Storage & Cache Key. */}
+								<div className="space-y-4">
+									<h3 className="text-sm font-medium">Storage &amp; Cache Key</h3>
+									<div className="grid grid-cols-2 gap-4">
+										<div className="space-y-2">
+											<Label htmlFor="vector_store_namespace">Vector Store Namespace</Label>
+											<Input
+												id="vector_store_namespace"
+												type="text"
+												placeholder="BifrostLocalCachePlugin"
+												value={cacheConfig.vector_store_namespace ?? ""}
+												onChange={(e) => updateLocal({ vector_store_namespace: e.target.value })}
+											/>
+											<p className="text-muted-foreground text-xs">
+												Bucket/index name where cache entries live. Leave blank to use the default (
+												<code>BifrostLocalCachePlugin</code>). Changing this points the plugin at a different (possibly empty)
+												bucket. Old entries are not deleted, they just stop being queried.
+											</p>
+										</div>
+										<div className="space-y-2">
+											<Label htmlFor="default_cache_key">Default Cache Key</Label>
+											<Input
+												id="default_cache_key"
+												type="text"
+												placeholder="(none)"
+												value={cacheConfig.default_cache_key ?? ""}
+												onChange={(e) => updateLocal({ default_cache_key: e.target.value })}
+											/>
+											<p className="text-muted-foreground text-xs">
+												Fallback partition key used when a request doesn&apos;t set <b>x-bf-cache-key</b>. Cache keys isolate
+												entries: same key ↔ shared cache pool. Leave blank to <b>disable caching</b> for any request that
+												doesn&apos;t send the header.
+											</p>
+										</div>
+									</div>
+								</div>
+
+								{/* Conversation Settings. */}
+								<div className="space-y-4">
+									<h3 className="text-sm font-medium">Conversation Settings</h3>
+									<div className="grid grid-cols-2 gap-4">
+										<div className="space-y-2">
+											<Label htmlFor="conversation_history_threshold">Conversation History Threshold</Label>
+											<Input
+												id="conversation_history_threshold"
+												type="number"
+												min="1"
+												max="50"
+												value={cacheConfig.conversation_history_threshold || 3}
+												onChange={(e) =>
+													updateLocal({ conversation_history_threshold: parseInt(e.target.value) || 3 })
+												}
+											/>
+											<p className="text-muted-foreground text-xs">
+												Skip caching for conversations with more than this many messages. Long histories rarely match exactly
+												and inflate the cache without paying off.
+											</p>
+										</div>
+									</div>
+									<div className="space-y-2">
+										<div className="flex h-fit items-center justify-between space-x-2 rounded-lg border p-3">
+											<div className="space-y-0.5">
+												<Label className="text-sm font-medium">Exclude System Prompt</Label>
+												<p className="text-muted-foreground text-xs">Strip system messages from the cache key.</p>
+											</div>
+											<Switch
+												checked={cacheConfig.exclude_system_prompt || false}
+												onCheckedChange={(checked) => updateLocal({ exclude_system_prompt: checked })}
+												size="md"
+											/>
+										</div>
+									</div>
+								</div>
+
+								{/* Cache key composition: applies to both modes. */}
+								<div className="space-y-4">
+									<h3 className="text-sm font-medium">Cache Key Composition</h3>
+									<div className="space-y-3">
+										<div className="flex items-center justify-between space-x-2 rounded-lg border p-3">
+											<div className="space-y-0.5">
+												<Label className="text-sm font-medium">Cache by Model</Label>
+												<p className="text-muted-foreground text-xs">
+													Include model name in the cache key. Different models won&apos;t share cached responses.
+												</p>
+											</div>
+											<Switch
+												checked={cacheConfig.cache_by_model}
+												onCheckedChange={(checked) => updateLocal({ cache_by_model: checked })}
+												size="md"
+											/>
+										</div>
+										<div className="flex items-center justify-between space-x-2 rounded-lg border p-3">
+											<div className="space-y-0.5">
+												<Label className="text-sm font-medium">Cache by Provider</Label>
+												<p className="text-muted-foreground text-xs">
+													Include provider name in the cache key. Different providers won&apos;t share cached responses.
+												</p>
+											</div>
+											<Switch
+												checked={cacheConfig.cache_by_provider}
+												onCheckedChange={(checked) => updateLocal({ cache_by_provider: checked })}
+												size="md"
+											/>
+										</div>
+									</div>
+								</div>
+
+								<div className="space-y-2">
+									<Label className="text-sm font-medium">Per-request overrides</Label>
+									<ul className="text-muted-foreground list-inside list-disc text-xs">
+										<li>
+											<b>x-bf-cache-key</b>: scope this request to a specific cache partition.
+										</li>
+										<li>
+											<b>x-bf-cache-ttl</b>: override TTL for just this request.
+										</li>
+										<li>
+											<b>x-bf-cache-threshold</b>: override the semantic similarity threshold.
+										</li>
+										<li>
+											<b>x-bf-cache-type</b>: send <code>direct</code> or <code>semantic</code> to limit lookup to one path.
+										</li>
+										<li>
+											<b>x-bf-cache-no-store</b>: <code>true</code> to skip writing the response (still serves cached hits).
+										</li>
+									</ul>
+								</div>
+
+								{/* Danger zone. cleanup_on_shutdown is destructive: it deletes
+								    every entry tagged from_bifrost_local_cache_plugin and drops
+								    the namespace on plugin Cleanup. Surfaced behind explicit
+								    visual treatment so it isn't toggled accidentally. */}
+								<div className="space-y-4">
+									<h3 className="text-destructive text-sm font-medium">Danger Zone</h3>
+									<div className="border-destructive/40 bg-destructive/5 flex items-center justify-between space-x-2 rounded-lg border p-3">
+										<div className="space-y-0.5">
+											<Label className="text-sm font-medium">Cleanup on Shutdown</Label>
+											<p className="text-muted-foreground text-xs">
+												On every plugin shutdown, delete every cache entry tagged{" "}
+												<code>from_bifrost_local_cache_plugin</code> and drop the vector store namespace. Plugin shutdown
+												happens on server restart <em>and</em> whenever you toggle <b>Enable Local Cache</b> off.{" "}
+												<span className="text-destructive font-medium">
+													Destructive: every cached entry is lost the next time the plugin shuts down.
+												</span>{" "}
+												Default off so cache survives restarts and toggle flips.
+											</p>
+										</div>
+										<Switch
+											checked={cacheConfig.cleanup_on_shutdown || false}
+											onCheckedChange={(checked) => updateLocal({ cleanup_on_shutdown: checked })}
+											size="md"
+										/>
+									</div>
+								</div>
+							</div>
+
+							<div className="flex justify-end pt-2">
+								<Button onClick={handleSave} disabled={!hasUnsavedConfigChanges || isSaving || Boolean(validationError)}>
+									{isSaving ? "Saving..." : "Save Changes"}
+								</Button>
+							</div>
+						</>
+					)}
+				</div>
+			)}
 		</div>
 	);
-}
\ No newline at end of file
+}
diff --git a/ui/app/workspace/config/views/pluginsForm.tsx b/ui/app/workspace/config/views/pluginsForm.tsx
deleted file mode 100644
index fc4ddae7da..0000000000
--- a/ui/app/workspace/config/views/pluginsForm.tsx
+++ /dev/null
@@ -1,566 +0,0 @@
-import { Button } from "@/components/ui/button";
-import { Card, CardContent } from "@/components/ui/card";
-import { Input } from "@/components/ui/input";
-import { Label } from "@/components/ui/label";
-import { ModelMultiselect } from "@/components/ui/modelMultiselect";
-import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
-import { Separator } from "@/components/ui/separator";
-import { Switch } from "@/components/ui/switch";
-import { ProviderIconType, RenderProviderIcon } from "@/lib/constants/icons";
-import { EmbeddingSupportedProviders, getProviderLabel } from "@/lib/constants/logs";
-import { getErrorMessage, useCreatePluginMutation, useGetPluginsQuery, useGetProvidersQuery, useUpdatePluginMutation } from "@/lib/store";
-import { CacheConfig, EditorCacheConfig, ModelProvider, ModelProviderName } from "@/lib/types/config";
-import { SEMANTIC_CACHE_PLUGIN } from "@/lib/types/plugins";
-import { cacheConfigSchema } from "@/lib/types/schemas";
-import { Loader2 } from "lucide-react";
-import { useEffect, useMemo, useState } from "react";
-import { toast } from "sonner";
-
-// Semantic caching needs an embedding-capable provider. Built-in providers are
-// gated by EmbeddingSupportedProviders; custom providers expose support via
-// custom_provider_config.allowed_requests.embedding.
-const supportsEmbedding = (provider: ModelProvider): boolean => {
-	if (provider.custom_provider_config) {
-		return provider.custom_provider_config.allowed_requests?.embedding === true;
-	}
-	return (EmbeddingSupportedProviders as readonly string[]).includes(provider.name);
-};
-
-const defaultCacheConfig: EditorCacheConfig = {
-	ttl: 300,
-	threshold: 0.8,
-	conversation_history_threshold: 3,
-	exclude_system_prompt: false,
-	cache_by_model: true,
-	cache_by_provider: true,
-};
-
-const toEditorCacheConfig = (config?: Partial<CacheConfig> & { ttl_seconds?: number }): EditorCacheConfig => {
-	const { ttl_seconds, ...rest } = config ?? {};
-	const merged: EditorCacheConfig = { ...defaultCacheConfig, ...rest };
-	// Migration: older saves stored TTL under `ttl_seconds`; the Go plugin only
-	// reads `ttl`, so adopt the legacy value if the new field isn't present.
-	if (rest.ttl === undefined && typeof ttl_seconds === "number") {
-		merged.ttl = ttl_seconds;
-	}
-	return merged;
-};
-
-const normalizeCacheConfigForSave = (config: EditorCacheConfig) => {
-	const normalized: Record<string, unknown> = {
-		ttl: config.ttl,
-		threshold: config.threshold,
-		cache_by_model: config.cache_by_model,
-		cache_by_provider: config.cache_by_provider,
-	};
-
-	if (config.conversation_history_threshold !== undefined) {
-		normalized.conversation_history_threshold = config.conversation_history_threshold;
-	}
-	if (config.exclude_system_prompt !== undefined) {
-		normalized.exclude_system_prompt = config.exclude_system_prompt;
-	}
-	if (config.created_at !== undefined) {
-		normalized.created_at = config.created_at;
-	}
-	if (config.updated_at !== undefined) {
-		normalized.updated_at = config.updated_at;
-	}
-
-	const provider = config.provider?.trim();
-	const embeddingModel = config.embedding_model?.trim();
-	const namespace = config.vector_store_namespace?.trim();
-	const defaultKey = config.default_cache_key?.trim();
-
-	if (provider) {
-		normalized.provider = provider;
-	}
-	if (embeddingModel) {
-		normalized.embedding_model = embeddingModel;
-	}
-	if (config.dimension !== undefined) {
-		normalized.dimension = config.dimension;
-	}
-	if (namespace) {
-		normalized.vector_store_namespace = namespace;
-	}
-	if (defaultKey) {
-		normalized.default_cache_key = defaultKey;
-	}
-
-	return normalized;
-};
-
-interface PluginsFormProps {
-	isVectorStoreEnabled: boolean;
-}
-
-export default function PluginsForm({ isVectorStoreEnabled }: PluginsFormProps) {
-	const [cacheConfig, setCacheConfig] = useState<EditorCacheConfig>(defaultCacheConfig);
-	const [originalCacheEnabled, setOriginalCacheEnabled] = useState<boolean>(false);
-	const [serverCacheConfig, setServerCacheConfig] = useState<EditorCacheConfig>(defaultCacheConfig);
-	const [serverCacheEnabled, setServerCacheEnabled] = useState<boolean>(false);
-
-	const { data: providersData, error: providersError, isLoading: providersLoading } = useGetProvidersQuery();
-
-	const providers = useMemo(() => providersData || [], [providersData]);
-	const embeddingProviders = useMemo(() => providers.filter(supportsEmbedding), [providers]);
-
-	useEffect(() => {
-		if (providersError) {
-			toast.error(`Failed to load providers: ${getErrorMessage(providersError as any)}`);
-		}
-	}, [providersError]);
-
-	// RTK Query hooks
-	const { data: plugins, isLoading: loading } = useGetPluginsQuery();
-	const [updatePlugin, { isLoading: isUpdating }] = useUpdatePluginMutation();
-	const [createPlugin, { isLoading: isCreating }] = useCreatePluginMutation();
-
-	// Get semantic cache plugin and its config
-	const semanticCachePlugin = useMemo(() => plugins?.find((plugin) => plugin.name === SEMANTIC_CACHE_PLUGIN), [plugins]);
-
-	const isSemanticCacheEnabled = Boolean(semanticCachePlugin?.enabled);
-	const loadedDirectOnlyConfig = serverCacheConfig.dimension === 1 && !serverCacheConfig.provider;
-	const hasInvalidProviderBackedDimension = cacheConfig.dimension === 1 && Boolean(cacheConfig.provider?.trim());
-
-	// Initialize cache config from plugin data
-	useEffect(() => {
-		if (semanticCachePlugin?.config) {
-			const config = toEditorCacheConfig(semanticCachePlugin.config as Partial<CacheConfig>);
-			setCacheConfig(config);
-			setServerCacheConfig(config);
-			setOriginalCacheEnabled(semanticCachePlugin.enabled);
-			setServerCacheEnabled(semanticCachePlugin.enabled);
-		}
-	}, [semanticCachePlugin]);
-
-	// Seed default provider/model/dimension when the providers list loads, but
-	// only for new configs that haven't picked a provider yet — re-running this
-	// effect on subsequent embeddingProviders changes would otherwise clobber
-	// an in-progress user selection.
-	useEffect(() => {
-		if (embeddingProviders.length > 0 && !semanticCachePlugin?.config) {
-			setCacheConfig((prev) => {
-				if (prev.provider) return prev;
-				return {
-					...prev,
-					provider: embeddingProviders[0].name as ModelProviderName,
-					embedding_model: prev.embedding_model ?? "text-embedding-3-small",
-					dimension: prev.dimension ?? 1536,
-				};
-			});
-		}
-	}, [embeddingProviders, semanticCachePlugin?.config]);
-
-	const hasChanges = useMemo(() => {
-		if (originalCacheEnabled !== serverCacheEnabled) return true;
-
-		return (
-			cacheConfig.provider !== serverCacheConfig.provider ||
-			cacheConfig.embedding_model !== serverCacheConfig.embedding_model ||
-			cacheConfig.dimension !== serverCacheConfig.dimension ||
-			cacheConfig.ttl !== serverCacheConfig.ttl ||
-			cacheConfig.threshold !== serverCacheConfig.threshold ||
-			cacheConfig.conversation_history_threshold !== serverCacheConfig.conversation_history_threshold ||
-			cacheConfig.exclude_system_prompt !== serverCacheConfig.exclude_system_prompt ||
-			cacheConfig.cache_by_model !== serverCacheConfig.cache_by_model ||
-			cacheConfig.cache_by_provider !== serverCacheConfig.cache_by_provider ||
-			(cacheConfig.vector_store_namespace ?? "") !== (serverCacheConfig.vector_store_namespace ?? "") ||
-			(cacheConfig.default_cache_key ?? "") !== (serverCacheConfig.default_cache_key ?? "")
-		);
-	}, [cacheConfig, serverCacheConfig, originalCacheEnabled, serverCacheEnabled]);
-
-	// Handle semantic cache toggle (create or update)
-	const handleSemanticCacheToggle = (enabled: boolean) => {
-		setOriginalCacheEnabled(enabled);
-	};
-
-	// Update cache config locally
-	const updateCacheConfigLocal = (updates: Partial<EditorCacheConfig>) => {
-		setCacheConfig((prev) => ({ ...prev, ...updates }));
-	};
-
-	// Save all changes
-	const handleSave = async () => {
-		if (hasInvalidProviderBackedDimension) {
-			toast.error(
-				"Provider-backed semantic cache requires the embedding model's real dimension. Use a value greater than 1, or remove the provider to keep direct-only mode.",
-			);
-			return;
-		}
-
-		const parseResult = cacheConfigSchema.safeParse(normalizeCacheConfigForSave(cacheConfig));
-		if (!parseResult.success) {
-			const firstIssue = parseResult.error.issues[0]?.message ?? "Semantic cache configuration is invalid.";
-			toast.error(firstIssue);
-			return;
-		}
-
-		const savedConfig = parseResult.data as CacheConfig;
-
-		try {
-			if (semanticCachePlugin) {
-				// Update existing plugin
-				await updatePlugin({
-					name: SEMANTIC_CACHE_PLUGIN,
-					data: { enabled: originalCacheEnabled, config: savedConfig },
-				}).unwrap();
-			} else {
-				// Create new plugin
-				await createPlugin({
-					name: SEMANTIC_CACHE_PLUGIN,
-					enabled: originalCacheEnabled,
-					config: savedConfig,
-					path: "",
-				}).unwrap();
-			}
-			toast.success("Plugin configuration updated successfully");
-			// Update server state to match current state
-			const normalizedConfig = toEditorCacheConfig(savedConfig);
-			setCacheConfig(normalizedConfig);
-			setServerCacheConfig(normalizedConfig);
-			setServerCacheEnabled(originalCacheEnabled);
-		} catch (error) {
-			const errorMessage = getErrorMessage(error);
-			toast.error(`Failed to update plugin configuration: ${errorMessage}`);
-		}
-	};
-
-	if (loading) {
-		return (
-			<Card>
-				<CardContent className="p-6">
-					<div className="text-muted-foreground">Loading plugins configuration...</div>
-				</CardContent>
-			</Card>
-		);
-	}
-
-	return (
-		<div className="space-y-6">
-			{/* Semantic Cache Toggle */}
-			<div className="rounded-lg border p-4">
-				<div className="flex items-center justify-between space-x-2">
-					<div className="flex-1 space-y-0.5">
-						<label htmlFor="enable-caching" className="text-sm font-medium">
-							Enable Semantic Caching
-						</label>
-						<p className="text-muted-foreground text-sm">
-							Enable semantic caching for requests. Send <b>x-bf-cache-key</b> header with requests to use semantic caching.{" "}
-							{!isVectorStoreEnabled && (
-								<span className="text-destructive font-medium">Requires vector store to be configured and enabled in config.json.</span>
-							)}
-							{!providersLoading && providers?.length === 0 && (
-								<span className="text-destructive font-medium"> Requires at least one provider to be configured.</span>
-							)}
-							{!providersLoading && providers.length > 0 && embeddingProviders.length === 0 && (
-								<span className="text-destructive font-medium">
-									{" "}
-									Requires at least one provider that supports embedding requests. Configure a built-in embedding provider, or enable the
-									<code className="mx-1">embedding</code>request type on a custom provider.
-								</span>
-							)}
-						</p>
-					</div>
-					<div className="flex items-center gap-2">
-						<Switch
-							id="enable-caching"
-							size="md"
-							checked={originalCacheEnabled && isVectorStoreEnabled}
-							disabled={!isVectorStoreEnabled || providersLoading || embeddingProviders.length === 0}
-							onCheckedChange={(checked) => {
-								if (isVectorStoreEnabled) {
-									handleSemanticCacheToggle(checked);
-								}
-							}}
-						/>
-					</div>
-				</div>
-
-				{/* Cache Configuration (only show when enabled) */}
-				{originalCacheEnabled &&
-					isVectorStoreEnabled &&
-					(providersLoading ? (
-						<div className="flex items-center justify-center">
-							<Loader2 className="h-4 w-4 animate-spin" />
-						</div>
-					) : (
-						<div className="mt-4 space-y-4">
-							<Separator />
-							{loadedDirectOnlyConfig && (
-								<div className="rounded-md border border-amber-200 bg-amber-50 p-3 text-xs text-amber-900">
-									This plugin was loaded in direct-only mode via <code>config.json</code>. The Web UI currently edits provider-backed
-									semantic cache settings; keep using <code>config.json</code> if you want to stay in direct-only mode.
-								</div>
-							)}
-							{hasInvalidProviderBackedDimension && (
-								<div className="rounded-md border border-red-200 bg-red-50 p-3 text-xs text-red-900">
-									You selected a provider while keeping <code>dimension: 1</code>. That is only valid for direct-only mode. Set the
-									embedding model&apos;s real dimension before saving, or remove the provider to stay in direct-only mode.
-								</div>
-							)}
-							<div className="rounded-md border border-amber-200 bg-amber-50 p-3 text-xs text-amber-900">
-								<b>Heads up:</b> a vector store namespace can only hold vectors of <em>one</em> dimension. Whenever you
-								change the embedding <b>provider</b>, <b>model</b>, or <b>dimension</b>, make sure the <b>dimension</b> still matches what the model produces - otherwise writes to the existing namespace will
-								fail and reads will silently miss. The namespace is <em>not</em> recreated automatically; either use a fresh namespace or drop the existing class/index in your vector store
-								before saving.
-							</div>
-							{/* Provider and Model Settings */}
-							<div className="space-y-4">
-								<h3 className="text-sm font-medium">Provider and Model Settings</h3>
-								<div className="grid grid-cols-2 gap-4">
-									<div className="space-y-2">
-										<Label htmlFor="provider">Configured Providers</Label>
-										<Select
-											value={cacheConfig.provider}
-											onValueChange={(value: ModelProviderName) =>
-												updateCacheConfigLocal({
-													provider: value,
-													embedding_model: value === cacheConfig.provider ? cacheConfig.embedding_model : "",
-												})
-											}
-										>
-											<SelectTrigger className="w-full">
-												<SelectValue placeholder="Select provider" />
-											</SelectTrigger>
-											<SelectContent>
-												{embeddingProviders
-													.filter((provider) => provider.name)
-													.map((provider) => (
-														<SelectItem key={provider.name} value={provider.name}>
-															<div className="flex items-center gap-2">
-																<RenderProviderIcon provider={provider.name as ProviderIconType} size="sm" className="h-4 w-4" />
-																<span>{getProviderLabel(provider.name)}</span>
-															</div>
-														</SelectItem>
-													))}
-											</SelectContent>
-										</Select>
-									</div>
-									<div className="space-y-2">
-										<Label htmlFor="embedding_model">Embedding Model*</Label>
-										<ModelMultiselect
-											inputId="embedding_model"
-											isSingleSelect
-											provider={cacheConfig.provider || undefined}
-											value={cacheConfig.embedding_model ?? ""}
-											onChange={(model) => updateCacheConfigLocal({ embedding_model: model })}
-											placeholder={cacheConfig.provider ? "Search or type an embedding model..." : "Select a provider first"}
-											disabled={!cacheConfig.provider}
-										/>
-									</div>
-								</div>
-								<p className="text-muted-foreground text-xs">
-									API keys for the embedding provider will be inherited from the main provider configuration. The semantic cache will use
-									the configured provider&apos;s keys automatically.
-								</p>
-							</div>
-
-							{/* Cache Settings */}
-							<div className="space-y-4">
-								<h3 className="text-sm font-medium">Cache Settings</h3>
-								<div className="grid grid-cols-2 gap-4">
-									<div className="space-y-2">
-										<Label htmlFor="ttl">TTL (seconds)</Label>
-										<Input
-											id="ttl"
-											type="number"
-											min="1"
-											value={cacheConfig.ttl === undefined || Number.isNaN(cacheConfig.ttl) ? "" : cacheConfig.ttl}
-											onChange={(e) => {
-												const value = e.target.value;
-												if (value === "") {
-													updateCacheConfigLocal({ ttl: undefined });
-													return;
-												}
-												const parsed = parseInt(value);
-												if (!Number.isNaN(parsed)) {
-													updateCacheConfigLocal({ ttl: parsed });
-												}
-											}}
-										/>
-									</div>
-									<div className="space-y-2">
-										<Label htmlFor="threshold">Similarity Threshold</Label>
-										<Input
-											id="threshold"
-											type="number"
-											min="0"
-											max="1"
-											step="0.01"
-											value={cacheConfig.threshold === undefined || Number.isNaN(cacheConfig.threshold) ? "" : cacheConfig.threshold}
-											onChange={(e) => {
-												const value = e.target.value;
-												if (value === "") {
-													updateCacheConfigLocal({ threshold: undefined });
-													return;
-												}
-												const parsed = parseFloat(value);
-												if (!Number.isNaN(parsed)) {
-													updateCacheConfigLocal({ threshold: parsed });
-												}
-											}}
-										/>
-									</div>
-									<div className="space-y-2">
-										<Label htmlFor="dimension">Dimension</Label>
-										<Input
-											id="dimension"
-											type="number"
-											min="1"
-											value={cacheConfig.dimension === undefined || Number.isNaN(cacheConfig.dimension) ? "" : cacheConfig.dimension}
-											onChange={(e) => {
-												const value = e.target.value;
-												if (value === "") {
-													updateCacheConfigLocal({ dimension: undefined });
-													return;
-												}
-												const parsed = parseInt(value);
-												if (!Number.isNaN(parsed)) {
-													updateCacheConfigLocal({ dimension: parsed });
-												}
-											}}
-										/>
-										<p className="text-muted-foreground text-xs">
-											Vector size produced by the embedding model - must match the model exactly (e.g. <code>1536</code> for
-											OpenAI <code>text-embedding-3-small</code>, <code>3072</code> for <code>text-embedding-3-large</code>,
-											<code>768</code> for many Cohere/Voyage models). Use <code>1</code> only in direct-only mode (no provider).
-										</p>
-									</div>
-								</div>
-							</div>
-
-							{/* Storage & Cache Key */}
-							<div className="space-y-4">
-								<h3 className="text-sm font-medium">Storage & Cache Key</h3>
-								<div className="grid grid-cols-2 gap-4">
-									<div className="space-y-2">
-										<Label htmlFor="vector_store_namespace">Vector Store Namespace</Label>
-										<Input
-											id="vector_store_namespace"
-											type="text"
-											placeholder="BifrostSemanticCachePlugin"
-											value={cacheConfig.vector_store_namespace ?? ""}
-											onChange={(e) => updateCacheConfigLocal({ vector_store_namespace: e.target.value })}
-										/>
-										<p className="text-muted-foreground text-xs">
-											Bucket/index name where cache entries are stored in the vector store. Leave blank to use the default
-											(<code>BifrostSemanticCachePlugin</code>). Changing the namespace points the plugin at a different (possibly empty) bucket. All previously
-											cached entries become inaccessible - every request will miss until the new namespace is repopulated.
-										</p>
-									</div>
-									<div className="space-y-2">
-										<Label htmlFor="default_cache_key">Default Cache Key</Label>
-										<Input
-											id="default_cache_key"
-											type="text"
-											placeholder="(none)"
-											value={cacheConfig.default_cache_key ?? ""}
-											onChange={(e) => updateCacheConfigLocal({ default_cache_key: e.target.value })}
-										/>
-										<p className="text-muted-foreground text-xs">
-											Fallback value used as the cache partition when a request doesn&apos;t set the <b>x-bf-cache-key</b> header.
-											Cache keys isolate entries: requests that share a key can hit each other&apos;s cached responses, while requests
-											with different keys can&apos;t. Leaving this blank means caching is <b>disabled</b> for any request that doesn&apos;t
-											send the header.
-										</p>
-									</div>
-								</div>
-							</div>
-
-							{/* Conversation Settings */}
-							<div className="space-y-4">
-								<h3 className="text-sm font-medium">Conversation Settings</h3>
-								<div className="grid grid-cols-2 gap-4">
-									<div className="space-y-2">
-										<Label htmlFor="conversation_history_threshold">Conversation History Threshold</Label>
-										<Input
-											id="conversation_history_threshold"
-											type="number"
-											min="1"
-											max="50"
-											value={cacheConfig.conversation_history_threshold || 3}
-											onChange={(e) => updateCacheConfigLocal({ conversation_history_threshold: parseInt(e.target.value) || 3 })}
-										/>
-										<p className="text-muted-foreground text-xs">
-											Skip caching for conversations with more than this number of messages (prevents false positives)
-										</p>
-									</div>
-								</div>
-								<div className="space-y-2">
-									<div className="flex h-fit items-center justify-between space-x-2 rounded-lg border p-3">
-										<div className="space-y-0.5">
-											<Label className="text-sm font-medium">Exclude System Prompt</Label>
-											<p className="text-muted-foreground text-xs">Exclude system messages from cache key generation</p>
-										</div>
-										<Switch
-											checked={cacheConfig.exclude_system_prompt || false}
-											onCheckedChange={(checked) => updateCacheConfigLocal({ exclude_system_prompt: checked })}
-											size="md"
-										/>
-									</div>
-								</div>
-							</div>
-
-							{/* Cache Behavior */}
-							<div className="space-y-4">
-								<h3 className="text-sm font-medium">Cache Behavior</h3>
-								<div className="space-y-3">
-									<div className="flex items-center justify-between space-x-2 rounded-lg border p-3">
-										<div className="space-y-0.5">
-											<Label className="text-sm font-medium">Cache by Model</Label>
-											<p className="text-muted-foreground text-xs">Include model name in cache key</p>
-										</div>
-										<Switch
-											checked={cacheConfig.cache_by_model}
-											onCheckedChange={(checked) => updateCacheConfigLocal({ cache_by_model: checked })}
-											size="md"
-										/>
-									</div>
-									<div className="flex items-center justify-between space-x-2 rounded-lg border p-3">
-										<div className="space-y-0.5">
-											<Label className="text-sm font-medium">Cache by Provider</Label>
-											<p className="text-muted-foreground text-xs">Include provider name in cache key</p>
-										</div>
-										<Switch
-											checked={cacheConfig.cache_by_provider}
-											onCheckedChange={(checked) => updateCacheConfigLocal({ cache_by_provider: checked })}
-											size="md"
-										/>
-									</div>
-								</div>
-							</div>
-
-							<div className="space-y-2">
-								<Label className="text-sm font-medium">Notes</Label>
-								<ul className="text-muted-foreground list-inside list-disc text-xs">
-									<li>
-										You can pass <b>x-bf-cache-ttl</b> header with requests to use request-specific TTL.
-									</li>
-									<li>
-										You can pass <b>x-bf-cache-threshold</b> header with requests to use request-specific similarity threshold.
-									</li>
-									<li>
-										You can pass <b>x-bf-cache-type</b> header with &quot;direct&quot; or &quot;semantic&quot; to control cache behavior.
-									</li>
-									<li>
-										You can pass <b>x-bf-cache-no-store</b> header with &quot;true&quot; to disable response caching.
-									</li>
-								</ul>
-							</div>
-
-							<div className="flex justify-end pt-2">
-								<Button
-									onClick={handleSave}
-									disabled={!hasChanges || isUpdating || isCreating || hasInvalidProviderBackedDimension}
-								>
-									{isUpdating || isCreating ? "Saving..." : "Save Changes"}
-								</Button>
-							</div>
-						</div>
-					))}
-			</div>
-		</div>
-	);
-}
\ No newline at end of file
diff --git a/ui/lib/store/apis/baseApi.ts b/ui/lib/store/apis/baseApi.ts
index 6214e434b0..5dcf1f7f14 100644
--- a/ui/lib/store/apis/baseApi.ts
+++ b/ui/lib/store/apis/baseApi.ts
@@ -125,6 +125,7 @@ export const baseApi = createApi({
 		"MCPClients",
 		"Config",
 		"CacheConfig",
+		"LocalCacheConfig",
 		"VirtualKeys",
 		"Teams",
 		"Customers",
diff --git a/ui/lib/store/apis/index.ts b/ui/lib/store/apis/index.ts
index 2f93191ac1..c1b22fe75e 100644
--- a/ui/lib/store/apis/index.ts
+++ b/ui/lib/store/apis/index.ts
@@ -5,6 +5,7 @@ export { baseApi, clearAuthStorage, getErrorMessage, setAuthToken } from "./base
 export * from "./configApi";
 export * from "./devApi";
 export * from "./governanceApi";
+export * from "./localCacheApi";
 export * from "./logsApi";
 export * from "./mcpApi";
 export * from "./mcpLogsApi";
diff --git a/ui/lib/store/apis/localCacheApi.ts b/ui/lib/store/apis/localCacheApi.ts
new file mode 100644
index 0000000000..b93eb61dfc
--- /dev/null
+++ b/ui/lib/store/apis/localCacheApi.ts
@@ -0,0 +1,99 @@
+import { CacheConfig } from "@/lib/types/config";
+import { baseApi } from "./baseApi";
+
+// Wire shape of GET /api/local-cache/config: CacheConfig, except that `ttl`
+// may be missing, a number, or a duration string. The editor and the PUT
+// path both work in seconds, so reads are normalized to seconds below.
+type LocalCacheConfigResponse = Omit<CacheConfig, "ttl"> & { ttl?: number | string };
+
+// Numeric TTLs above this are assumed to be nanoseconds rather than seconds
+// (1e7 seconds is ~115 days, longer than any realistic cache TTL).
+const NANOSECOND_TTL_THRESHOLD = 1e7;
+
+// One "<number><unit>" segment of a Go-style duration string. Sticky, so each
+// match must start exactly where the previous one ended. "ms" is listed before
+// "m" and "s" so the longer unit wins. \u00b5 is the micro sign and \u03bc is
+// Greek mu; Go's time.ParseDuration accepts either spelling of microseconds.
+const DURATION_SEGMENT = /(\d+(?:\.\d+)?)(ns|us|\u00b5s|\u03bcs|ms|s|m|h)/y;
+
+// Converts a magnitude in the given duration unit to seconds. An unrecognized
+// or empty unit is treated as seconds.
+function durationUnitToSeconds(value: number, unit: string): number {
+	switch (unit) {
+		case "ns":
+			return value / 1e9;
+		case "us":
+		case "\u00b5s":
+		case "\u03bcs":
+			return value / 1e6;
+		case "ms":
+			return value / 1e3;
+		case "m":
+			return value * 60;
+		case "h":
+			return value * 3600;
+		default:
+			return value;
+	}
+}
+
+// Parses a duration string into seconds. Accepts a bare number ("300", read as
+// seconds), a single segment ("5m", "1.5h"), or several contiguous segments as
+// produced by Go's Duration.String() ("5m0s", "1h30m0s"). Returns 0 for
+// anything else (empty, negative, unknown unit, stray characters).
+function parseDurationSeconds(raw: string): number {
+	if (/^\d+(?:\.\d+)?$/.test(raw)) return parseFloat(raw);
+	let total = 0;
+	let offset = 0;
+	while (offset < raw.length) {
+		DURATION_SEGMENT.lastIndex = offset;
+		const segment = DURATION_SEGMENT.exec(raw);
+		if (!segment) return 0;
+		const [, magnitude = "", unit = ""] = segment;
+		total += durationUnitToSeconds(parseFloat(magnitude), unit);
+		offset = DURATION_SEGMENT.lastIndex;
+	}
+	return total;
+}
+
+// Normalizes a TTL from the server to seconds: missing -> 0, numbers pass
+// through unless they look like nanoseconds, strings are parsed as durations.
+function normalizeTtlSeconds(ttl: number | string | null | undefined): number {
+	if (ttl == null) return 0;
+	if (typeof ttl === "number") {
+		return ttl > NANOSECOND_TTL_THRESHOLD ? Math.round(ttl / 1e9) : ttl;
+	}
+	return parseDurationSeconds(ttl);
+}
+
+export const localCacheApi = baseApi.injectEndpoints({
+	endpoints: (builder) => ({
+		// GET /api/local-cache/config — current live config. Returns an empty
+		// object when the plugin has never been configured (no DB row yet).
+		getLocalCacheConfig: builder.query<CacheConfig, void>({
+			query: () => "/local-cache/config",
+			providesTags: ["LocalCacheConfig"],
+			// Replace the wire-format ttl with seconds; other fields pass through.
+			transformResponse: (response: LocalCacheConfigResponse): CacheConfig =>
+				({ ...response, ttl: normalizeTtlSeconds(response.ttl) }) as CacheConfig,
+		}),
+
+		// PUT /api/local-cache/config — persists, then mutates the in-memory
+		// shared pointer so the running plugin observes the new values on its
+		// next request without a reload.
+		updateLocalCacheConfig: builder.mutation<CacheConfig, CacheConfig>({
+			query: (data) => ({
+				url: "/local-cache/config",
+				method: "PUT",
+				body: data,
+			}),
+			// Invalidates the cached config so subscribed queries refetch it.
+			invalidatesTags: ["LocalCacheConfig"],
+		}),
+	}),
+});
+
+export const {
+	useGetLocalCacheConfigQuery,
+	useUpdateLocalCacheConfigMutation,
+} = localCacheApi;
diff --git a/ui/lib/types/config.ts b/ui/lib/types/config.ts
index 47050634d0..97275f2760 100644
--- a/ui/lib/types/config.ts
+++ b/ui/lib/types/config.ts
@@ -471,6 +471,7 @@ export interface CoreConfig {
 	initial_pool_size: number;
 	prometheus_labels: string[];
 	enable_logging: boolean;
+	enable_local_cache?: boolean;
 	disable_content_logging: boolean;
 	allow_per_request_content_storage_override: boolean;
 	allow_per_request_raw_override: boolean;
@@ -503,6 +504,7 @@ export const DefaultCoreConfig: CoreConfig = {
 	initial_pool_size: 1000,
 	prometheus_labels: [],
 	enable_logging: true,
+	enable_local_cache: false,
 	disable_content_logging: false,
 	allow_per_request_content_storage_override: false,
 	allow_per_request_raw_override: false,
@@ -527,7 +529,8 @@ export const DefaultCoreConfig: CoreConfig = {
 	routing_chain_max_depth: 10,
 };
 
-// Semantic cache configuration types
+// Local cache configuration types (covers both direct hash matching and
+// embedding-based semantic similarity search)
 interface BaseCacheConfig {
 	ttl: number;
 	threshold: number;
@@ -537,6 +540,7 @@ interface BaseCacheConfig {
 	cache_by_provider: boolean;
 	vector_store_namespace?: string;
 	default_cache_key?: string;
+	cleanup_on_shutdown?: boolean;
 	created_at?: string;
 	updated_at?: string;
 }
diff --git a/ui/lib/types/plugins.ts b/ui/lib/types/plugins.ts
index ce9d2569e5..17a6e74e0c 100644
--- a/ui/lib/types/plugins.ts
+++ b/ui/lib/types/plugins.ts
@@ -1,6 +1,6 @@
 // Plugins types that match the Go backend structures
 
-export const SEMANTIC_CACHE_PLUGIN = "semantic_cache";
+export const LOCAL_CACHE_PLUGIN = "local_cache";
 export const MAXIM_PLUGIN = "maxim";
 
 export type PluginType = "llm" | "mcp" | "http";