Skip to content
Merged
106 changes: 104 additions & 2 deletions config/semantic-cache/config.hybrid.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,110 @@ semantic_cache:
hnsw_m: 16 # Number of bi-directional links
hnsw_ef_construction: 200 # Construction quality parameter

# Milvus configuration file path
backend_config_path: "config/semantic-cache/milvus.yaml"
# Milvus configuration example
milvus:
# Milvus connection settings
connection:
# Milvus server host (change for production deployment)
host: "localhost" # For production: use your Milvus cluster endpoint
# Milvus server port
port: 19530 # Standard Milvus port
# Database name (optional, defaults to "default")
database: "semantic_router_cache"
# Connection timeout in seconds
timeout: 30
# Authentication (enable for production)
auth:
enabled: false # Set to true for production
username: "" # Your Milvus username
password: "" # Your Milvus password
# TLS/SSL configuration (recommended for production)
tls:
enabled: false # Set to true for secure connections
cert_file: "" # Path to client certificate
key_file: "" # Path to client private key
ca_file: "" # Path to CA certificate
# Collection settings
collection:
# Name of the collection to store cache entries
name: "semantic_cache"
# Description of the collection
description: "Semantic cache for LLM request-response pairs"
# Vector field configuration
vector_field:
# Name of the vector field
name: "embedding"
# Dimension of the embeddings (auto-detected from model at runtime)
dimension: 384 # This value is ignored - dimension is auto-detected from the embedding model
# Metric type for similarity calculation
metric_type: "IP" # Inner Product (cosine similarity for normalized vectors)
# Index configuration for the vector field
index:
# Index type (HNSW is recommended for most use cases)
type: "HNSW"
# Index parameters
params:
M: 16 # Number of bi-directional links for each node
efConstruction: 64 # Search scope during index construction
# Search configuration
search:
# Search parameters
params:
ef: 64 # Search scope during search (should be >= topk)
# Number of top results to retrieve for similarity comparison
topk: 10
# Consistency level for search operations
consistency_level: "Session" # Options: Strong, Session, Bounded, Eventually
# Performance and resource settings
performance:
# Connection pool settings
connection_pool:
# Maximum number of connections in the pool
max_connections: 10
# Maximum idle connections
max_idle_connections: 5
# Connection timeout for acquiring from pool
acquire_timeout: 5
# Batch operation settings
batch:
# Maximum batch size for insert operations
insert_batch_size: 1000
# Batch timeout in seconds
timeout: 30
# Data management
data_management:
# Automatic data expiration (TTL) settings
ttl:
# Enable automatic TTL-based cleanup (requires TTL to be set in main config)
enabled: true
# Field name to store timestamp for TTL calculation
timestamp_field: "timestamp"
# Cleanup interval in seconds (how often to run cleanup)
cleanup_interval: 3600 # 1 hour
# Compaction settings
compaction:
# Enable automatic compaction
enabled: true
# Compaction interval in seconds
interval: 86400 # 24 hours
# Logging and monitoring
logging:
# Log level for Milvus client operations (debug, info, warn, error)
level: "info"
# Enable query/search logging for debugging
enable_query_log: false
# Enable performance metrics collection
enable_metrics: true
# Development and debugging settings
development:
# Drop collection on startup (WARNING: This will delete all cached data)
drop_collection_on_startup: true # Enable for development to test dynamic dimensions
# Create collection if it doesn't exist
auto_create_collection: true
# Print detailed error messages
verbose_errors: true
# (Deprecated) Alternatively, load the Milvus settings from a separate config file:
# backend_config_path: "config/semantic-cache/milvus.yaml"

tools:
enabled: true
Expand Down
67 changes: 66 additions & 1 deletion config/semantic-cache/config.redis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,72 @@ semantic_cache:
backend_type: "redis" # Using Redis vector database for semantic cache
similarity_threshold: 0.80 # Global threshold (lowered for better matching)
ttl_seconds: 3600
backend_config_path: "config/semantic-cache/redis.yaml"

# Redis configuration example
redis:
# Connection Settings
connection:
# Redis server host (change for production deployment)
host: "localhost" # For production: use your Redis cluster endpoint
# Redis server port
port: 6379 # Standard Redis port
# Database number (0-15 for standard Redis)
database: 0
# Password for authentication (leave empty if no auth required)
password: ""
# Connection timeout in seconds
timeout: 30
# TLS/SSL configuration (recommended for production)
tls:
enabled: false # Set to true for secure connections
cert_file: "" # Path to client certificate
key_file: "" # Path to client private key
ca_file: "" # Path to CA certificate
# Index settings for vector search
index:
# Name of the search index
name: "semantic_cache_idx"
# Key prefix for documents in this index
prefix: "doc:"
# Vector field configuration
vector_field:
# Name of the vector field
name: "embedding"
# Dimension of the embeddings (auto-detected from model at runtime)
dimension: 384 # This value is ignored - dimension is auto-detected from the embedding model
# Distance metric for similarity calculation
# Options: COSINE (cosine similarity), L2 (Euclidean distance), IP (inner product)
metric_type: "COSINE" # COSINE is recommended for semantic similarity
# Index type and parameters
# Options: HNSW (Hierarchical Navigable Small World) or FLAT (brute force)
index_type: "HNSW" # HNSW is recommended for performance
# Index parameters (only used when index_type is HNSW)
params:
M: 16 # Number of bi-directional links per node (default: 16)
efConstruction: 64 # Size of dynamic candidate list during construction (default: 64)
# Search configuration
search:
# Number of top results to retrieve for similarity comparison
topk: 1 # We only need the most similar entry for cache lookup
# Logging and monitoring
logging:
# Log level for Redis client operations (debug, info, warn, error)
level: "info"
# Enable query/search logging for debugging
enable_query_log: false
# Enable performance metrics collection
enable_metrics: true
# Development and debugging settings
development:
# Drop index on startup (WARNING: This will delete all cached data)
drop_index_on_startup: true # Enable for development to test dynamic dimensions
# Create index if it doesn't exist
auto_create_index: true
# Print detailed error messages
verbose_errors: true
# (Deprecated) Alternatively, load the Redis settings from a separate config file:
# backend_config_path: "config/semantic-cache/redis.yaml"

# Embedding model for semantic similarity matching
# Options: "bert" (fast, 384-dim), "qwen3" (high quality, 1024-dim, 32K context), "gemma" (balanced, 768-dim, 8K context)
# Default: "bert" (fastest, lowest memory)
Expand Down
126 changes: 86 additions & 40 deletions src/semantic-router/pkg/cache/cache_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,38 +41,77 @@ func NewCacheBackend(config CacheConfig) (CacheBackend, error) {
return NewInMemoryCache(options), nil

case MilvusCacheType:
logging.Debugf("Creating Milvus cache backend - ConfigPath: %s, TTL: %ds, Threshold: %.3f",
config.BackendConfigPath, config.TTLSeconds, config.SimilarityThreshold)
options := MilvusCacheOptions{
Enabled: config.Enabled,
SimilarityThreshold: config.SimilarityThreshold,
TTLSeconds: config.TTLSeconds,
ConfigPath: config.BackendConfigPath,
var options MilvusCacheOptions
if config.Milvus != nil {
logging.Debugf("Creating Milvus cache backend - Config: %v, TTL: %ds, Threshold: %.3f",
config.Milvus, config.TTLSeconds, config.SimilarityThreshold)
options = MilvusCacheOptions{
Enabled: config.Enabled,
SimilarityThreshold: config.SimilarityThreshold,
TTLSeconds: config.TTLSeconds,
Config: config.Milvus,
}
} else {
logging.Debugf("(Deprecated) Creating Milvus cache backend - ConfigPath: %s, TTL: %ds, Threshold: %.3f",
config.BackendConfigPath, config.TTLSeconds, config.SimilarityThreshold)
options = MilvusCacheOptions{
Enabled: config.Enabled,
SimilarityThreshold: config.SimilarityThreshold,
TTLSeconds: config.TTLSeconds,
ConfigPath: config.BackendConfigPath,
}
}
return NewMilvusCache(options)

case RedisCacheType:
logging.Debugf("Creating Redis cache backend - ConfigPath: %s, TTL: %ds, Threshold: %.3f",
config.BackendConfigPath, config.TTLSeconds, config.SimilarityThreshold)
options := RedisCacheOptions{
Enabled: config.Enabled,
SimilarityThreshold: config.SimilarityThreshold,
TTLSeconds: config.TTLSeconds,
ConfigPath: config.BackendConfigPath,
var options RedisCacheOptions
if config.Redis != nil {
logging.Debugf("Creating Redis cache backend - Config: %v, TTL: %ds, Threshold: %.3f",
config.Redis, config.TTLSeconds, config.SimilarityThreshold)
options = RedisCacheOptions{
Enabled: config.Enabled,
SimilarityThreshold: config.SimilarityThreshold,
TTLSeconds: config.TTLSeconds,
Config: config.Redis,
}
} else {
logging.Debugf("(Deprecated) Creating Redis cache backend - ConfigPath: %s, TTL: %ds, Threshold: %.3f",
config.BackendConfigPath, config.TTLSeconds, config.SimilarityThreshold)
options = RedisCacheOptions{
Enabled: config.Enabled,
SimilarityThreshold: config.SimilarityThreshold,
TTLSeconds: config.TTLSeconds,
ConfigPath: config.BackendConfigPath,
}
}
return NewRedisCache(options)

case HybridCacheType:
logging.Debugf("Creating Hybrid cache backend - MaxMemory: %d, TTL: %ds, Threshold: %.3f",
config.MaxMemoryEntries, config.TTLSeconds, config.SimilarityThreshold)
options := HybridCacheOptions{
Enabled: config.Enabled,
SimilarityThreshold: config.SimilarityThreshold,
TTLSeconds: config.TTLSeconds,
MaxMemoryEntries: config.MaxMemoryEntries,
HNSWM: config.HNSWM,
HNSWEfConstruction: config.HNSWEfConstruction,
MilvusConfigPath: config.BackendConfigPath,
var options HybridCacheOptions
if config.Milvus != nil {
logging.Debugf("Creating Hybrid cache backend - Config: %v, TTL: %ds, Threshold: %.3f",
config.Milvus, config.TTLSeconds, config.SimilarityThreshold)
options = HybridCacheOptions{
Enabled: config.Enabled,
SimilarityThreshold: config.SimilarityThreshold,
TTLSeconds: config.TTLSeconds,
MaxMemoryEntries: config.MaxMemoryEntries,
HNSWM: config.HNSWM,
HNSWEfConstruction: config.HNSWEfConstruction,
Milvus: config.Milvus,
}
} else {
logging.Debugf("(Deprecated) Creating Hybrid cache backend - MaxMemory: %d, TTL: %ds, Threshold: %.3f",
config.MaxMemoryEntries, config.TTLSeconds, config.SimilarityThreshold)
options = HybridCacheOptions{
Enabled: config.Enabled,
SimilarityThreshold: config.SimilarityThreshold,
TTLSeconds: config.TTLSeconds,
MaxMemoryEntries: config.MaxMemoryEntries,
HNSWM: config.HNSWM,
HNSWEfConstruction: config.HNSWEfConstruction,
MilvusConfigPath: config.BackendConfigPath,
}
}
return NewHybridCache(options)

Expand Down Expand Up @@ -112,25 +151,32 @@ func ValidateCacheConfig(config CacheConfig) error {
return fmt.Errorf("unsupported eviction_policy: %s", config.EvictionPolicy)
}
case MilvusCacheType:
if config.BackendConfigPath == "" {
return fmt.Errorf("backend_config_path is required for Milvus cache backend")
}
// Ensure the Milvus configuration file exists
if _, err := os.Stat(config.BackendConfigPath); os.IsNotExist(err) {
logging.Debugf("Milvus config file not found: %s", config.BackendConfigPath)
return fmt.Errorf("milvus config file not found: %s", config.BackendConfigPath)
if config.Milvus == nil {
logging.Debugf("Milvus configuration not provided. Using backend_config_path: %s", config.BackendConfigPath)
if config.BackendConfigPath == "" {
return fmt.Errorf("backend_config_path is required for Milvus cache backend")
}
// Ensure the Milvus configuration file exists
if _, err := os.Stat(config.BackendConfigPath); os.IsNotExist(err) {
logging.Debugf("Milvus config file not found: %s", config.BackendConfigPath)
return fmt.Errorf("milvus config file not found: %s", config.BackendConfigPath)
}
logging.Debugf("Milvus config file found: %s", config.BackendConfigPath)
}
logging.Debugf("Milvus config file found: %s", config.BackendConfigPath)
logging.Debugf("Milvus configuration: %+v", config.Milvus)
case RedisCacheType:
if config.BackendConfigPath == "" {
return fmt.Errorf("backend_config_path is required for Redis cache backend")
}
// Ensure the Redis configuration file exists
if _, err := os.Stat(config.BackendConfigPath); os.IsNotExist(err) {
logging.Debugf("Redis config file not found: %s", config.BackendConfigPath)
return fmt.Errorf("redis config file not found: %s", config.BackendConfigPath)
if config.Redis == nil {
logging.Debugf("Redis configuration not provided. Using backend_config_path: %s", config.BackendConfigPath)
if config.BackendConfigPath == "" {
return fmt.Errorf("backend_config_path is required for Redis cache backend")
}
// Ensure the Redis configuration file exists
if _, err := os.Stat(config.BackendConfigPath); os.IsNotExist(err) {
logging.Debugf("Redis config file not found: %s", config.BackendConfigPath)
return fmt.Errorf("redis config file not found: %s", config.BackendConfigPath)
}
logging.Debugf("Redis config file found: %s", config.BackendConfigPath)
}
logging.Debugf("Redis config file found: %s", config.BackendConfigPath)
}

return nil
Expand Down
14 changes: 12 additions & 2 deletions src/semantic-router/pkg/cache/cache_interface.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package cache

import "time"
import (
"time"

"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
)

// CacheEntry represents a complete cached request-response pair with associated metadata
type CacheEntry struct {
Expand Down Expand Up @@ -112,7 +116,13 @@ type CacheConfig struct {
// EvictionPolicy defines the eviction policy for in-memory cache ("fifo", "lru", "lfu")
EvictionPolicy EvictionPolicyType `yaml:"eviction_policy,omitempty"`

// BackendConfigPath points to backend-specific configuration files
// Redis specific settings
Redis *config.RedisConfig `yaml:"redis,omitempty"`

// Milvus specific settings
Milvus *config.MilvusConfig `yaml:"milvus,omitempty"`

// BackendConfigPath points to a backend-specific configuration file.
//
// Deprecated: configure the backend inline via the Redis or Milvus field instead.
BackendConfigPath string `yaml:"backend_config_path,omitempty"`

// UseHNSW enables HNSW index for faster search in memory backend
Expand Down
Loading
Loading