3 files changed: +49 −0 lines

File 1 of 3

@@ -33,6 +33,7 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from metrics.utils import update_llm_token_count_from_turn
+from models.cache_entry import CacheEntry
 from models.config import Action
 from models.database.conversations import UserConversation
 from models.requests import Attachment, QueryRequest
@@ -47,6 +48,7 @@
     check_configuration_loaded,
     get_agent,
     get_system_prompt,
+    store_conversation_into_cache,
     validate_conversation_ownership,
     validate_model_provider_override,
 )
@@ -279,6 +281,16 @@ async def query_endpoint_handler(
         provider_id=provider_id,
     )
 
+    store_conversation_into_cache(
+        configuration,
+        user_id,
+        conversation_id,
+        provider_id,
+        model_id,
+        query_request.query,
+        summary.llm_response,
+    )
+
     # Convert tool calls to response format
     logger.info("Processing tool calls...")
     tool_calls = [
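Both this non-streaming handler and the streaming handler in the next file call the same helper once the turn summary is available. As a minimal sketch of what those calls assume: only insert_or_append and its four positional arguments (user id, conversation id, entry, boolean flag) appear in this diff, so the interface below is an illustrative guess, with invented parameter names.

# Assumed sketch of the conversation-cache interface the new calls rely on;
# only the method name and positional shape are taken from the diff.
from typing import Protocol

from models.cache_entry import CacheEntry  # repo module; assumed importable


class ConversationCache(Protocol):
    def insert_or_append(
        self,
        user_id: str,
        conversation_id: str,
        cache_entry: CacheEntry,
        skip_user_id_check: bool,  # hypothetical name for the final flag
    ) -> None:
        ...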
File 2 of 3

@@ -47,6 +47,7 @@
     check_configuration_loaded,
     get_agent,
     get_system_prompt,
+    store_conversation_into_cache,
     validate_model_provider_override,
 )
 from utils.mcp_headers import handle_mcp_headers_with_toolgroups, mcp_headers_dependency
@@ -704,6 +705,16 @@ async def response_generator(
         attachments=query_request.attachments or [],
     )
 
+    store_conversation_into_cache(
+        configuration,
+        user_id,
+        conversation_id,
+        provider_id,
+        model_id,
+        query_request.query,
+        summary.llm_response,
+    )
+
     persist_user_conversation_details(
         user_id=user_id,
         conversation_id=conversation_id,
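For reference, the CacheEntry imported in the first and third files is constructed with exactly four keyword arguments, so its shape can be inferred. A rough sketch, assuming a Pydantic model like the rest of the models package; the real definition in models.cache_entry may carry extra fields or validation.

# Inferred sketch of models.cache_entry.CacheEntry, based solely on the
# keyword arguments used in store_conversation_into_cache below.
from pydantic import BaseModel


class CacheEntry(BaseModel):
    query: str
    response: str
    provider: str
    model: str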
File 3 of 3

@@ -6,6 +6,7 @@
 from llama_stack_client.lib.agents.agent import AsyncAgent
 
 import constants
+from models.cache_entry import CacheEntry
 from models.requests import QueryRequest
 from models.database.conversations import UserConversation
 from models.config import Action
@@ -135,6 +136,31 @@ def validate_model_provider_override(
     )
 
 
+# # pylint: disable=R0913,R0917
+def store_conversation_into_cache(
+    config: AppConfig,
+    user_id: str,
+    conversation_id: str,
+    provider_id: str,
+    model_id: str,
+    query: str,
+    response: str,
+) -> None:
+    """Store a single query/response turn in the conversation history cache."""
+    if config.conversation_cache_configuration.type is not None:
+        cache = config.conversation_cache
+        if cache is None:
+            logger.warning("Conversation cache configured but not initialized")
+            return
+        cache_entry = CacheEntry(
+            query=query,
+            response=response,
+            provider=provider_id,
+            model=model_id,
+        )
+        cache.insert_or_append(user_id, conversation_id, cache_entry, False)
+
+
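The guard logic above (skip when no cache type is configured, warn and return when a cache is configured but not initialized, otherwise build a CacheEntry and delegate) can be exercised with a mocked config. A hedged test sketch; the module path utils.endpoints is an assumption, since this view does not name the file that defines the helper.

# Illustrative unit test for store_conversation_into_cache; the MagicMock
# stands in for the real AppConfig, and the import path is assumed.
from unittest.mock import MagicMock

from utils.endpoints import store_conversation_into_cache


def test_store_conversation_into_cache_inserts_entry() -> None:
    config = MagicMock()
    config.conversation_cache_configuration.type = "memory"  # any non-None type

    store_conversation_into_cache(
        config, "user-1", "conv-1", "provider-1", "model-1", "hi", "hello"
    )

    # The helper should have built a CacheEntry and delegated to the cache.
    config.conversation_cache.insert_or_append.assert_called_once()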
 # # pylint: disable=R0913,R0917
 async def get_agent(
     client: AsyncLlamaStackClient,