3 files changed: +49 −0 lines

File 1 of 3

@@ -33,6 +33,7 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from metrics.utils import update_llm_token_count_from_turn
+from models.cache_entry import CacheEntry
 from models.config import Action
 from models.database.conversations import UserConversation
 from models.requests import Attachment, QueryRequest
@@ -47,6 +48,7 @@
     check_configuration_loaded,
     get_agent,
     get_system_prompt,
+    store_conversation_into_cache,
     validate_conversation_ownership,
     validate_model_provider_override,
 )
@@ -279,6 +281,16 @@ async def query_endpoint_handler(
         provider_id=provider_id,
     )
 
+    store_conversation_into_cache(
+        configuration,
+        user_id,
+        conversation_id,
+        provider_id,
+        model_id,
+        query_request.query,
+        summary.llm_response,
+    )
+
     # Convert tool calls to response format
     logger.info("Processing tool calls...")
     tool_calls = [
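Both this non-streaming handler and the streaming handler in the next file call the same helper once the turn summary is available. As a minimal sketch of what those calls assume: only insert_or_append and its four positional arguments (user id, conversation id, entry, boolean flag) appear in this diff, so the interface below is an illustrative guess, with invented parameter names.

# Assumed sketch of the conversation-cache interface the new calls rely on;
# only the method name and positional shape are taken from the diff.
from typing import Protocol

from models.cache_entry import CacheEntry  # repo module; assumed importable


class ConversationCache(Protocol):
    def insert_or_append(
        self,
        user_id: str,
        conversation_id: str,
        cache_entry: CacheEntry,
        skip_user_id_check: bool,  # hypothetical name for the final flag
    ) -> None:
        ...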
File 2 of 3

@@ -47,6 +47,7 @@
     check_configuration_loaded,
     get_agent,
     get_system_prompt,
+    store_conversation_into_cache,
     validate_model_provider_override,
 )
 from utils.mcp_headers import handle_mcp_headers_with_toolgroups, mcp_headers_dependency
@@ -704,6 +705,16 @@ async def response_generator(
         attachments=query_request.attachments or [],
     )
 
+    store_conversation_into_cache(
+        configuration,
+        user_id,
+        conversation_id,
+        provider_id,
+        model_id,
+        query_request.query,
+        summary.llm_response,
+    )
+
     persist_user_conversation_details(
         user_id=user_id,
         conversation_id=conversation_id,
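For reference, the CacheEntry imported in the first and third files is constructed with exactly four keyword arguments, so its shape can be inferred. A rough sketch, assuming a Pydantic model like the rest of the models package; the real definition in models.cache_entry may carry extra fields or validation.

# Inferred sketch of models.cache_entry.CacheEntry, based solely on the
# keyword arguments used in store_conversation_into_cache below.
from pydantic import BaseModel


class CacheEntry(BaseModel):
    query: str
    response: str
    provider: str
    model: str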
File 3 of 3

@@ -6,6 +6,7 @@
 from llama_stack_client.lib.agents.agent import AsyncAgent
 
 import constants
+from models.cache_entry import CacheEntry
 from models.requests import QueryRequest
 from models.database.conversations import UserConversation
 from models.config import Action
@@ -135,6 +136,31 @@ def validate_model_provider_override(
     )
 
 
+# # pylint: disable=R0913,R0917
+def store_conversation_into_cache(
+    config: AppConfig,
+    user_id: str,
+    conversation_id: str,
+    provider_id: str,
+    model_id: str,
+    query: str,
+    response: str,
+) -> None:
+    """Store a single query/response turn in the conversation history cache."""
+    if config.conversation_cache_configuration.type is not None:
+        cache = config.conversation_cache
+        if cache is None:
+            logger.warning("Conversation cache configured but not initialized")
+            return
+        cache_entry = CacheEntry(
+            query=query,
+            response=response,
+            provider=provider_id,
+            model=model_id,
+        )
+        cache.insert_or_append(user_id, conversation_id, cache_entry, False)
+
+
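The guard logic above (skip when no cache type is configured, warn and return when a cache is configured but not initialized, otherwise build a CacheEntry and delegate) can be exercised with a mocked config. A hedged test sketch; the module path utils.endpoints is an assumption, since this view does not name the file that defines the helper.

# Illustrative unit test for store_conversation_into_cache; the MagicMock
# stands in for the real AppConfig, and the import path is assumed.
from unittest.mock import MagicMock

from utils.endpoints import store_conversation_into_cache


def test_store_conversation_into_cache_inserts_entry() -> None:
    config = MagicMock()
    config.conversation_cache_configuration.type = "memory"  # any non-None type

    store_conversation_into_cache(
        config, "user-1", "conv-1", "provider-1", "model-1", "hi", "hello"
    )

    # The helper should have built a CacheEntry and delegated to the cache.
    config.conversation_cache.insert_or_append.assert_called_once()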
 # # pylint: disable=R0913,R0917
 async def get_agent(
     client: AsyncLlamaStackClient,