
Commit 659a32c

Ability to store conversation into the cache

1 parent a32f966
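
This commit adds a `store_conversation_into_cache` helper to `src/utils/endpoints.py` and calls it from both the regular and streaming query endpoints, so that each question/answer pair is written to the conversation history cache whenever one is configured.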

File tree

3 files changed: +49 -0 lines changed

src/app/endpoints/query.py (12 additions, 0 deletions)

```diff
@@ -33,6 +33,7 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from metrics.utils import update_llm_token_count_from_turn
+from models.cache_entry import CacheEntry
 from models.config import Action
 from models.database.conversations import UserConversation
 from models.requests import Attachment, QueryRequest
@@ -47,6 +48,7 @@
     check_configuration_loaded,
     get_agent,
     get_system_prompt,
+    store_conversation_into_cache,
     validate_conversation_ownership,
     validate_model_provider_override,
 )
@@ -279,6 +281,16 @@ async def query_endpoint_handler(
         provider_id=provider_id,
     )
 
+    store_conversation_into_cache(
+        configuration,
+        user_id,
+        conversation_id,
+        provider_id,
+        model_id,
+        query_request.query,
+        summary.llm_response,
+    )
+
     # Convert tool calls to response format
     logger.info("Processing tool calls...")
     tool_calls = [
```
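
The `CacheEntry` model imported here is not defined in this diff. A minimal sketch of what `models/cache_entry.py` could look like, assuming a Pydantic base class; the four fields are taken from the `CacheEntry(...)` constructor call in `src/utils/endpoints.py` below, everything else is an assumption:

```python
# Hypothetical sketch of models/cache_entry.py. The field names come from
# the CacheEntry(...) call in this commit; the Pydantic base class and the
# docstring are assumptions, not part of the diff.
from pydantic import BaseModel


class CacheEntry(BaseModel):
    """A single query/response exchange stored in the conversation cache."""

    query: str
    response: str
    provider: str
    model: str
```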

src/app/endpoints/streaming_query.py (11 additions, 0 deletions)

```diff
@@ -47,6 +47,7 @@
     check_configuration_loaded,
     get_agent,
     get_system_prompt,
+    store_conversation_into_cache,
     validate_model_provider_override,
 )
 from utils.mcp_headers import handle_mcp_headers_with_toolgroups, mcp_headers_dependency
@@ -704,6 +705,16 @@ async def response_generator(
         attachments=query_request.attachments or [],
     )
 
+    store_conversation_into_cache(
+        configuration,
+        user_id,
+        conversation_id,
+        provider_id,
+        model_id,
+        query_request.query,
+        summary.llm_response,
+    )
+
     persist_user_conversation_details(
         user_id=user_id,
         conversation_id=conversation_id,
```
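
In the streaming path the new call sits inside `response_generator`, immediately before `persist_user_conversation_details`, so the cache is written once per turn after the full `summary.llm_response` has been accumulated, not per streamed chunk.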

src/utils/endpoints.py (26 additions, 0 deletions)

```diff
@@ -6,6 +6,7 @@
 from llama_stack_client.lib.agents.agent import AsyncAgent
 
 import constants
+from models.cache_entry import CacheEntry
 from models.requests import QueryRequest
 from models.database.conversations import UserConversation
 from models.config import Action
@@ -135,6 +136,31 @@ def validate_model_provider_override(
     )
 
 
+# # pylint: disable=R0913,R0917
+def store_conversation_into_cache(
+    config: AppConfig,
+    user_id: str,
+    conversation_id: str,
+    provider_id: str,
+    model_id: str,
+    query: str,
+    response: str,
+) -> None:
+    """Store one part of conversation into conversation history cache."""
+    if config.conversation_cache_configuration.type is not None:
+        cache = config.conversation_cache
+        if cache is None:
+            logger.warning("Conversation cache configured but not initialized")
+            return
+        cache_entry = CacheEntry(
+            query=query,
+            response=response,
+            provider=provider_id,
+            model=model_id,
+        )
+        cache.insert_or_append(user_id, conversation_id, cache_entry, False)
+
+
 # # pylint: disable=R0913,R0917
 async def get_agent(
     client: AsyncLlamaStackClient,
```
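
The helper is a silent no-op when no conversation cache is configured (`type is None`) and only logs a warning if a cache is configured but was never initialized. The cache object itself is not defined in this diff; a rough sketch of the interface implied by the `insert_or_append` call, where the method name and argument order come from the commit but the parameter names are assumptions, in particular the final boolean (written here as `skip_user_id_check`, which the commit passes as `False`):

```python
# Hypothetical sketch of the conversation cache interface implied by the
# call above. Only the method name and argument order are taken from the
# commit; the abstract base and parameter names are assumptions.
from abc import ABC, abstractmethod

from models.cache_entry import CacheEntry


class ConversationCache(ABC):
    """Abstract interface for a conversation history cache."""

    @abstractmethod
    def insert_or_append(
        self,
        user_id: str,
        conversation_id: str,
        cache_entry: CacheEntry,
        skip_user_id_check: bool,
    ) -> None:
        """Append one exchange to a conversation, creating it if absent."""
```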
