Skip to content

Commit 3d4e2c5

Browse files
committed
Configuration for 0.3.0 + OLS compatibility fixes
1 parent 4bb351d commit 3d4e2c5

25 files changed

+1244
-1476
lines changed

docs/openapi.json

Lines changed: 330 additions & 764 deletions
Large diffs are not rendered by default.

run.yaml

Lines changed: 113 additions & 149 deletions
Large diffs are not rendered by default.

src/app/endpoints/conversations_v2.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@
4949
examples=["missing header", "missing token"]
5050
),
5151
403: ForbiddenResponse.openapi_response(examples=["endpoint"]),
52-
404: NotFoundResponse.openapi_response(examples=["conversation"]),
5352
500: InternalServerErrorResponse.openapi_response(
5453
examples=["conversation cache", "configuration"]
5554
),
@@ -162,8 +161,6 @@ async def delete_conversation_endpoint_handler(
162161
response = InternalServerErrorResponse.cache_unavailable()
163162
raise HTTPException(**response.model_dump())
164163

165-
check_conversation_existence(user_id, conversation_id)
166-
167164
logger.info("Deleting conversation %s for user %s", conversation_id, user_id)
168165
deleted = configuration.conversation_cache.delete(
169166
user_id, conversation_id, skip_userid_check

src/app/endpoints/conversations_v3.py

Lines changed: 52 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
11
"""Handler for REST API calls to manage conversation history using Conversations API."""
22

33
import logging
4-
from typing import Any, cast
4+
from typing import Any
55

66
from fastapi import APIRouter, Depends, HTTPException, Request, status
77
from llama_stack_client import (
88
APIConnectionError,
9+
APIStatusError,
910
NOT_GIVEN,
10-
BadRequestError,
11-
NotFoundError,
12-
)
13-
from llama_stack_client.types.conversation_delete_response import (
14-
ConversationDeleteResponse as CDR,
1511
)
1612
from sqlalchemy.exc import SQLAlchemyError
1713

@@ -49,7 +45,7 @@
4945
)
5046

5147
logger = logging.getLogger("app.endpoints.handlers")
52-
router = APIRouter(tags=["conversations_v3"])
48+
router = APIRouter(tags=["conversations_v1"])
5349

5450
conversation_get_responses: dict[int | str, dict[str, Any]] = {
5551
200: ConversationResponse.openapi_response(),
@@ -74,7 +70,6 @@
7470
403: ForbiddenResponse.openapi_response(
7571
examples=["conversation delete", "endpoint"]
7672
),
77-
404: NotFoundResponse.openapi_response(examples=["conversation"]),
7873
500: InternalServerErrorResponse.openapi_response(
7974
examples=["database", "configuration"]
8075
),
@@ -121,7 +116,6 @@ def simplify_conversation_items(items: list[dict]) -> list[dict[str, Any]]:
121116

122117
# Group items by turns (user message -> assistant response)
123118
current_turn: dict[str, Any] = {"messages": []}
124-
125119
for item in items:
126120
item_type = item.get("type")
127121
item_role = item.get("role")
@@ -134,7 +128,8 @@ def simplify_conversation_items(items: list[dict]) -> list[dict[str, Any]]:
134128
text_content = ""
135129
for content_part in content:
136130
if isinstance(content_part, dict):
137-
if content_part.get("type") == "text":
131+
content_type = content_part.get("type")
132+
if content_type in ("input_text", "output_text", "text"):
138133
text_content += content_part.get("text", "")
139134
elif isinstance(content_part, str):
140135
text_content += content_part
@@ -157,7 +152,11 @@ def simplify_conversation_items(items: list[dict]) -> list[dict[str, Any]]:
157152
return chat_history
158153

159154

160-
@router.get("/conversations", responses=conversations_list_responses)
155+
@router.get(
156+
"/conversations",
157+
responses=conversations_list_responses,
158+
summary="Conversations List Endpoint Handler V1",
159+
)
161160
@authorize(Action.LIST_CONVERSATIONS)
162161
async def get_conversations_list_endpoint_handler(
163162
request: Request,
@@ -214,7 +213,11 @@ async def get_conversations_list_endpoint_handler(
214213
raise HTTPException(**response.model_dump()) from e
215214

216215

217-
@router.get("/conversations/{conversation_id}", responses=conversation_get_responses)
216+
@router.get(
217+
"/conversations/{conversation_id}",
218+
responses=conversation_get_responses,
219+
summary="Conversation Get Endpoint Handler V1",
220+
)
218221
@authorize(Action.GET_CONVERSATION)
219222
async def get_conversation_endpoint_handler(
220223
request: Request,
@@ -278,15 +281,18 @@ async def get_conversation_endpoint_handler(
278281
raise HTTPException(**response)
279282

280283
# If reached this, user is authorized to retrieve this conversation
281-
# Note: We check if conversation exists in DB but don't fail if it doesn't,
282-
# as it might exist in llama-stack but not be persisted yet
283284
try:
284285
conversation = retrieve_conversation(normalized_conv_id)
285286
if conversation is None:
286-
logger.warning(
287-
"Conversation %s not found in database, will try llama-stack",
287+
logger.error(
288+
"Conversation %s not found in database.",
288289
normalized_conv_id,
289290
)
291+
response = NotFoundResponse(
292+
resource="conversation", resource_id=normalized_conv_id
293+
).model_dump()
294+
raise HTTPException(**response)
295+
290296
except SQLAlchemyError as e:
291297
logger.error(
292298
"Database error occurred while retrieving conversation %s: %s",
@@ -318,13 +324,11 @@ async def get_conversation_endpoint_handler(
318324
limit=1000, # Max items to retrieve
319325
order="asc", # Get items in chronological order
320326
)
321-
322327
items = (
323328
conversation_items_response.data
324329
if hasattr(conversation_items_response, "data")
325330
else []
326331
)
327-
328332
# Convert items to dict format for processing
329333
items_dicts = [
330334
item.model_dump() if hasattr(item, "model_dump") else dict(item)
@@ -340,6 +344,7 @@ async def get_conversation_endpoint_handler(
340344
# Simplify the conversation items to include only essential information
341345
chat_history = simplify_conversation_items(items_dicts)
342346

347+
# Conversations api has no support for message level timestamps
343348
return ConversationResponse(
344349
conversation_id=normalized_conv_id,
345350
chat_history=chat_history,
@@ -352,7 +357,7 @@ async def get_conversation_endpoint_handler(
352357
).model_dump()
353358
raise HTTPException(**response) from e
354359

355-
except (NotFoundError, BadRequestError) as e:
360+
except APIStatusError as e:
356361
logger.error("Conversation not found: %s", e)
357362
response = NotFoundResponse(
358363
resource="conversation", resource_id=normalized_conv_id
@@ -361,7 +366,9 @@ async def get_conversation_endpoint_handler(
361366

362367

363368
@router.delete(
364-
"/conversations/{conversation_id}", responses=conversation_delete_responses
369+
"/conversations/{conversation_id}",
370+
responses=conversation_delete_responses,
371+
summary="Conversation Delete Endpoint Handler V1",
365372
)
366373
@authorize(Action.DELETE_CONVERSATION)
367374
async def delete_conversation_endpoint_handler(
@@ -420,24 +427,22 @@ async def delete_conversation_endpoint_handler(
420427

421428
# If reached this, user is authorized to delete this conversation
422429
try:
423-
conversation = retrieve_conversation(normalized_conv_id)
424-
if conversation is None:
425-
response = NotFoundResponse(
426-
resource="conversation", resource_id=normalized_conv_id
427-
).model_dump()
428-
raise HTTPException(**response)
429-
430+
local_deleted = delete_conversation(normalized_conv_id)
431+
if not local_deleted:
432+
logger.info(
433+
"Conversation %s not found locally when deleting.",
434+
normalized_conv_id,
435+
)
430436
except SQLAlchemyError as e:
431437
logger.error(
432-
"Database error occurred while retrieving conversation %s.",
438+
"Database error while deleting conversation %s",
433439
normalized_conv_id,
434440
)
435441
response = InternalServerErrorResponse.database_error()
436442
raise HTTPException(**response.model_dump()) from e
437443

438444
logger.info("Deleting conversation %s using Conversations API", normalized_conv_id)
439445

440-
delete_response: CDR | None = None
441446
try:
442447
# Get Llama Stack client
443448
client = AsyncLlamaStackClientHolder().get_client()
@@ -446,17 +451,13 @@ async def delete_conversation_endpoint_handler(
446451
llama_stack_conv_id = to_llama_stack_conversation_id(normalized_conv_id)
447452

448453
# Use Conversations API to delete the conversation
449-
delete_response = cast(
450-
CDR, await client.conversations.delete(conversation_id=llama_stack_conv_id)
454+
delete_response = await client.conversations.delete(
455+
conversation_id=llama_stack_conv_id
451456
)
452-
453-
logger.info("Successfully deleted conversation %s", normalized_conv_id)
454-
455-
deleted = delete_conversation(normalized_conv_id)
456-
457-
return ConversationDeleteResponse(
458-
conversation_id=normalized_conv_id,
459-
deleted=deleted and delete_response.deleted if delete_response else False,
457+
logger.info(
458+
"Remote deletion of %s successful (remote_deleted=%s)",
459+
normalized_conv_id,
460+
delete_response.deleted,
460461
)
461462

462463
except APIConnectionError as e:
@@ -467,28 +468,23 @@ async def delete_conversation_endpoint_handler(
467468
).model_dump(),
468469
) from e
469470

470-
except (NotFoundError, BadRequestError):
471-
# If not found in LlamaStack, still try to delete from local DB
471+
except APIStatusError:
472472
logger.warning(
473-
"Conversation %s not found in LlamaStack, cleaning up local DB",
473+
"Conversation %s in LlamaStack not found. Treating as already deleted.",
474474
normalized_conv_id,
475475
)
476-
deleted = delete_conversation(normalized_conv_id)
477-
return ConversationDeleteResponse(
478-
conversation_id=normalized_conv_id,
479-
deleted=deleted,
480-
)
481476

482-
except SQLAlchemyError as e:
483-
logger.error(
484-
"Database error occurred while deleting conversation %s.",
485-
normalized_conv_id,
486-
)
487-
response = InternalServerErrorResponse.database_error()
488-
raise HTTPException(**response.model_dump()) from e
477+
return ConversationDeleteResponse(
478+
conversation_id=normalized_conv_id,
479+
deleted=local_deleted,
480+
)
489481

490482

491-
@router.put("/conversations/{conversation_id}", responses=conversation_update_responses)
483+
@router.put(
484+
"/conversations/{conversation_id}",
485+
responses=conversation_update_responses,
486+
summary="Conversation Update Endpoint Handler V1",
487+
)
492488
@authorize(Action.UPDATE_CONVERSATION)
493489
async def update_conversation_endpoint_handler(
494490
request: Request,
@@ -609,7 +605,7 @@ async def update_conversation_endpoint_handler(
609605
).model_dump()
610606
raise HTTPException(**response) from e
611607

612-
except (NotFoundError, BadRequestError) as e:
608+
except APIStatusError as e:
613609
logger.error("Conversation not found: %s", e)
614610
response = NotFoundResponse(
615611
resource="conversation", resource_id=normalized_conv_id

src/app/endpoints/query.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88
from typing import Annotated, Any, Optional, cast
99

1010
from fastapi import APIRouter, Depends, HTTPException, Request
11+
from litellm.exceptions import RateLimitError
1112
from llama_stack_client import (
1213
APIConnectionError,
14+
APIStatusError,
1315
AsyncLlamaStackClient, # type: ignore
1416
)
1517
from llama_stack_client.types import Shield, UserMessage # type: ignore
@@ -387,7 +389,6 @@ async def query_endpoint_handler_base( # pylint: disable=R0914
387389
response = QueryResponse(
388390
conversation_id=conversation_id,
389391
response=summary.llm_response,
390-
rag_chunks=summary.rag_chunks if summary.rag_chunks else [],
391392
tool_calls=summary.tool_calls if summary.tool_calls else None,
392393
tool_results=summary.tool_results if summary.tool_results else None,
393394
referenced_documents=referenced_documents,
@@ -410,12 +411,21 @@ async def query_endpoint_handler_base( # pylint: disable=R0914
410411
)
411412
raise HTTPException(**response.model_dump()) from e
412413
except SQLAlchemyError as e:
413-
logger.exception("Error persisting conversation details: %s", e)
414+
logger.exception("Error persisting conversation details.")
414415
response = InternalServerErrorResponse.database_error()
415416
raise HTTPException(**response.model_dump()) from e
416-
except Exception as e:
417+
except RateLimitError as e:
417418
used_model = getattr(e, "model", "")
418-
response = QuotaExceededResponse.model(used_model)
419+
if used_model:
420+
response = QuotaExceededResponse.model(used_model)
421+
else:
422+
response = QuotaExceededResponse(
423+
response="The quota has been exceeded", cause=str(e)
424+
)
425+
raise HTTPException(**response.model_dump()) from e
426+
except APIStatusError as e:
427+
logger.exception("Error in query endpoint handler: %s", e)
428+
response = InternalServerErrorResponse.generic()
419429
raise HTTPException(**response.model_dump()) from e
420430

421431

0 commit comments

Comments (0)