Skip to content

Commit 17b285f

Browse files
committed
Declared SSE content type for streaming_query endpoints
1 parent b9c1061 commit 17b285f

File tree

5 files changed

+151
-73
lines changed

5 files changed

+151
-73
lines changed

docs/openapi.json

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1500,7 +1500,7 @@
15001500
"streaming_query"
15011501
],
15021502
"summary": "Streaming Query Endpoint Handler",
1503-
"description": "Handle request to the /streaming_query endpoint using Agent API.\n\nThis is a wrapper around streaming_query_endpoint_handler_base that provides\nthe Agent API specific retrieve_response and response generator functions.\n\nReturns:\n StreamingResponse: An HTTP streaming response yielding\n SSE-formatted events for the query lifecycle.\n\nRaises:\n HTTPException: Returns HTTP 500 if unable to connect to the\n Llama Stack server.",
1503+
"description": "Handle request to the /streaming_query endpoint using Agent API.\n\nReturns a streaming response using Server-Sent Events (SSE) format with\ncontent type text/event-stream.\n\nReturns:\n StreamingResponse: An HTTP streaming response yielding\n SSE-formatted events for the query lifecycle with content type\n text/event-stream.\n\nRaises:\n HTTPException:\n - 401: Unauthorized - Missing or invalid credentials\n - 403: Forbidden - Insufficient permissions or model override not allowed\n - 404: Not Found - Conversation, model, or provider not found\n - 422: Unprocessable Entity - Request validation failed\n - 429: Too Many Requests - Quota limit exceeded\n - 500: Internal Server Error - Configuration not loaded or other server errors\n - 503: Service Unavailable - Unable to connect to Llama Stack backend",
15041504
"operationId": "streaming_query_endpoint_handler_v1_streaming_query_post",
15051505
"requestBody": {
15061506
"content": {
@@ -1514,16 +1514,14 @@
15141514
},
15151515
"responses": {
15161516
"200": {
1517-
"description": "Streaming response (Server-Sent Events)",
1517+
"description": "Successful response",
15181518
"content": {
1519-
"application/json": {
1520-
"schema": {}
1521-
},
15221519
"text/event-stream": {
15231520
"schema": {
1524-
"type": "string"
1521+
"type": "string",
1522+
"format": "text/event-stream"
15251523
},
1526-
"example": "data: {\"event\": \"start\", \"data\": {\"conversation_id\": \"123e4567-e89b-12d3-a456-426614174000\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 0, \"token\": \"Hello\"}}\n\ndata: {\"event\": \"end\", \"data\": {\"referenced_documents\": [], \"truncated\": null, \"input_tokens\": 0, \"output_tokens\": 0}, \"available_quotas\": {}}\n\n"
1524+
"example": "data: {\"event\": \"start\", \"data\": {\"conversation_id\": \"123e4567-e89b-12d3-a456-426614174000\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 0, \"token\": \"No Violation\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 1, \"token\": \"\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 2, \"token\": \"Hello\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 3, \"token\": \"!\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 4, \"token\": \" How\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 5, \"token\": \" can\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 6, \"token\": \" I\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 7, \"token\": \" assist\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 8, \"token\": \" you\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 9, \"token\": \" today\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 10, \"token\": \"?\"}}\n\ndata: {\"event\": \"turn_complete\", \"data\": {\"token\": \"Hello! How can I assist you today?\"}}\n\ndata: {\"event\": \"end\", \"data\": {\"rag_chunks\": [], \"referenced_documents\": [], \"truncated\": null, \"input_tokens\": 11, \"output_tokens\": 19, \"available_quotas\": {}}}\n\n"
15271525
}
15281526
}
15291527
},
@@ -3719,7 +3717,7 @@
37193717
"streaming_query_v2"
37203718
],
37213719
"summary": "Streaming Query Endpoint Handler V2",
3722-
"description": "Handle request to the /streaming_query endpoint using Responses API.\n\nThis is a wrapper around streaming_query_endpoint_handler_base that provides\nthe Responses API specific retrieve_response and response generator functions.\n\nReturns:\n StreamingResponse: An HTTP streaming response yielding\n SSE-formatted events for the query lifecycle.\n\nRaises:\n HTTPException: Returns HTTP 500 if unable to connect to the\n Llama Stack server.",
3720+
"description": "Handle request to the /streaming_query endpoint using Responses API.\n\nReturns a streaming response using Server-Sent Events (SSE) format with\ncontent type text/event-stream.\n\nReturns:\n StreamingResponse: An HTTP streaming response yielding\n SSE-formatted events for the query lifecycle with content type\n text/event-stream.\n\nRaises:\n HTTPException:\n - 401: Unauthorized - Missing or invalid credentials\n - 403: Forbidden - Insufficient permissions or model override not allowed\n - 404: Not Found - Conversation, model, or provider not found\n - 422: Unprocessable Entity - Request validation failed\n - 429: Too Many Requests - Quota limit exceeded\n - 500: Internal Server Error - Configuration not loaded or other server errors\n - 503: Service Unavailable - Unable to connect to Llama Stack backend",
37233721
"operationId": "streaming_query_endpoint_handler_v2_v2_streaming_query_post",
37243722
"requestBody": {
37253723
"content": {
@@ -3733,19 +3731,14 @@
37333731
},
37343732
"responses": {
37353733
"200": {
3736-
"description": "Streaming response with Server-Sent Events",
3734+
"description": "Successful response",
37373735
"content": {
3738-
"application/json": {
3739-
"schema": {
3740-
"type": "string",
3741-
"example": "data: {\"event\": \"start\", \"data\": {\"conversation_id\": \"123e4567-e89b-12d3-a456-426614174000\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 0, \"token\": \"Hello\"}}\n\ndata: {\"event\": \"end\", \"data\": {\"referenced_documents\": [], \"truncated\": null, \"input_tokens\": 0, \"output_tokens\": 0}, \"available_quotas\": {}}\n\n"
3742-
}
3743-
},
3744-
"text/plain": {
3736+
"text/event-stream": {
37453737
"schema": {
37463738
"type": "string",
3747-
"example": "Hello world!\n\n---\n\nReference: https://example.com/doc"
3748-
}
3739+
"format": "text/event-stream"
3740+
},
3741+
"example": "data: {\"event\": \"start\", \"data\": {\"conversation_id\": \"123e4567-e89b-12d3-a456-426614174000\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 0, \"token\": \"No Violation\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 1, \"token\": \"\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 2, \"token\": \"Hello\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 3, \"token\": \"!\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 4, \"token\": \" How\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 5, \"token\": \" can\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 6, \"token\": \" I\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 7, \"token\": \" assist\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 8, \"token\": \" you\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 9, \"token\": \" today\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 10, \"token\": \"?\"}}\n\ndata: {\"event\": \"turn_complete\", \"data\": {\"token\": \"Hello! How can I assist you today?\"}}\n\ndata: {\"event\": \"end\", \"data\": {\"rag_chunks\": [], \"referenced_documents\": [], \"truncated\": null, \"input_tokens\": 11, \"output_tokens\": 19, \"available_quotas\": {}}}\n\n"
37493742
}
37503743
}
37513744
},

src/app/endpoints/streaming_query.py

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
NotFoundResponse,
5656
QuotaExceededResponse,
5757
ServiceUnavailableResponse,
58+
StreamingQueryResponse,
5859
UnauthorizedResponse,
5960
UnprocessableEntityResponse,
6061
)
@@ -76,22 +77,7 @@
7677

7778

7879
streaming_query_responses: dict[int | str, dict[str, Any]] = {
79-
200: {
80-
"description": "Streaming response (Server-Sent Events)",
81-
"content": {
82-
"text/event-stream": {
83-
"schema": {"type": "string"},
84-
"example": (
85-
'data: {"event": "start", '
86-
'"data": {"conversation_id": "123e4567-e89b-12d3-a456-426614174000"}}\n\n'
87-
'data: {"event": "token", "data": {"id": 0, "token": "Hello"}}\n\n'
88-
'data: {"event": "end", "data": {"referenced_documents": [], '
89-
'"truncated": null, "input_tokens": 0, "output_tokens": 0}, '
90-
'"available_quotas": {}}\n\n'
91-
),
92-
}
93-
},
94-
},
80+
200: StreamingQueryResponse.openapi_response(),
9581
401: UnauthorizedResponse.openapi_response(
9682
examples=["missing header", "missing token"]
9783
),
@@ -937,7 +923,11 @@ async def error_generator() -> AsyncGenerator[str, None]:
937923
return StreamingResponse(error_generator(), media_type=content_type)
938924

939925

940-
@router.post("/streaming_query", responses=streaming_query_responses)
926+
@router.post(
927+
"/streaming_query",
928+
response_class=StreamingResponse,
929+
responses=streaming_query_responses,
930+
)
941931
@authorize(Action.STREAMING_QUERY)
942932
async def streaming_query_endpoint_handler( # pylint: disable=too-many-locals,too-many-statements
943933
request: Request,
@@ -948,16 +938,23 @@ async def streaming_query_endpoint_handler( # pylint: disable=too-many-locals,t
948938
"""
949939
Handle request to the /streaming_query endpoint using Agent API.
950940
951-
This is a wrapper around streaming_query_endpoint_handler_base that provides
952-
the Agent API specific retrieve_response and response generator functions.
941+
Returns a streaming response using Server-Sent Events (SSE) format with
942+
content type text/event-stream.
953943
954944
Returns:
955945
StreamingResponse: An HTTP streaming response yielding
956-
SSE-formatted events for the query lifecycle.
946+
SSE-formatted events for the query lifecycle with content type
947+
text/event-stream.
957948
958949
Raises:
959-
HTTPException: Returns HTTP 500 if unable to connect to the
960-
Llama Stack server.
950+
HTTPException:
951+
- 401: Unauthorized - Missing or invalid credentials
952+
- 403: Forbidden - Insufficient permissions or model override not allowed
953+
- 404: Not Found - Conversation, model, or provider not found
954+
- 422: Unprocessable Entity - Request validation failed
955+
- 429: Too Many Requests - Quota limit exceeded
956+
- 500: Internal Server Error - Configuration not loaded or other server errors
957+
- 503: Service Unavailable - Unable to connect to Llama Stack backend
961958
"""
962959
return await streaming_query_endpoint_handler_base(
963960
request=request,

src/app/endpoints/streaming_query_v2.py

Lines changed: 19 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
NotFoundResponse,
4141
QuotaExceededResponse,
4242
ServiceUnavailableResponse,
43+
StreamingQueryResponse,
4344
UnauthorizedResponse,
4445
UnprocessableEntityResponse,
4546
)
@@ -58,30 +59,7 @@
5859
auth_dependency = get_auth_dependency()
5960

6061
streaming_query_v2_responses: dict[int | str, dict[str, Any]] = {
61-
200: {
62-
"description": "Streaming response with Server-Sent Events",
63-
"content": {
64-
"application/json": {
65-
"schema": {
66-
"type": "string",
67-
"example": (
68-
'data: {"event": "start", '
69-
'"data": {"conversation_id": "123e4567-e89b-12d3-a456-426614174000"}}\n\n'
70-
'data: {"event": "token", "data": {"id": 0, "token": "Hello"}}\n\n'
71-
'data: {"event": "end", "data": {"referenced_documents": [], '
72-
'"truncated": null, "input_tokens": 0, "output_tokens": 0}, '
73-
'"available_quotas": {}}\n\n'
74-
),
75-
}
76-
},
77-
"text/plain": {
78-
"schema": {
79-
"type": "string",
80-
"example": "Hello world!\n\n---\n\nReference: https://example.com/doc",
81-
}
82-
},
83-
},
84-
},
62+
200: StreamingQueryResponse.openapi_response(),
8563
401: UnauthorizedResponse.openapi_response(
8664
examples=["missing header", "missing token"]
8765
),
@@ -313,7 +291,11 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat
313291
return response_generator
314292

315293

316-
@router.post("/streaming_query", responses=streaming_query_v2_responses)
294+
@router.post(
295+
"/streaming_query",
296+
response_class=StreamingResponse,
297+
responses=streaming_query_v2_responses,
298+
)
317299
@authorize(Action.STREAMING_QUERY)
318300
async def streaming_query_endpoint_handler_v2( # pylint: disable=too-many-locals
319301
request: Request,
@@ -324,16 +306,23 @@ async def streaming_query_endpoint_handler_v2( # pylint: disable=too-many-local
324306
"""
325307
Handle request to the /streaming_query endpoint using Responses API.
326308
327-
This is a wrapper around streaming_query_endpoint_handler_base that provides
328-
the Responses API specific retrieve_response and response generator functions.
309+
Returns a streaming response using Server-Sent Events (SSE) format with
310+
content type text/event-stream.
329311
330312
Returns:
331313
StreamingResponse: An HTTP streaming response yielding
332-
SSE-formatted events for the query lifecycle.
314+
SSE-formatted events for the query lifecycle with content type
315+
text/event-stream.
333316
334317
Raises:
335-
HTTPException: Returns HTTP 500 if unable to connect to the
336-
Llama Stack server.
318+
HTTPException:
319+
- 401: Unauthorized - Missing or invalid credentials
320+
- 403: Forbidden - Insufficient permissions or model override not allowed
321+
- 404: Not Found - Conversation, model, or provider not found
322+
- 422: Unprocessable Entity - Request validation failed
323+
- 429: Too Many Requests - Quota limit exceeded
324+
- 500: Internal Server Error - Configuration not loaded or other server errors
325+
- 503: Service Unavailable - Unable to connect to Llama Stack backend
337326
"""
338327
return await streaming_query_endpoint_handler_base(
339328
request=request,

src/models/responses.py

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from quota.quota_exceed_error import QuotaExceedError
1212
from models.config import Action, Configuration
1313

14+
SUCCESSFUL_RESPONSE_DESCRIPTION = "Successful response"
1415
BAD_REQUEST_DESCRIPTION = "Invalid request format"
1516
UNAUTHORIZED_DESCRIPTION = "Unauthorized"
1617
FORBIDDEN_DESCRIPTION = "Permission denied"
@@ -52,7 +53,7 @@ def openapi_response(cls) -> dict[str, Any]:
5253
content = {"application/json": {"example": example_value}}
5354

5455
return {
55-
"description": "Successful response",
56+
"description": SUCCESSFUL_RESPONSE_DESCRIPTION,
5657
"model": cls,
5758
"content": content,
5859
}
@@ -449,6 +450,74 @@ class QueryResponse(AbstractSuccessfulResponse):
449450
}
450451

451452

453+
class StreamingQueryResponse(AbstractSuccessfulResponse):
454+
"""Documentation-only model for streaming query responses using Server-Sent Events (SSE)."""
455+
456+
@classmethod
457+
def openapi_response(cls) -> dict[str, Any]:
458+
"""Generate FastAPI response dict for SSE streaming with examples.
459+
460+
Note: This is used for OpenAPI documentation only. The actual endpoint
461+
returns a StreamingResponse object, not this Pydantic model.
462+
"""
463+
schema = cls.model_json_schema()
464+
model_examples = schema.get("examples")
465+
if not model_examples:
466+
raise SchemaError(f"Examples not found in {cls.__name__}")
467+
example_value = model_examples[0]
468+
content = {
469+
"text/event-stream": {
470+
"schema": {"type": "string", "format": "text/event-stream"},
471+
"example": example_value,
472+
}
473+
}
474+
475+
return {
476+
"description": SUCCESSFUL_RESPONSE_DESCRIPTION,
477+
"content": content,
478+
# Note: No "model" key since we're not actually serializing this model
479+
}
480+
481+
model_config = {
482+
"json_schema_extra": {
483+
"examples": [
484+
(
485+
'data: {"event": "start", "data": {'
486+
'"conversation_id": "123e4567-e89b-12d3-a456-426614174000"}}\n\n'
487+
'data: {"event": "token", "data": {'
488+
'"id": 0, "token": "No Violation"}}\n\n'
489+
'data: {"event": "token", "data": {'
490+
'"id": 1, "token": ""}}\n\n'
491+
'data: {"event": "token", "data": {'
492+
'"id": 2, "token": "Hello"}}\n\n'
493+
'data: {"event": "token", "data": {'
494+
'"id": 3, "token": "!"}}\n\n'
495+
'data: {"event": "token", "data": {'
496+
'"id": 4, "token": " How"}}\n\n'
497+
'data: {"event": "token", "data": {'
498+
'"id": 5, "token": " can"}}\n\n'
499+
'data: {"event": "token", "data": {'
500+
'"id": 6, "token": " I"}}\n\n'
501+
'data: {"event": "token", "data": {'
502+
'"id": 7, "token": " assist"}}\n\n'
503+
'data: {"event": "token", "data": {'
504+
'"id": 8, "token": " you"}}\n\n'
505+
'data: {"event": "token", "data": {'
506+
'"id": 9, "token": " today"}}\n\n'
507+
'data: {"event": "token", "data": {'
508+
'"id": 10, "token": "?"}}\n\n'
509+
'data: {"event": "turn_complete", "data": {'
510+
'"token": "Hello! How can I assist you today?"}}\n\n'
511+
'data: {"event": "end", "data": {'
512+
'"rag_chunks": [], "referenced_documents": [], '
513+
'"truncated": null, "input_tokens": 11, "output_tokens": 19, '
514+
'"available_quotas": {}}}\n\n'
515+
),
516+
]
517+
}
518+
}
519+
520+
452521
class InfoResponse(AbstractSuccessfulResponse):
453522
"""Model representing a response to an info request.
454523
@@ -806,7 +875,7 @@ def openapi_response(cls) -> dict[str, Any]:
806875
content = {"application/json": {"examples": named_examples or None}}
807876

808877
return {
809-
"description": "Successful response",
878+
"description": SUCCESSFUL_RESPONSE_DESCRIPTION,
810879
"model": cls,
811880
"content": content,
812881
}

0 commit comments

Comments (0)