Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions litellm/litellm_core_utils/streaming_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
)
from litellm.litellm_core_utils.redact_messages import LiteLLMLoggingObject
from litellm.litellm_core_utils.thread_pool_executor import executor
from litellm.types.llms.openai import ChatCompletionChunk
from litellm.types.llms.openai import OpenAIChatCompletionChunk
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import (
Delta,
Expand Down Expand Up @@ -745,7 +745,7 @@ def set_model_id(

def copy_model_response_level_provider_specific_fields(
self,
original_chunk: Union[ModelResponseStream, ChatCompletionChunk],
original_chunk: Union[ModelResponseStream, OpenAIChatCompletionChunk],
model_response: ModelResponseStream,
) -> ModelResponseStream:
"""
Expand Down Expand Up @@ -1012,6 +1012,15 @@ def return_processed_chunk_logic( # noqa
# if delta is None
_is_delta_empty = self.is_delta_empty(delta=model_response.choices[0].delta)

# Preserve custom attributes from original chunk (applies to both
# empty and non-empty delta final chunks).
_original_chunk = response_obj.get("original_chunk", None)
if _original_chunk is not None:
preserve_upstream_non_openai_attributes(
model_response=model_response,
original_chunk=_original_chunk,
)

if _is_delta_empty:
model_response.choices[0].delta = Delta(
content=None
Expand Down
73 changes: 73 additions & 0 deletions tests/test_litellm/litellm_core_utils/test_streaming_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,79 @@ def test_streaming_handler_with_stop_chunk(
assert returned_chunk is None


def test_finish_reason_chunk_preserves_non_openai_attributes(
    initialized_custom_stream_wrapper: CustomStreamWrapper,
):
    """
    Regression test for #23444:
    Preserve upstream non-OpenAI attributes on final finish_reason chunk.
    """
    wrapper = initialized_custom_stream_wrapper
    wrapper.received_finish_reason = "stop"

    # Build an upstream chunk carrying the terminal finish_reason.
    upstream_chunk = ModelResponseStream(
        id="chatcmpl-test",
        created=1742093326,
        model=None,
        object="chat.completion.chunk",
        choices=[
            StreamingChoices(
                index=0,
                finish_reason="stop",
                delta=Delta(content=""),
                logprobs=None,
            )
        ],
    )
    # Attach a provider-specific attribute outside the OpenAI schema.
    setattr(upstream_chunk, "custom_field", {"key": "value"})

    result = wrapper.return_processed_chunk_logic(
        completion_obj={"content": ""},
        response_obj={"original_chunk": upstream_chunk},
        model_response=ModelResponseStream(),
    )

    # The final chunk must be emitted and keep the custom attribute intact.
    assert result is not None
    assert getattr(result, "custom_field", None) == {"key": "value"}


def test_finish_reason_with_holding_chunk_preserves_non_openai_attributes(
    initialized_custom_stream_wrapper: CustomStreamWrapper,
):
    """
    Regression test for #23444 holding-chunk path:
    preserve custom attributes when _is_delta_empty is False after flushing
    holding_chunk.
    """
    wrapper = initialized_custom_stream_wrapper
    wrapper.received_finish_reason = "stop"
    wrapper.holding_chunk = "filtered text"

    # Upstream chunk with terminal finish_reason and an empty delta.
    upstream_chunk = ModelResponseStream(
        id="chatcmpl-test-2",
        created=1742093327,
        model=None,
        object="chat.completion.chunk",
        choices=[
            StreamingChoices(
                index=0,
                finish_reason="stop",
                delta=Delta(content=""),
                logprobs=None,
            )
        ],
    )
    # Attach a provider-specific attribute outside the OpenAI schema.
    setattr(upstream_chunk, "custom_field", {"key": "value"})

    result = wrapper.return_processed_chunk_logic(
        completion_obj={"content": ""},
        response_obj={"original_chunk": upstream_chunk},
        model_response=ModelResponseStream(),
    )

    # The held text must be flushed into the final delta, and the custom
    # attribute must survive the finish_reason processing.
    assert result is not None
    assert result.choices[0].delta.content == "filtered text"
    assert getattr(result, "custom_field", None) == {"key": "value"}


def test_set_response_id_propagation_empty_to_valid(
initialized_custom_stream_wrapper: CustomStreamWrapper,
):
Expand Down
Loading