# Patch summary (explanatory preamble; text before the first "diff --git"
# header is not part of any hunk).
#
# litellm/litellm_core_utils/streaming_handler.py
#   * The import from litellm.types.llms.openai changes from
#     ChatCompletionChunk to OpenAIChatCompletionChunk, and the
#     `original_chunk` annotation of
#     copy_model_response_level_provider_specific_fields is updated to match.
#   * In return_processed_chunk_logic, right after `_is_delta_empty` is
#     computed and before the `if _is_delta_empty:` branch, the patch reads
#     response_obj.get("original_chunk") and, when it is not None, calls
#     preserve_upstream_non_openai_attributes(model_response=...,
#     original_chunk=...). Because the call sits ahead of the branch, it runs
#     whether or not the delta is empty.
#
# tests/test_litellm/litellm_core_utils/test_streaming_handler.py
#   * Adds two regression tests (both cite #23444). Each sets
#     received_finish_reason = "stop", attaches `custom_field` to the original
#     chunk via setattr, and asserts the returned chunk exposes the same
#     `custom_field`. The second test also sets holding_chunk = "filtered text"
#     and asserts the returned delta content equals "filtered text".
#
# NOTE(review): this patch was recovered from a copy whose newlines and
#   indentation had been collapsed. Line breaks and blank lines follow the
#   hunk line counts; indentation depth of context lines is inferred from
#   Python syntax -- confirm against the upstream files before applying.
# NOTE(review): no visible hunk imports preserve_upstream_non_openai_attributes;
#   presumably it is already in scope inside return_processed_chunk_logic --
#   confirm.
diff --git a/litellm/litellm_core_utils/streaming_handler.py b/litellm/litellm_core_utils/streaming_handler.py
index db2369d03d6..6e991e6911b 100644
--- a/litellm/litellm_core_utils/streaming_handler.py
+++ b/litellm/litellm_core_utils/streaming_handler.py
@@ -31,7 +31,7 @@
 )
 from litellm.litellm_core_utils.redact_messages import LiteLLMLoggingObject
 from litellm.litellm_core_utils.thread_pool_executor import executor
-from litellm.types.llms.openai import ChatCompletionChunk
+from litellm.types.llms.openai import OpenAIChatCompletionChunk
 from litellm.types.router import GenericLiteLLMParams
 from litellm.types.utils import (
     Delta,
@@ -745,7 +745,7 @@ def set_model_id(
 
     def copy_model_response_level_provider_specific_fields(
         self,
-        original_chunk: Union[ModelResponseStream, ChatCompletionChunk],
+        original_chunk: Union[ModelResponseStream, OpenAIChatCompletionChunk],
         model_response: ModelResponseStream,
     ) -> ModelResponseStream:
         """
@@ -1012,6 +1012,15 @@ def return_processed_chunk_logic(  # noqa
             # if delta is None
             _is_delta_empty = self.is_delta_empty(delta=model_response.choices[0].delta)
 
+            # Preserve custom attributes from original chunk (applies to both
+            # empty and non-empty delta final chunks).
+            _original_chunk = response_obj.get("original_chunk", None)
+            if _original_chunk is not None:
+                preserve_upstream_non_openai_attributes(
+                    model_response=model_response,
+                    original_chunk=_original_chunk,
+                )
+
             if _is_delta_empty:
                 model_response.choices[0].delta = Delta(
                     content=None
diff --git a/tests/test_litellm/litellm_core_utils/test_streaming_handler.py b/tests/test_litellm/litellm_core_utils/test_streaming_handler.py
index 6a64e7020b9..5d7b291e7b3 100644
--- a/tests/test_litellm/litellm_core_utils/test_streaming_handler.py
+++ b/tests/test_litellm/litellm_core_utils/test_streaming_handler.py
@@ -615,6 +615,79 @@ def test_streaming_handler_with_stop_chunk(
     assert returned_chunk is None
 
 
+def test_finish_reason_chunk_preserves_non_openai_attributes(
+    initialized_custom_stream_wrapper: CustomStreamWrapper,
+):
+    """
+    Regression test for #23444:
+    Preserve upstream non-OpenAI attributes on final finish_reason chunk.
+    """
+    initialized_custom_stream_wrapper.received_finish_reason = "stop"
+
+    original_chunk = ModelResponseStream(
+        id="chatcmpl-test",
+        created=1742093326,
+        model=None,
+        object="chat.completion.chunk",
+        choices=[
+            StreamingChoices(
+                finish_reason="stop",
+                index=0,
+                delta=Delta(content=""),
+                logprobs=None,
+            )
+        ],
+    )
+    setattr(original_chunk, "custom_field", {"key": "value"})
+
+    returned_chunk = initialized_custom_stream_wrapper.return_processed_chunk_logic(
+        completion_obj={"content": ""},
+        response_obj={"original_chunk": original_chunk},
+        model_response=ModelResponseStream(),
+    )
+
+    assert returned_chunk is not None
+    assert getattr(returned_chunk, "custom_field", None) == {"key": "value"}
+
+
+def test_finish_reason_with_holding_chunk_preserves_non_openai_attributes(
+    initialized_custom_stream_wrapper: CustomStreamWrapper,
+):
+    """
+    Regression test for #23444 holding-chunk path:
+    preserve custom attributes when _is_delta_empty is False after flushing
+    holding_chunk.
+    """
+    initialized_custom_stream_wrapper.received_finish_reason = "stop"
+    initialized_custom_stream_wrapper.holding_chunk = "filtered text"
+
+    original_chunk = ModelResponseStream(
+        id="chatcmpl-test-2",
+        created=1742093327,
+        model=None,
+        object="chat.completion.chunk",
+        choices=[
+            StreamingChoices(
+                finish_reason="stop",
+                index=0,
+                delta=Delta(content=""),
+                logprobs=None,
+            )
+        ],
+    )
+    setattr(original_chunk, "custom_field", {"key": "value"})
+
+    returned_chunk = initialized_custom_stream_wrapper.return_processed_chunk_logic(
+        completion_obj={"content": ""},
+        response_obj={"original_chunk": original_chunk},
+        model_response=ModelResponseStream(),
+    )
+
+    assert returned_chunk is not None
+    assert returned_chunk.choices[0].delta.content == "filtered text"
+    assert getattr(returned_chunk, "custom_field", None) == {"key": "value"}
+
+
 def test_set_response_id_propagation_empty_to_valid(
     initialized_custom_stream_wrapper: CustomStreamWrapper,
 ):