diff --git a/tests/test_litellm/llms/bedrock/chat/test_streaming_choice_index.py b/tests/test_litellm/llms/bedrock/chat/test_streaming_choice_index.py index 7a28429fda0..b1c41764fe0 100644 --- a/tests/test_litellm/llms/bedrock/chat/test_streaming_choice_index.py +++ b/tests/test_litellm/llms/bedrock/chat/test_streaming_choice_index.py @@ -6,6 +6,11 @@ text=0, toolUse=1), NOT parallel completions. Since Bedrock doesn't support n > 1, all chunks must use choice index 0. +This includes extended-thinking responses where the reasoning block arrives on +contentBlockIndex=0 and the text block on contentBlockIndex=1 — both must be +normalised to choices[0].index == 0 for OpenAI-SDK compatibility. +(Regression for https://github.com/BerriAI/litellm/issues/23178) + References: - Bedrock InferenceConfiguration (no n parameter): https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_InferenceConfiguration.html @@ -13,7 +18,10 @@ https://platform.openai.com/docs/api-reference/chat/object """ -from litellm.llms.bedrock.chat.invoke_handler import AWSEventStreamDecoder +from litellm.llms.bedrock.chat.invoke_handler import ( + AWSEventStreamDecoder, + AmazonAnthropicClaudeStreamDecoder, +) class TestBedrockStreamingChoiceIndex: @@ -112,3 +120,129 @@ def test_mixed_content_blocks_all_use_choice_index_zero(self): } result4 = handler.converse_chunk_parser(finish_chunk) assert result4.choices[0].index == 0, "Finish reason should have index=0" + + # ------------------------------------------------------------------ + # Extended-thinking / reasoning tests (issue #23178) + # ------------------------------------------------------------------ + + def test_thinking_block_chunks_use_choice_index_zero(self): + """ + Regression for https://github.com/BerriAI/litellm/issues/23178. + + When Claude extended-thinking is enabled the Bedrock converse API emits: + contentBlockIndex=0 -> reasoning / thinking block + contentBlockIndex=1 -> text block + + LiteLLM must normalise both to choices[0].index == 0 so that + OpenAI-compatible clients (e.g. openai.ChatCompletionAccumulator) do not + crash when they see an unexpected index switch mid-stream. + """ + handler = AWSEventStreamDecoder( + model="anthropic.claude-sonnet-4-5-20250929-v1:0" + ) + + # thinking block start (contentBlockIndex=0) + r = handler.converse_chunk_parser( + {"start": {"reasoningContent": {}}, "contentBlockIndex": 0} + ) + assert r.choices[0].index == 0, "thinking start should have index=0" + + # thinking block delta (contentBlockIndex=0) + r = handler.converse_chunk_parser( + { + "delta": {"reasoningContent": {"text": "Let me think step by step..."}}, + "contentBlockIndex": 0, + } + ) + assert r.choices[0].index == 0, "thinking delta should have index=0" + assert r.choices[0].delta.reasoning_content == "Let me think step by step..." + + # thinking block stop (contentBlockIndex=0) + r = handler.converse_chunk_parser({"contentBlockIndex": 0}) + assert r.choices[0].index == 0, "thinking stop should have index=0" + + # text block start (contentBlockIndex=1) — this is the previously broken case + r = handler.converse_chunk_parser( + {"start": {}, "contentBlockIndex": 1} + ) + assert r.choices[0].index == 0, ( + "text start after thinking block should have index=0, not contentBlockIndex=1" + ) + + # text block delta (contentBlockIndex=1) + r = handler.converse_chunk_parser( + {"delta": {"text": "The answer is 42."}, "contentBlockIndex": 1} + ) + assert r.choices[0].index == 0, ( + "text delta after thinking block should have index=0, not contentBlockIndex=1" + ) + assert r.choices[0].delta.content == "The answer is 42." + + # text block stop (contentBlockIndex=1) + r = handler.converse_chunk_parser({"contentBlockIndex": 1}) + assert r.choices[0].index == 0, "text stop should have index=0" + + # message stop + r = handler.converse_chunk_parser({"stopReason": "end_turn"}) + assert r.choices[0].index == 0, "message stop should have index=0" + + def test_thinking_signature_chunk_uses_choice_index_zero(self): + """The reasoning block ends with a cryptographic signature chunk; it must also carry index=0.""" + handler = AWSEventStreamDecoder( + model="anthropic.claude-sonnet-4-5-20250929-v1:0" + ) + r = handler.converse_chunk_parser( + { + "delta": {"reasoningContent": {"signature": "abc123signatureXYZ"}}, + "contentBlockIndex": 0, + } + ) + assert r.choices[0].index == 0, "signature delta should have index=0" + assert r.choices[0].delta.thinking_blocks is not None + assert len(r.choices[0].delta.thinking_blocks) > 0 + + def test_anthropic_invoke_decoder_thinking_uses_choice_index_zero(self): + """ + When Claude models are invoked via the Bedrock /invoke endpoint + (bedrock_invoke_provider='anthropic'), AmazonAnthropicClaudeStreamDecoder + is used. It must normalise the raw Anthropic index=1 on text blocks to + choices[0].index == 0, matching the behaviour of the direct Anthropic provider. + """ + handler = AmazonAnthropicClaudeStreamDecoder( + model="anthropic.claude-sonnet-4-5-20250929-v1:0", + sync_stream=True, + ) + + anthropic_chunks = [ + # thinking block — raw Anthropic index=0 + { + "type": "content_block_start", + "index": 0, + "content_block": {"type": "thinking", "thinking": ""}, + }, + { + "type": "content_block_delta", + "index": 0, + "delta": {"type": "thinking_delta", "thinking": "I should multiply."}, + }, + {"type": "content_block_stop", "index": 0}, + # text block — raw Anthropic index=1 (the problematic case) + { + "type": "content_block_start", + "index": 1, + "content_block": {"type": "text", "text": ""}, + }, + { + "type": "content_block_delta", + "index": 1, + "delta": {"type": "text_delta", "text": "27 x 453 = 12231"}, + }, + {"type": "content_block_stop", "index": 1}, + ] + + for chunk in anthropic_chunks: + result = handler._chunk_parser(chunk) + assert result.choices[0].index == 0, ( + f"chunk type={chunk.get('type')} index={chunk.get('index')} " + f"produced choices[0].index={result.choices[0].index}, expected 0" + )