From 41f5ec53e0b2ed81d0ede4ad9a75d1648bb368cd Mon Sep 17 00:00:00 2001 From: Yun Kim <35776586+Yun-Kim@users.noreply.github.com> Date: Tue, 4 Nov 2025 12:09:26 -0500 Subject: [PATCH] fix(llmobs): safely output format bedrock cohere rerank spans (#15124) ## Description Closes #14575. Makes the bedrock integration access output response messages safely. The error occurred when cohere rerank models were invoked, since cohere rerank responses lack a response["text"] field and will return an empty list. ## Testing Added a test invoking cohere rerank models. ## Risks ## Additional Notes (cherry picked from commit 191f10e1e04bf904557a3e91d2b1b11ef5741627) Signed-off-by: Yun Kim --- ddtrace/llmobs/_integrations/bedrock.py | 15 +++--- ...edrock-llmobs-cohere-a1bcb48e9b252d77.yaml | 4 ++ .../cohere_rerank_invoke.yaml | 52 +++++++++++++++++++ tests/contrib/botocore/test_bedrock_llmobs.py | 33 ++++++++++-- 4 files changed, 92 insertions(+), 12 deletions(-) create mode 100644 releasenotes/notes/fix-bedrock-llmobs-cohere-a1bcb48e9b252d77.yaml create mode 100644 tests/contrib/botocore/bedrock_cassettes/cohere_rerank_invoke.yaml diff --git a/ddtrace/llmobs/_integrations/bedrock.py b/ddtrace/llmobs/_integrations/bedrock.py index b6eb244e300..592fb31145b 100644 --- a/ddtrace/llmobs/_integrations/bedrock.py +++ b/ddtrace/llmobs/_integrations/bedrock.py @@ -371,13 +371,14 @@ def _extract_output_message(response) -> List[Message]: """Extract output messages from the stored response. Anthropic allows for chat messages, which requires some special casing. 
""" - if isinstance(response["text"], str): - return [Message(content=response["text"])] - if isinstance(response["text"], list): - if isinstance(response["text"][0], str): - return [Message(content=str(content)) for content in response["text"]] - if isinstance(response["text"][0], dict): - return [Message(content=response["text"][0].get("text", ""))] + resp_text = response.get("text", "") + if isinstance(resp_text, str): + return [Message(content=resp_text)] + if resp_text and isinstance(resp_text, list): + if isinstance(resp_text[0], str): + return [Message(content=str(content)) for content in resp_text] + if isinstance(resp_text[0], dict): + return [Message(content=resp_text[0].get("text", ""))] return [] def _get_base_url(self, **kwargs: Dict[str, Any]) -> Optional[str]: diff --git a/releasenotes/notes/fix-bedrock-llmobs-cohere-a1bcb48e9b252d77.yaml b/releasenotes/notes/fix-bedrock-llmobs-cohere-a1bcb48e9b252d77.yaml new file mode 100644 index 00000000000..47f7e73cb86 --- /dev/null +++ b/releasenotes/notes/fix-bedrock-llmobs-cohere-a1bcb48e9b252d77.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + LLM Observability: Resolves an issue in the bedrock integration where invoking cohere rerank models would result in missing spans due to output formatting index errors. diff --git a/tests/contrib/botocore/bedrock_cassettes/cohere_rerank_invoke.yaml b/tests/contrib/botocore/bedrock_cassettes/cohere_rerank_invoke.yaml new file mode 100644 index 00000000000..bfbe8cfa37c --- /dev/null +++ b/tests/contrib/botocore/bedrock_cassettes/cohere_rerank_invoke.yaml @@ -0,0 +1,52 @@ +interactions: +- request: + body: '{"query": "What is the capital of the United States?", "documents": ["Carson + City is the capital city of the American state of Nevada.", "The Commonwealth + of the Northern Mariana Islands is a group of islands in the Pacific Ocean. + Its capital is Saipan.", "Washington, D.C. 
(also known as simply Washington + or D.C., and officially as the District of Columbia) is the capital of the United + States. It is a federal district.", "Capitalization or capitalisation in English + grammar is the use of a capital letter at the start of a word. English usage + varies from capitalization in other languages.", "Capital punishment has existed + in the United States since beforethe United States was a country. As of 2017, + capital punishment is legal in 30 of the 50 states."], "api_version": 2, "top_n": + 3}' + headers: + Content-Length: + - '790' + User-Agent: + - !!binary | + Qm90bzMvMS4zNC40OSBtZC9Cb3RvY29yZSMxLjM0LjQ5IHVhLzIuMCBvcy9tYWNvcyMyNC42LjAg + bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjExLjEzIG1kL3B5aW1wbCNDUHl0aG9uIGNmZy9y + ZXRyeS1tb2RlI2xlZ2FjeSBCb3RvY29yZS8xLjM0LjQ5 + X-Amz-Date: + - !!binary | + MjAyNTEwMzFUMjEwODQ2Wg== + amz-sdk-invocation-id: + - !!binary | + NDNlZWJhN2EtNGY1Yy00ZDI1LWFmNzUtYjY0NDNjNmUzYzM0 + amz-sdk-request: + - !!binary | + YXR0ZW1wdD0x + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/cohere.rerank-v3-5%3A0/invoke + response: + body: + string: '{"results":[{"index":2,"relevance_score":0.8742601},{"index":0,"relevance_score":0.1728413},{"index":4,"relevance_score":0.10793502}]}' + headers: + Connection: + - keep-alive + Content-Length: + - '134' + Content-Type: + - application/json + Date: + - Fri, 31 Oct 2025 21:08:47 GMT + X-Amzn-Bedrock-Invocation-Latency: + - '109' + x-amzn-RequestId: + - de4d85e7-fe35-4809-bda9-7675d0eb091f + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/botocore/test_bedrock_llmobs.py b/tests/contrib/botocore/test_bedrock_llmobs.py index d88770452cb..fd813a81a66 100644 --- a/tests/contrib/botocore/test_bedrock_llmobs.py +++ b/tests/contrib/botocore/test_bedrock_llmobs.py @@ -26,10 +26,15 @@ ) class TestLLMObsBedrock: @staticmethod - def expected_llmobs_span_event(span, n_output, message=False, metadata=None, token_metrics=None): + def 
expected_llmobs_span_event( + span, n_output, input_message=False, output_message=False, metadata=None, token_metrics=None + ): expected_input = [{"content": mock.ANY}] - if message: + if input_message: expected_input = [{"content": mock.ANY, "role": "user"}] + expected_output = [] + if output_message: + expected_output = [{"content": mock.ANY} for _ in range(n_output)] # Use empty dicts as defaults for _expected_llmobs_llm_span_event to avoid None issues expected_parameters = metadata if metadata is not None else {} @@ -40,7 +45,7 @@ def expected_llmobs_span_event(span, n_output, message=False, metadata=None, tok model_name=span.get_tag("bedrock.request.model"), model_provider=span.get_tag("bedrock.request.model_provider"), input_messages=expected_input, - output_messages=[{"content": mock.ANY} for _ in range(n_output)], + output_messages=expected_output, metadata=expected_parameters, token_metrics=expected_token_metrics, tags={"service": "aws.bedrock-runtime", "ml_app": ""}, @@ -86,7 +91,7 @@ def _test_llmobs_invoke(cls, provider, bedrock_client, mock_tracer, llmobs_event assert len(llmobs_events) == 1 assert llmobs_events[0] == cls.expected_llmobs_span_event( - span, n_output, message="message" in provider, metadata=expected_metadata + span, n_output, input_message="message" in provider, output_message=True, metadata=expected_metadata ) LLMObs.disable() @@ -121,7 +126,7 @@ def _test_llmobs_invoke_stream( assert len(llmobs_events) == 1 assert llmobs_events[0] == cls.expected_llmobs_span_event( - span, n_output, message="message" in provider, metadata=expected_metadata + span, n_output, input_message="message" in provider, output_message=True, metadata=expected_metadata ) def test_llmobs_ai21_invoke(self, ddtrace_global_config, bedrock_client, mock_tracer, llmobs_events): @@ -156,6 +161,24 @@ def test_llmobs_cohere_multi_output_invoke(self, ddtrace_global_config, bedrock_ def test_llmobs_meta_invoke(self, ddtrace_global_config, bedrock_client, mock_tracer, 
llmobs_events): self._test_llmobs_invoke("meta", bedrock_client, mock_tracer, llmobs_events) + def test_llmobs_cohere_rerank_invoke(self, ddtrace_global_config, bedrock_client, mock_tracer, llmobs_events): + cassette_name = "cohere_rerank_invoke.yaml" + model = "cohere.rerank-v3-5:0" + prompt_data = "What is the capital of the United States?" + documents = [ + "Carson City is the capital city of the American state of Nevada.", + "The Commonwealth of the Northern Mariana Islands's capital is Saipan.", + ] + body = json.dumps({"query": prompt_data, "documents": documents, "api_version": 2, "top_n": 3}) + with get_request_vcr().use_cassette(cassette_name): + response = bedrock_client.invoke_model(body=body, modelId=model) + json.loads(response.get("body").read()) + span = mock_tracer.pop_traces()[0][0] + + assert len(llmobs_events) == 1 + assert llmobs_events[0] == self.expected_llmobs_span_event(span, 1) + LLMObs.disable() + def test_llmobs_amazon_invoke_stream(self, ddtrace_global_config, bedrock_client, mock_tracer, llmobs_events): self._test_llmobs_invoke_stream("amazon", bedrock_client, mock_tracer, llmobs_events)