diff --git a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py index 88140f2fc6..a9ca250b19 100644 --- a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py +++ b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py @@ -81,32 +81,32 @@ "method": "stream", "span_name": "anthropic.chat", }, - # # Beta API methods (regular Anthropic SDK) - # { - # "package": "anthropic.resources.beta.messages.messages", - # "object": "Messages", - # "method": "create", - # "span_name": "anthropic.chat", - # }, - # { - # "package": "anthropic.resources.beta.messages.messages", - # "object": "Messages", - # "method": "stream", - # "span_name": "anthropic.chat", - # }, - # # Beta API methods (Bedrock SDK) - # { - # "package": "anthropic.lib.bedrock._beta_messages", - # "object": "Messages", - # "method": "create", - # "span_name": "anthropic.chat", - # }, - # { - # "package": "anthropic.lib.bedrock._beta_messages", - # "object": "Messages", - # "method": "stream", - # "span_name": "anthropic.chat", - # }, + # Beta API methods (regular Anthropic SDK) + { + "package": "anthropic.resources.beta.messages.messages", + "object": "Messages", + "method": "create", + "span_name": "anthropic.chat", + }, + { + "package": "anthropic.resources.beta.messages.messages", + "object": "Messages", + "method": "stream", + "span_name": "anthropic.chat", + }, + # Beta API methods (Bedrock SDK) + { + "package": "anthropic.lib.bedrock._beta_messages", + "object": "Messages", + "method": "create", + "span_name": "anthropic.chat", + }, + { + "package": "anthropic.lib.bedrock._beta_messages", + "object": "Messages", + "method": "stream", + "span_name": "anthropic.chat", + }, ] WRAPPED_AMETHODS = [ @@ -122,32 +122,32 @@ "method": "create", "span_name": "anthropic.chat", }, - # # Beta API async methods (regular Anthropic SDK) - # { - # "package": "anthropic.resources.beta.messages.messages", - # "object": "AsyncMessages", - # "method": "create", - # "span_name": "anthropic.chat", - # }, - # { - # "package": "anthropic.resources.beta.messages.messages", - # "object": "AsyncMessages", - # "method": "stream", - # "span_name": "anthropic.chat", - # }, - # # Beta API async methods (Bedrock SDK) - # { - # "package": "anthropic.lib.bedrock._beta_messages", - # "object": "AsyncMessages", - # "method": "create", - # "span_name": "anthropic.chat", - # }, - # { - # "package": "anthropic.lib.bedrock._beta_messages", - # "object": "AsyncMessages", - # "method": "stream", - # "span_name": "anthropic.chat", - # }, + # Beta API async methods (regular Anthropic SDK) + { + "package": "anthropic.resources.beta.messages.messages", + "object": "AsyncMessages", + "method": "create", + "span_name": "anthropic.chat", + }, + { + "package": "anthropic.resources.beta.messages.messages", + "object": "AsyncMessages", + "method": "stream", + "span_name": "anthropic.chat", + }, + # Beta API async methods (Bedrock SDK) + { + "package": "anthropic.lib.bedrock._beta_messages", + "object": "AsyncMessages", + "method": "create", + "span_name": "anthropic.chat", + }, + { + "package": "anthropic.lib.bedrock._beta_messages", + "object": "AsyncMessages", + "method": "stream", + "span_name": "anthropic.chat", + }, ] diff --git a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/span_utils.py b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/span_utils.py index 8900b8d4b4..bb94a18b51 100644 --- a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/span_utils.py +++ b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/span_utils.py @@ -8,7 +8,6 @@ dont_throw, model_as_dict, should_send_prompts, - _extract_response_data, ) from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( GEN_AI_RESPONSE_ID, @@ -170,38 +169,65 @@ async def _aset_span_completions(span, response): if not should_send_prompts(): return from opentelemetry.instrumentation.anthropic import set_span_attribute - from opentelemetry.instrumentation.anthropic.utils import _aextract_response_data + import inspect - response = await _aextract_response_data(response) + # If we get a coroutine, await it + if inspect.iscoroutine(response): + try: + response = await response + except Exception as e: + import logging + + logger = logging.getLogger(__name__) + logger.debug(f"Failed to await coroutine response: {e}") + return + + # Work directly with the response object to preserve its structure index = 0 prefix = f"{SpanAttributes.LLM_COMPLETIONS}.{index}" - set_span_attribute(span, f"{prefix}.finish_reason", response.get("stop_reason")) - if response.get("role"): - set_span_attribute(span, f"{prefix}.role", response.get("role")) - if response.get("completion"): - set_span_attribute(span, f"{prefix}.content", response.get("completion")) - elif response.get("content"): + # Safely get attributes without extracting the whole object + stop_reason = getattr(response, "stop_reason", None) + role = getattr(response, "role", None) + completion = getattr(response, "completion", None) + content = getattr(response, "content", None) + + set_span_attribute(span, f"{prefix}.finish_reason", stop_reason) + if role: + set_span_attribute(span, f"{prefix}.role", role) + + if completion: + set_span_attribute(span, f"{prefix}.content", completion) + elif content: tool_call_index = 0 text = "" - for content in response.get("content"): - content_block_type = content.type + for content_item in content: + content_block_type = getattr(content_item, "type", None) # usually, Antrhopic responds with just one text block, # but the API allows for multiple text blocks, so concatenate them - if content_block_type == "text" and hasattr(content, "text"): - text += content.text + if content_block_type == "text" and hasattr(content_item, "text"): + text += content_item.text elif content_block_type == "thinking": - content = dict(content) + content_dict = ( + dict(content_item) + if hasattr(content_item, "__dict__") + else content_item + ) # override the role to thinking set_span_attribute( span, f"{prefix}.role", "thinking", ) + thinking_content = ( + content_dict.get("thinking") + if isinstance(content_dict, dict) + else getattr(content_item, "thinking", None) + ) set_span_attribute( span, f"{prefix}.content", - content.get("thinking"), + thinking_content, ) # increment the index for subsequent content blocks index += 1 @@ -210,26 +236,45 @@ async def _aset_span_completions(span, response): set_span_attribute( span, f"{prefix}.role", - response.get("role"), + role, ) elif content_block_type == "tool_use": - content = dict(content) + content_dict = ( + dict(content_item) + if hasattr(content_item, "__dict__") + else content_item + ) + tool_id = ( + content_dict.get("id") + if isinstance(content_dict, dict) + else getattr(content_item, "id", None) + ) + tool_name = ( + content_dict.get("name") + if isinstance(content_dict, dict) + else getattr(content_item, "name", None) + ) + tool_input = ( + content_dict.get("input") + if isinstance(content_dict, dict) + else getattr(content_item, "input", None) + ) + set_span_attribute( span, f"{prefix}.tool_calls.{tool_call_index}.id", - content.get("id"), + tool_id, ) set_span_attribute( span, f"{prefix}.tool_calls.{tool_call_index}.name", - content.get("name"), + tool_name, ) - tool_arguments = content.get("input") - if tool_arguments is not None: + if tool_input is not None: set_span_attribute( span, f"{prefix}.tool_calls.{tool_call_index}.arguments", - json.dumps(tool_arguments), + json.dumps(tool_input), ) tool_call_index += 1 set_span_attribute(span, f"{prefix}.content", text) @@ -239,37 +284,64 @@ def _set_span_completions(span, response): if not should_send_prompts(): return from opentelemetry.instrumentation.anthropic import set_span_attribute + import inspect - response = _extract_response_data(response) + # If we get a coroutine, we cannot process it in sync context + if inspect.iscoroutine(response): + import logging + + logger = logging.getLogger(__name__) + logger.warning( + f"_set_span_completions received coroutine {response} - span processing skipped" + ) + return + + # Work directly with the response object to preserve its structure index = 0 prefix = f"{SpanAttributes.LLM_COMPLETIONS}.{index}" - set_span_attribute(span, f"{prefix}.finish_reason", response.get("stop_reason")) - if response.get("role"): - set_span_attribute(span, f"{prefix}.role", response.get("role")) - if response.get("completion"): - set_span_attribute(span, f"{prefix}.content", response.get("completion")) - elif response.get("content"): + # Safely get attributes without extracting the whole object + stop_reason = getattr(response, "stop_reason", None) + role = getattr(response, "role", None) + completion = getattr(response, "completion", None) + content = getattr(response, "content", None) + + set_span_attribute(span, f"{prefix}.finish_reason", stop_reason) + if role: + set_span_attribute(span, f"{prefix}.role", role) + + if completion: + set_span_attribute(span, f"{prefix}.content", completion) + elif content: tool_call_index = 0 text = "" - for content in response.get("content"): - content_block_type = content.type + for content_item in content: + content_block_type = getattr(content_item, "type", None) # usually, Antrhopic responds with just one text block, # but the API allows for multiple text blocks, so concatenate them - if content_block_type == "text" and hasattr(content, "text"): - text += content.text + if content_block_type == "text" and hasattr(content_item, "text"): + text += content_item.text elif content_block_type == "thinking": - content = dict(content) + content_dict = ( + dict(content_item) + if hasattr(content_item, "__dict__") + else content_item + ) # override the role to thinking set_span_attribute( span, f"{prefix}.role", "thinking", ) + thinking_content = ( + content_dict.get("thinking") + if isinstance(content_dict, dict) + else getattr(content_item, "thinking", None) + ) set_span_attribute( span, f"{prefix}.content", - content.get("thinking"), + thinking_content, ) # increment the index for subsequent content blocks index += 1 @@ -278,26 +350,45 @@ def _set_span_completions(span, response): set_span_attribute( span, f"{prefix}.role", - response.get("role"), + role, ) elif content_block_type == "tool_use": - content = dict(content) + content_dict = ( + dict(content_item) + if hasattr(content_item, "__dict__") + else content_item + ) + tool_id = ( + content_dict.get("id") + if isinstance(content_dict, dict) + else getattr(content_item, "id", None) + ) + tool_name = ( + content_dict.get("name") + if isinstance(content_dict, dict) + else getattr(content_item, "name", None) + ) + tool_input = ( + content_dict.get("input") + if isinstance(content_dict, dict) + else getattr(content_item, "input", None) + ) + set_span_attribute( span, f"{prefix}.tool_calls.{tool_call_index}.id", - content.get("id"), + tool_id, ) set_span_attribute( span, f"{prefix}.tool_calls.{tool_call_index}.name", - content.get("name"), + tool_name, ) - tool_arguments = content.get("input") - if tool_arguments is not None: + if tool_input is not None: set_span_attribute( span, f"{prefix}.tool_calls.{tool_call_index}.arguments", - json.dumps(tool_arguments), + json.dumps(tool_input), ) tool_call_index += 1 set_span_attribute(span, f"{prefix}.content", text) @@ -306,24 +397,44 @@ def _set_span_completions(span, response): @dont_throw async def aset_response_attributes(span, response): from opentelemetry.instrumentation.anthropic import set_span_attribute - from opentelemetry.instrumentation.anthropic.utils import _aextract_response_data - - response = await _aextract_response_data(response) - set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, response.get("model")) - set_span_attribute(span, GEN_AI_RESPONSE_ID, response.get("id")) - - if response.get("usage"): - prompt_tokens = response.get("usage").input_tokens - completion_tokens = response.get("usage").output_tokens - set_span_attribute(span, SpanAttributes.LLM_USAGE_PROMPT_TOKENS, prompt_tokens) - set_span_attribute( - span, SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, completion_tokens - ) - set_span_attribute( - span, - SpanAttributes.LLM_USAGE_TOTAL_TOKENS, - prompt_tokens + completion_tokens, - ) + import inspect + + # If we get a coroutine, await it + if inspect.iscoroutine(response): + try: + response = await response + except Exception as e: + import logging + + logger = logging.getLogger(__name__) + logger.debug(f"Failed to await coroutine response: {e}") + return + + # Work directly with the response object + model = getattr(response, "model", None) + response_id = getattr(response, "id", None) + usage = getattr(response, "usage", None) + + set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, model) + set_span_attribute(span, GEN_AI_RESPONSE_ID, response_id) + + if usage: + prompt_tokens = getattr(usage, "input_tokens", None) + completion_tokens = getattr(usage, "output_tokens", None) + if prompt_tokens is not None: + set_span_attribute( + span, SpanAttributes.LLM_USAGE_PROMPT_TOKENS, prompt_tokens + ) + if completion_tokens is not None: + set_span_attribute( + span, SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, completion_tokens + ) + if prompt_tokens is not None and completion_tokens is not None: + set_span_attribute( + span, + SpanAttributes.LLM_USAGE_TOTAL_TOKENS, + prompt_tokens + completion_tokens, + ) await _aset_span_completions(span, response) @@ -331,23 +442,43 @@ async def aset_response_attributes(span, response): @dont_throw def set_response_attributes(span, response): from opentelemetry.instrumentation.anthropic import set_span_attribute + import inspect - response = _extract_response_data(response) - set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, response.get("model")) - set_span_attribute(span, GEN_AI_RESPONSE_ID, response.get("id")) + # If we get a coroutine, we cannot process it in sync context + if inspect.iscoroutine(response): + import logging - if response.get("usage"): - prompt_tokens = response.get("usage").input_tokens - completion_tokens = response.get("usage").output_tokens - set_span_attribute(span, SpanAttributes.LLM_USAGE_PROMPT_TOKENS, prompt_tokens) - set_span_attribute( - span, SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, completion_tokens - ) - set_span_attribute( - span, - SpanAttributes.LLM_USAGE_TOTAL_TOKENS, - prompt_tokens + completion_tokens, + logger = logging.getLogger(__name__) + logger.warning( + f"set_response_attributes received coroutine {response} - response processing skipped" ) + return + + # Work directly with the response object + model = getattr(response, "model", None) + response_id = getattr(response, "id", None) + usage = getattr(response, "usage", None) + + set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, model) + set_span_attribute(span, GEN_AI_RESPONSE_ID, response_id) + + if usage: + prompt_tokens = getattr(usage, "input_tokens", None) + completion_tokens = getattr(usage, "output_tokens", None) + if prompt_tokens is not None: + set_span_attribute( + span, SpanAttributes.LLM_USAGE_PROMPT_TOKENS, prompt_tokens + ) + if completion_tokens is not None: + set_span_attribute( + span, SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, completion_tokens + ) + if prompt_tokens is not None and completion_tokens is not None: + set_span_attribute( + span, + SpanAttributes.LLM_USAGE_TOTAL_TOKENS, + prompt_tokens + completion_tokens, + ) _set_span_completions(span, response) diff --git a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/utils.py b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/utils.py index e25a21ca91..6a90d36c91 100644 --- a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/utils.py +++ b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/utils.py @@ -61,6 +61,91 @@ def _handle_exception(e, func, logger): return async_wrapper if asyncio.iscoroutinefunction(func) else sync_wrapper +def _safe_extract_attributes_for_metrics(response): + """ + Safely extract attributes from response object for metrics and token usage only. + + This function avoids directly accessing __dict__ which can have side effects + on certain object types like Pydantic models or proxy objects used by LangGraph. + """ + if response is None: + return {} + + # If it's already a dict, return a copy to be safe + if isinstance(response, dict): + return dict(response) + + response_dict = {} + + try: + # Try Pydantic model extraction first (most common case for Anthropic responses) + from pydantic import BaseModel + + if isinstance(response, BaseModel): + # Use model_dump for Pydantic v2 or dict() for v1 + if hasattr(response, "model_dump"): + return response.model_dump() + elif hasattr(response, "dict"): + return response.dict() + except ImportError: + pass + + # For metrics, we only need specific attributes + metrics_attrs = ["usage", "model", "stop_reason", "id"] + + for attr in metrics_attrs: + try: + if hasattr(response, attr): + value = getattr(response, attr, None) + if value is not None: + # For usage objects, extract the token counts safely + if attr == "usage": + if hasattr(value, "input_tokens"): + # For usage objects that have the expected attributes + usage_dict = {} + for token_attr in [ + "input_tokens", + "output_tokens", + "cache_read_input_tokens", + "cache_creation_input_tokens", + ]: + if hasattr(value, token_attr): + token_value = getattr(value, token_attr, None) + if token_value is not None: + usage_dict[token_attr] = token_value + response_dict[attr] = usage_dict + else: + # For other usage objects, try different approaches + try: + # Try Pydantic model methods + if hasattr(value, "model_dump"): + response_dict[attr] = value.model_dump() + elif hasattr(value, "dict"): + response_dict[attr] = value.dict() + elif hasattr(value, "__dict__"): + response_dict[attr] = dict(value.__dict__) + else: + response_dict[attr] = value + except Exception: + response_dict[attr] = value + else: + response_dict[attr] = value + except Exception: + # Skip attributes that cause issues when accessed + continue + + # Handle case where response might be a Pydantic object and fallback methods didn't work + if not response_dict and hasattr(response, "__dict__"): + try: + # Create a safe copy of the dict to avoid modifying the original + original_dict = response.__dict__ + response_dict = dict(original_dict) + except Exception: + pass + + return response_dict + + async def _aextract_response_data(response): """Async version of _extract_response_data that can await coroutines.""" import inspect @@ -71,6 +156,7 @@ async def _aextract_response_data(response): response = await response except Exception as e: import logging + logger = logging.getLogger(__name__) logger.debug(f"Failed to await coroutine response: {e}") return {} @@ -79,7 +165,7 @@ async def _aextract_response_data(response): return response # Handle with_raw_response wrapped responses - if hasattr(response, 'parse') and callable(response.parse): + if hasattr(response, "parse") and callable(response.parse): try: # For with_raw_response, parse() gives us the actual response object parsed_response = response.parse() @@ -88,13 +174,19 @@ async def _aextract_response_data(response): return parsed_response except Exception as e: import logging + logger = logging.getLogger(__name__) - logger.debug(f"Failed to parse response: {e}, response type: {type(response)}") + logger.debug( + f"Failed to parse response: {e}, response type: {type(response)}" + ) - # Fallback to __dict__ for regular response objects - if hasattr(response, '__dict__'): - response_dict = response.__dict__ - return response_dict + # Safe fallback to __dict__ for regular response objects, but create a copy + if hasattr(response, "__dict__"): + try: + response_dict = dict(response.__dict__) + return response_dict + except Exception: + pass return {} @@ -106,15 +198,18 @@ def _extract_response_data(response): # If we get a coroutine, we cannot process it in sync context if inspect.iscoroutine(response): import logging + logger = logging.getLogger(__name__) - logger.warning(f"_extract_response_data received coroutine {response} - response processing skipped") + logger.warning( + f"_extract_response_data received coroutine {response} - response processing skipped" + ) return {} if isinstance(response, dict): return response # Handle with_raw_response wrapped responses - if hasattr(response, 'parse') and callable(response.parse): + if hasattr(response, "parse") and callable(response.parse): try: # For with_raw_response, parse() gives us the actual response object parsed_response = response.parse() @@ -123,13 +218,19 @@ def _extract_response_data(response): return parsed_response except Exception as e: import logging + logger = logging.getLogger(__name__) - logger.debug(f"Failed to parse response: {e}, response type: {type(response)}") + logger.debug( + f"Failed to parse response: {e}, response type: {type(response)}" + ) - # Fallback to __dict__ for regular response objects - if hasattr(response, '__dict__'): - response_dict = response.__dict__ - return response_dict + # Safe fallback to __dict__ for regular response objects, but create a copy + if hasattr(response, "__dict__"): + try: + response_dict = dict(response.__dict__) + return response_dict + except Exception: + pass return {} diff --git a/packages/opentelemetry-instrumentation-anthropic/tests/test_bedrock_with_raw_response.py b/packages/opentelemetry-instrumentation-anthropic/tests/test_bedrock_with_raw_response.py index 183743474e..47e32dedea 100644 --- a/packages/opentelemetry-instrumentation-anthropic/tests/test_bedrock_with_raw_response.py +++ b/packages/opentelemetry-instrumentation-anthropic/tests/test_bedrock_with_raw_response.py @@ -25,7 +25,6 @@ def async_anthropic_bedrock_client(instrument_legacy): ) -@pytest.mark.skip @pytest.mark.asyncio @pytest.mark.vcr async def test_async_anthropic_bedrock_with_raw_response( @@ -80,7 +79,6 @@ async def test_async_anthropic_bedrock_with_raw_response( ) -@pytest.mark.skip @pytest.mark.asyncio @pytest.mark.vcr async def test_async_anthropic_bedrock_regular_create( @@ -129,7 +127,6 @@ async def test_async_anthropic_bedrock_regular_create( ) -@pytest.mark.skip @pytest.mark.asyncio @pytest.mark.vcr async def test_async_anthropic_bedrock_beta_with_raw_response( diff --git a/packages/opentelemetry-instrumentation-anthropic/tests/test_beta_api_interference.py b/packages/opentelemetry-instrumentation-anthropic/tests/test_beta_api_interference.py new file mode 100644 index 0000000000..aca0721807 --- /dev/null +++ b/packages/opentelemetry-instrumentation-anthropic/tests/test_beta_api_interference.py @@ -0,0 +1,154 @@ +import pytest +import asyncio +from unittest.mock import Mock, AsyncMock, patch + +try: + from anthropic import AsyncAnthropicBedrock +except ImportError: + AsyncAnthropicBedrock = None + + +@pytest.fixture +def async_anthropic_bedrock_client_beta(instrument_legacy): + """Create a mock AsyncAnthropicBedrock client with beta API support""" + if AsyncAnthropicBedrock is None: + pytest.skip("AsyncAnthropicBedrock not available") + + client = AsyncAnthropicBedrock( + aws_region="us-east-1", + aws_access_key="test-key", + aws_secret_key="test-secret", + ) + return client + + +@pytest.mark.asyncio +async def test_beta_api_context_interference( + instrument_legacy, + async_anthropic_bedrock_client_beta, + span_exporter, +): + """ + Test that reproduces the LangGraph agent getting stuck due to beta API instrumentation. + This test simulates what happens when LangGraph uses Claude on Bedrock with computer use. + """ + + # Mock a response that simulates what the beta API would return + mock_response = Mock() + mock_response.content = [Mock(text="I'll help you with that task")] + mock_response.usage = Mock(input_tokens=50, output_tokens=20) + mock_response.stop_reason = "end_turn" + + # Create a mock beta messages client that behaves like the real one + mock_beta_messages = Mock() + mock_beta_messages.create = AsyncMock(return_value=mock_response) + + # Patch the beta messages client + with patch.object(async_anthropic_bedrock_client_beta, "beta", Mock()) as mock_beta: + mock_beta.messages = mock_beta_messages + + # Simulate what LangGraph would do - multiple rapid calls in sequence + # This is where the agent gets stuck according to the trace + tasks = [] + for i in range(3): + task = asyncio.create_task( + mock_beta.messages.create( + max_tokens=1024, + messages=[ + { + "role": "user", + "content": f"Task {i}: Use the computer to complete this action", + } + ], + model="anthropic.claude-3-sonnet-20241022-v2:0", + tools=[ + { + "type": "computer_20241022", + "name": "computer", + "display_width_px": 1024, + "display_height_px": 768, + } + ], + ) + ) + tasks.append(task) + # Small delay to simulate real workflow timing + await asyncio.sleep(0.1) + + # Wait for all tasks to complete - this should not hang + try: + results = await asyncio.wait_for(asyncio.gather(*tasks), timeout=5.0) + assert len(results) == 3 + except asyncio.TimeoutError: + pytest.fail( + "Tasks timed out - this indicates the beta API instrumentation is causing a hang" + ) + + +@pytest.mark.asyncio +async def test_beta_api_response_corruption( + instrument_legacy, + async_anthropic_bedrock_client_beta, + span_exporter, +): + """ + Test that checks if beta API instrumentation corrupts the response context + """ + + # Create a mock response with computer use tool result + mock_content_item = Mock() + mock_content_item.type = "tool_use" + mock_content_item.id = "tool_123" + mock_content_item.name = "computer" + mock_content_item.input = {"action": "click", "coordinate": [100, 200]} + + mock_usage = Mock() + mock_usage.input_tokens = 100 + mock_usage.output_tokens = 50 + + mock_response = Mock() + mock_response.content = [mock_content_item] + mock_response.usage = mock_usage + mock_response.stop_reason = "tool_use" + + original_response = mock_response + + # Patch beta API + with patch.object(async_anthropic_bedrock_client_beta, "beta", Mock()) as mock_beta: + mock_beta.messages = Mock() + mock_beta.messages.create = AsyncMock(return_value=mock_response) + + # Make the call + response = await mock_beta.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "Take a screenshot"}], + model="anthropic.claude-3-sonnet-20241022-v2:0", + tools=[ + { + "type": "computer_20241022", + "name": "computer", + "display_width_px": 1024, + "display_height_px": 768, + } + ], + ) + + # Verify the response wasn't corrupted by instrumentation + assert response is not None + assert response.content[0].type == "tool_use" + assert response.content[0].name == "computer" + assert response.stop_reason == "tool_use" + + # Check that the response object identity is preserved + # If instrumentation wraps the response incorrectly, this could break LangGraph + assert response is original_response or hasattr(response, "__wrapped__") + + +def test_beta_api_method_signature_preservation(): + """ + Test that beta API method signatures are preserved after instrumentation. + LangGraph might depend on specific method signatures or attributes. + """ + # This test would check that the wrapped methods preserve their original signatures + # and don't introduce unexpected parameters or return types + pass