diff --git a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py
index 88140f2fc6..a9ca250b19 100644
--- a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py
+++ b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py
@@ -81,32 +81,32 @@
         "method": "stream",
         "span_name": "anthropic.chat",
     },
-    # # Beta API methods (regular Anthropic SDK)
-    # {
-    #     "package": "anthropic.resources.beta.messages.messages",
-    #     "object": "Messages",
-    #     "method": "create",
-    #     "span_name": "anthropic.chat",
-    # },
-    # {
-    #     "package": "anthropic.resources.beta.messages.messages",
-    #     "object": "Messages",
-    #     "method": "stream",
-    #     "span_name": "anthropic.chat",
-    # },
-    # # Beta API methods (Bedrock SDK)
-    # {
-    #     "package": "anthropic.lib.bedrock._beta_messages",
-    #     "object": "Messages",
-    #     "method": "create",
-    #     "span_name": "anthropic.chat",
-    # },
-    # {
-    #     "package": "anthropic.lib.bedrock._beta_messages",
-    #     "object": "Messages",
-    #     "method": "stream",
-    #     "span_name": "anthropic.chat",
-    # },
+    # Beta API methods (regular Anthropic SDK)
+    {
+        "package": "anthropic.resources.beta.messages.messages",
+        "object": "Messages",
+        "method": "create",
+        "span_name": "anthropic.chat",
+    },
+    {
+        "package": "anthropic.resources.beta.messages.messages",
+        "object": "Messages",
+        "method": "stream",
+        "span_name": "anthropic.chat",
+    },
+    # Beta API methods (Bedrock SDK)
+    {
+        "package": "anthropic.lib.bedrock._beta_messages",
+        "object": "Messages",
+        "method": "create",
+        "span_name": "anthropic.chat",
+    },
+    {
+        "package": "anthropic.lib.bedrock._beta_messages",
+        "object": "Messages",
+        "method": "stream",
+        "span_name": "anthropic.chat",
+    },
 ]
 
 WRAPPED_AMETHODS = [
@@ -122,32 +122,32 @@
         "method": "create",
         "span_name": "anthropic.chat",
     },
-    # # Beta API async methods (regular Anthropic SDK)
-    # {
-    #     "package": "anthropic.resources.beta.messages.messages",
-    #     "object": "AsyncMessages",
-    #     "method": "create",
-    #     "span_name": "anthropic.chat",
-    # },
-    # {
-    #     "package": "anthropic.resources.beta.messages.messages",
-    #     "object": "AsyncMessages",
-    #     "method": "stream",
-    #     "span_name": "anthropic.chat",
-    # },
-    # # Beta API async methods (Bedrock SDK)
-    # {
-    #     "package": "anthropic.lib.bedrock._beta_messages",
-    #     "object": "AsyncMessages",
-    #     "method": "create",
-    #     "span_name": "anthropic.chat",
-    # },
-    # {
-    #     "package": "anthropic.lib.bedrock._beta_messages",
-    #     "object": "AsyncMessages",
-    #     "method": "stream",
-    #     "span_name": "anthropic.chat",
-    # },
+    # Beta API async methods (regular Anthropic SDK)
+    {
+        "package": "anthropic.resources.beta.messages.messages",
+        "object": "AsyncMessages",
+        "method": "create",
+        "span_name": "anthropic.chat",
+    },
+    {
+        "package": "anthropic.resources.beta.messages.messages",
+        "object": "AsyncMessages",
+        "method": "stream",
+        "span_name": "anthropic.chat",
+    },
+    # Beta API async methods (Bedrock SDK)
+    {
+        "package": "anthropic.lib.bedrock._beta_messages",
+        "object": "AsyncMessages",
+        "method": "create",
+        "span_name": "anthropic.chat",
+    },
+    {
+        "package": "anthropic.lib.bedrock._beta_messages",
+        "object": "AsyncMessages",
+        "method": "stream",
+        "span_name": "anthropic.chat",
+    },
 ]
 
 
diff --git a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/span_utils.py b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/span_utils.py
index 8900b8d4b4..bb94a18b51 100644
--- a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/span_utils.py
+++ b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/span_utils.py
@@ -8,7 +8,6 @@
     dont_throw,
     model_as_dict,
     should_send_prompts,
-    _extract_response_data,
 )
 from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
     GEN_AI_RESPONSE_ID,
@@ -170,38 +169,65 @@ async def _aset_span_completions(span, response):
     if not should_send_prompts():
         return
     from opentelemetry.instrumentation.anthropic import set_span_attribute
-    from opentelemetry.instrumentation.anthropic.utils import _aextract_response_data
+    import inspect
 
-    response = await _aextract_response_data(response)
+    # If we get a coroutine, await it
+    if inspect.iscoroutine(response):
+        try:
+            response = await response
+        except Exception as e:
+            import logging
+
+            logger = logging.getLogger(__name__)
+            logger.debug(f"Failed to await coroutine response: {e}")
+            return
+
+    # Work directly with the response object to preserve its structure
     index = 0
     prefix = f"{SpanAttributes.LLM_COMPLETIONS}.{index}"
-    set_span_attribute(span, f"{prefix}.finish_reason", response.get("stop_reason"))
-    if response.get("role"):
-        set_span_attribute(span, f"{prefix}.role", response.get("role"))
 
-    if response.get("completion"):
-        set_span_attribute(span, f"{prefix}.content", response.get("completion"))
-    elif response.get("content"):
+    # Safely get attributes without extracting the whole object
+    stop_reason = getattr(response, "stop_reason", None)
+    role = getattr(response, "role", None)
+    completion = getattr(response, "completion", None)
+    content = getattr(response, "content", None)
+
+    set_span_attribute(span, f"{prefix}.finish_reason", stop_reason)
+    if role:
+        set_span_attribute(span, f"{prefix}.role", role)
+
+    if completion:
+        set_span_attribute(span, f"{prefix}.content", completion)
+    elif content:
         tool_call_index = 0
         text = ""
-        for content in response.get("content"):
-            content_block_type = content.type
+        for content_item in content:
+            content_block_type = getattr(content_item, "type", None)
             # usually, Antrhopic responds with just one text block,
             # but the API allows for multiple text blocks, so concatenate them
-            if content_block_type == "text" and hasattr(content, "text"):
-                text += content.text
+            if content_block_type == "text" and hasattr(content_item, "text"):
+                text += content_item.text
             elif content_block_type == "thinking":
-                content = dict(content)
+                content_dict = (
+                    dict(content_item)
+                    if hasattr(content_item, "__dict__")
+                    else content_item
+                )
                 # override the role to thinking
                 set_span_attribute(
                     span,
                     f"{prefix}.role",
                     "thinking",
                 )
+                thinking_content = (
+                    content_dict.get("thinking")
+                    if isinstance(content_dict, dict)
+                    else getattr(content_item, "thinking", None)
+                )
                 set_span_attribute(
                     span,
                     f"{prefix}.content",
-                    content.get("thinking"),
+                    thinking_content,
                 )
                 # increment the index for subsequent content blocks
                 index += 1
@@ -210,26 +236,45 @@ async def _aset_span_completions(span, response):
                 set_span_attribute(
                     span,
                     f"{prefix}.role",
-                    response.get("role"),
+                    role,
                 )
             elif content_block_type == "tool_use":
-                content = dict(content)
+                content_dict = (
+                    dict(content_item)
+                    if hasattr(content_item, "__dict__")
+                    else content_item
+                )
+                tool_id = (
+                    content_dict.get("id")
+                    if isinstance(content_dict, dict)
+                    else getattr(content_item, "id", None)
+                )
+                tool_name = (
+                    content_dict.get("name")
+                    if isinstance(content_dict, dict)
+                    else getattr(content_item, "name", None)
+                )
+                tool_input = (
+                    content_dict.get("input")
+                    if isinstance(content_dict, dict)
+                    else getattr(content_item, "input", None)
+                )
+
                 set_span_attribute(
                     span,
                     f"{prefix}.tool_calls.{tool_call_index}.id",
-                    content.get("id"),
+                    tool_id,
                 )
                 set_span_attribute(
                     span,
                     f"{prefix}.tool_calls.{tool_call_index}.name",
-                    content.get("name"),
+                    tool_name,
                 )
-                tool_arguments = content.get("input")
-                if tool_arguments is not None:
+                if tool_input is not None:
                     set_span_attribute(
                         span,
                         f"{prefix}.tool_calls.{tool_call_index}.arguments",
-                        json.dumps(tool_arguments),
+                        json.dumps(tool_input),
                     )
                 tool_call_index += 1
         set_span_attribute(span, f"{prefix}.content", text)
@@ -239,37 +284,64 @@ def _set_span_completions(span, response):
     if not should_send_prompts():
         return
     from opentelemetry.instrumentation.anthropic import set_span_attribute
+    import inspect
 
-    response = _extract_response_data(response)
+    # If we get a coroutine, we cannot process it in sync context
+    if inspect.iscoroutine(response):
+        import logging
+
+        logger = logging.getLogger(__name__)
+        logger.warning(
+            f"_set_span_completions received coroutine {response} - span processing skipped"
+        )
+        return
+
+    # Work directly with the response object to preserve its structure
     index = 0
     prefix = f"{SpanAttributes.LLM_COMPLETIONS}.{index}"
-    set_span_attribute(span, f"{prefix}.finish_reason", response.get("stop_reason"))
-    if response.get("role"):
-        set_span_attribute(span, f"{prefix}.role", response.get("role"))
 
-    if response.get("completion"):
-        set_span_attribute(span, f"{prefix}.content", response.get("completion"))
-    elif response.get("content"):
+    # Safely get attributes without extracting the whole object
+    stop_reason = getattr(response, "stop_reason", None)
+    role = getattr(response, "role", None)
+    completion = getattr(response, "completion", None)
+    content = getattr(response, "content", None)
+
+    set_span_attribute(span, f"{prefix}.finish_reason", stop_reason)
+    if role:
+        set_span_attribute(span, f"{prefix}.role", role)
+
+    if completion:
+        set_span_attribute(span, f"{prefix}.content", completion)
+    elif content:
         tool_call_index = 0
         text = ""
-        for content in response.get("content"):
-            content_block_type = content.type
+        for content_item in content:
+            content_block_type = getattr(content_item, "type", None)
             # usually, Antrhopic responds with just one text block,
             # but the API allows for multiple text blocks, so concatenate them
-            if content_block_type == "text" and hasattr(content, "text"):
-                text += content.text
+            if content_block_type == "text" and hasattr(content_item, "text"):
+                text += content_item.text
             elif content_block_type == "thinking":
-                content = dict(content)
+                content_dict = (
+                    dict(content_item)
+                    if hasattr(content_item, "__dict__")
+                    else content_item
+                )
                 # override the role to thinking
                 set_span_attribute(
                     span,
                     f"{prefix}.role",
                     "thinking",
                 )
+                thinking_content = (
+                    content_dict.get("thinking")
+                    if isinstance(content_dict, dict)
+                    else getattr(content_item, "thinking", None)
+                )
                 set_span_attribute(
                     span,
                     f"{prefix}.content",
-                    content.get("thinking"),
+                    thinking_content,
                 )
                 # increment the index for subsequent content blocks
                 index += 1
@@ -278,26 +350,45 @@ def _set_span_completions(span, response):
                 set_span_attribute(
                     span,
                     f"{prefix}.role",
-                    response.get("role"),
+                    role,
                 )
             elif content_block_type == "tool_use":
-                content = dict(content)
+                content_dict = (
+                    dict(content_item)
+                    if hasattr(content_item, "__dict__")
+                    else content_item
+                )
+                tool_id = (
+                    content_dict.get("id")
+                    if isinstance(content_dict, dict)
+                    else getattr(content_item, "id", None)
+                )
+                tool_name = (
+                    content_dict.get("name")
+                    if isinstance(content_dict, dict)
+                    else getattr(content_item, "name", None)
+                )
+                tool_input = (
+                    content_dict.get("input")
+                    if isinstance(content_dict, dict)
+                    else getattr(content_item, "input", None)
+                )
+
                 set_span_attribute(
                     span,
                     f"{prefix}.tool_calls.{tool_call_index}.id",
-                    content.get("id"),
+                    tool_id,
                 )
                 set_span_attribute(
                     span,
                     f"{prefix}.tool_calls.{tool_call_index}.name",
-                    content.get("name"),
+                    tool_name,
                 )
-                tool_arguments = content.get("input")
-                if tool_arguments is not None:
+                if tool_input is not None:
                     set_span_attribute(
                         span,
                         f"{prefix}.tool_calls.{tool_call_index}.arguments",
-                        json.dumps(tool_arguments),
+                        json.dumps(tool_input),
                     )
                 tool_call_index += 1
         set_span_attribute(span, f"{prefix}.content", text)
@@ -306,24 +397,44 @@ def _set_span_completions(span, response):
 @dont_throw
 async def aset_response_attributes(span, response):
     from opentelemetry.instrumentation.anthropic import set_span_attribute
-    from opentelemetry.instrumentation.anthropic.utils import _aextract_response_data
-
-    response = await _aextract_response_data(response)
-    set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, response.get("model"))
-    set_span_attribute(span, GEN_AI_RESPONSE_ID, response.get("id"))
-
-    if response.get("usage"):
-        prompt_tokens = response.get("usage").input_tokens
-        completion_tokens = response.get("usage").output_tokens
-        set_span_attribute(span, SpanAttributes.LLM_USAGE_PROMPT_TOKENS, prompt_tokens)
-        set_span_attribute(
-            span, SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, completion_tokens
-        )
-        set_span_attribute(
-            span,
-            SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
-            prompt_tokens + completion_tokens,
-        )
+    import inspect
+
+    # If we get a coroutine, await it
+    if inspect.iscoroutine(response):
+        try:
+            response = await response
+        except Exception as e:
+            import logging
+
+            logger = logging.getLogger(__name__)
+            logger.debug(f"Failed to await coroutine response: {e}")
+            return
+
+    # Work directly with the response object
+    model = getattr(response, "model", None)
+    response_id = getattr(response, "id", None)
+    usage = getattr(response, "usage", None)
+
+    set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, model)
+    set_span_attribute(span, GEN_AI_RESPONSE_ID, response_id)
+
+    if usage:
+        prompt_tokens = getattr(usage, "input_tokens", None)
+        completion_tokens = getattr(usage, "output_tokens", None)
+        if prompt_tokens is not None:
+            set_span_attribute(
+                span, SpanAttributes.LLM_USAGE_PROMPT_TOKENS, prompt_tokens
+            )
+        if completion_tokens is not None:
+            set_span_attribute(
+                span, SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, completion_tokens
+            )
+        if prompt_tokens is not None and completion_tokens is not None:
+            set_span_attribute(
+                span,
+                SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
+                prompt_tokens + completion_tokens,
+            )
 
     await _aset_span_completions(span, response)
 
@@ -331,23 +442,43 @@ async def aset_response_attributes(span, response):
 @dont_throw
 def set_response_attributes(span, response):
     from opentelemetry.instrumentation.anthropic import set_span_attribute
+    import inspect
 
-    response = _extract_response_data(response)
-    set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, response.get("model"))
-    set_span_attribute(span, GEN_AI_RESPONSE_ID, response.get("id"))
+    # If we get a coroutine, we cannot process it in sync context
+    if inspect.iscoroutine(response):
+        import logging
 
-    if response.get("usage"):
-        prompt_tokens = response.get("usage").input_tokens
-        completion_tokens = response.get("usage").output_tokens
-        set_span_attribute(span, SpanAttributes.LLM_USAGE_PROMPT_TOKENS, prompt_tokens)
-        set_span_attribute(
-            span, SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, completion_tokens
-        )
-        set_span_attribute(
-            span,
-            SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
-            prompt_tokens + completion_tokens,
+        logger = logging.getLogger(__name__)
+        logger.warning(
+            f"set_response_attributes received coroutine {response} - response processing skipped"
         )
+        return
+
+    # Work directly with the response object
+    model = getattr(response, "model", None)
+    response_id = getattr(response, "id", None)
+    usage = getattr(response, "usage", None)
+
+    set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, model)
+    set_span_attribute(span, GEN_AI_RESPONSE_ID, response_id)
+
+    if usage:
+        prompt_tokens = getattr(usage, "input_tokens", None)
+        completion_tokens = getattr(usage, "output_tokens", None)
+        if prompt_tokens is not None:
+            set_span_attribute(
+                span, SpanAttributes.LLM_USAGE_PROMPT_TOKENS, prompt_tokens
+            )
+        if completion_tokens is not None:
+            set_span_attribute(
+                span, SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, completion_tokens
+            )
+        if prompt_tokens is not None and completion_tokens is not None:
+            set_span_attribute(
+                span,
+                SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
+                prompt_tokens + completion_tokens,
+            )
 
     _set_span_completions(span, response)
 
diff --git a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/utils.py b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/utils.py
index e25a21ca91..6a90d36c91 100644
--- a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/utils.py
+++ b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/utils.py
@@ -61,6 +61,91 @@ def _handle_exception(e, func, logger):
     return async_wrapper if asyncio.iscoroutinefunction(func) else sync_wrapper
 
 
+def _safe_extract_attributes_for_metrics(response):
+    """
+    Safely extract attributes from response object for metrics and token usage only.
+
+    This function avoids directly accessing __dict__ which can have side effects
+    on certain object types like Pydantic models or proxy objects used by LangGraph.
+    """
+    if response is None:
+        return {}
+
+    # If it's already a dict, return a copy to be safe
+    if isinstance(response, dict):
+        return dict(response)
+
+    response_dict = {}
+
+    try:
+        # Try Pydantic model extraction first (most common case for Anthropic responses)
+        from pydantic import BaseModel
+
+        if isinstance(response, BaseModel):
+            # Use model_dump for Pydantic v2 or dict() for v1
+            if hasattr(response, "model_dump"):
+                return response.model_dump()
+            elif hasattr(response, "dict"):
+                return response.dict()
+    except ImportError:
+        pass
+
+    # For metrics, we only need specific attributes
+    metrics_attrs = ["usage", "model", "stop_reason", "id"]
+
+    for attr in metrics_attrs:
+        try:
+            if hasattr(response, attr):
+                value = getattr(response, attr, None)
+                if value is not None:
+                    # For usage objects, extract the token counts safely
+                    if attr == "usage":
+                        if hasattr(value, "input_tokens"):
+                            # For usage objects that have the expected attributes
+                            usage_dict = {}
+                            for token_attr in [
+                                "input_tokens",
+                                "output_tokens",
+                                "cache_read_input_tokens",
+                                "cache_creation_input_tokens",
+                            ]:
+                                if hasattr(value, token_attr):
+                                    token_value = getattr(value, token_attr, None)
+                                    if token_value is not None:
+                                        usage_dict[token_attr] = token_value
+                            response_dict[attr] = usage_dict
+                        else:
+                            # For other usage objects, try different approaches
+                            try:
+                                # Try Pydantic model methods
+                                if hasattr(value, "model_dump"):
+                                    response_dict[attr] = value.model_dump()
+                                elif hasattr(value, "dict"):
+                                    response_dict[attr] = value.dict()
+                                elif hasattr(value, "__dict__"):
+                                    response_dict[attr] = dict(value.__dict__)
+                                else:
+                                    response_dict[attr] = value
+                            except Exception:
+                                response_dict[attr] = value
+                    else:
+                        response_dict[attr] = value
+        except Exception:
+            # Skip attributes that cause issues when accessed
+            continue
+
+    # Handle case where response might be a Pydantic object and fallback methods didn't work
+    if not response_dict and hasattr(response, "__dict__"):
+        try:
+            # Create a safe copy of the dict to avoid modifying the original
+            original_dict = response.__dict__
+            response_dict = dict(original_dict)
+        except Exception:
+            pass
+
+    return response_dict
+
+
 async def _aextract_response_data(response):
     """Async version of _extract_response_data that can await coroutines."""
     import inspect
@@ -71,6 +156,7 @@ async def _aextract_response_data(response):
             response = await response
         except Exception as e:
             import logging
+
             logger = logging.getLogger(__name__)
             logger.debug(f"Failed to await coroutine response: {e}")
             return {}
@@ -79,7 +165,7 @@ async def _aextract_response_data(response):
         return response
 
     # Handle with_raw_response wrapped responses
-    if hasattr(response, 'parse') and callable(response.parse):
+    if hasattr(response, "parse") and callable(response.parse):
         try:
             # For with_raw_response, parse() gives us the actual response object
             parsed_response = response.parse()
@@ -88,13 +174,19 @@ async def _aextract_response_data(response):
             return parsed_response
         except Exception as e:
             import logging
+
             logger = logging.getLogger(__name__)
-            logger.debug(f"Failed to parse response: {e}, response type: {type(response)}")
+            logger.debug(
+                f"Failed to parse response: {e}, response type: {type(response)}"
+            )
 
-    # Fallback to __dict__ for regular response objects
-    if hasattr(response, '__dict__'):
-        response_dict = response.__dict__
-        return response_dict
+    # Safe fallback to __dict__ for regular response objects, but create a copy
+    if hasattr(response, "__dict__"):
+        try:
+            response_dict = dict(response.__dict__)
+            return response_dict
+        except Exception:
+            pass
 
     return {}
 
@@ -106,15 +198,18 @@ def _extract_response_data(response):
     # If we get a coroutine, we cannot process it in sync context
     if inspect.iscoroutine(response):
         import logging
+
         logger = logging.getLogger(__name__)
-        logger.warning(f"_extract_response_data received coroutine {response} - response processing skipped")
+        logger.warning(
+            f"_extract_response_data received coroutine {response} - response processing skipped"
+        )
         return {}
 
     if isinstance(response, dict):
         return response
 
     # Handle with_raw_response wrapped responses
-    if hasattr(response, 'parse') and callable(response.parse):
+    if hasattr(response, "parse") and callable(response.parse):
         try:
             # For with_raw_response, parse() gives us the actual response object
             parsed_response = response.parse()
@@ -123,13 +218,19 @@ def _extract_response_data(response):
             return parsed_response
         except Exception as e:
             import logging
+
             logger = logging.getLogger(__name__)
-            logger.debug(f"Failed to parse response: {e}, response type: {type(response)}")
+            logger.debug(
+                f"Failed to parse response: {e}, response type: {type(response)}"
+            )
 
-    # Fallback to __dict__ for regular response objects
-    if hasattr(response, '__dict__'):
-        response_dict = response.__dict__
-        return response_dict
+    # Safe fallback to __dict__ for regular response objects, but create a copy
+    if hasattr(response, "__dict__"):
+        try:
+            response_dict = dict(response.__dict__)
+            return response_dict
+        except Exception:
+            pass
 
     return {}
 
diff --git a/packages/opentelemetry-instrumentation-anthropic/tests/test_bedrock_with_raw_response.py b/packages/opentelemetry-instrumentation-anthropic/tests/test_bedrock_with_raw_response.py
index 183743474e..47e32dedea 100644
--- a/packages/opentelemetry-instrumentation-anthropic/tests/test_bedrock_with_raw_response.py
+++ b/packages/opentelemetry-instrumentation-anthropic/tests/test_bedrock_with_raw_response.py
@@ -25,7 +25,6 @@ def async_anthropic_bedrock_client(instrument_legacy):
     )
 
 
-@pytest.mark.skip
 @pytest.mark.asyncio
 @pytest.mark.vcr
 async def test_async_anthropic_bedrock_with_raw_response(
@@ -80,7 +79,6 @@ async def test_async_anthropic_bedrock_with_raw_response(
     )
 
 
-@pytest.mark.skip
 @pytest.mark.asyncio
 @pytest.mark.vcr
 async def test_async_anthropic_bedrock_regular_create(
@@ -129,7 +127,6 @@ async def test_async_anthropic_bedrock_regular_create(
     )
 
 
-@pytest.mark.skip
 @pytest.mark.asyncio
 @pytest.mark.vcr
 async def test_async_anthropic_bedrock_beta_with_raw_response(
diff --git a/packages/opentelemetry-instrumentation-anthropic/tests/test_beta_api_interference.py b/packages/opentelemetry-instrumentation-anthropic/tests/test_beta_api_interference.py
new file mode 100644
index 0000000000..aca0721807
--- /dev/null
+++ b/packages/opentelemetry-instrumentation-anthropic/tests/test_beta_api_interference.py
@@ -0,0 +1,154 @@
+import pytest
+import asyncio
+from unittest.mock import Mock, AsyncMock, patch
+
+try:
+    from anthropic import AsyncAnthropicBedrock
+except ImportError:
+    AsyncAnthropicBedrock = None
+
+
+@pytest.fixture
+def async_anthropic_bedrock_client_beta(instrument_legacy):
+    """Create a mock AsyncAnthropicBedrock client with beta API support"""
+    if AsyncAnthropicBedrock is None:
+        pytest.skip("AsyncAnthropicBedrock not available")
+
+    client = AsyncAnthropicBedrock(
+        aws_region="us-east-1",
+        aws_access_key="test-key",
+        aws_secret_key="test-secret",
+    )
+    return client
+
+
+@pytest.mark.asyncio
+async def test_beta_api_context_interference(
+    instrument_legacy,
+    async_anthropic_bedrock_client_beta,
+    span_exporter,
+):
+    """
+    Test that reproduces the LangGraph agent getting stuck due to beta API instrumentation.
+    This test simulates what happens when LangGraph uses Claude on Bedrock with computer use.
+    """
+
+    # Mock a response that simulates what the beta API would return
+    mock_response = Mock()
+    mock_response.content = [Mock(text="I'll help you with that task")]
+    mock_response.usage = Mock(input_tokens=50, output_tokens=20)
+    mock_response.stop_reason = "end_turn"
+
+    # Create a mock beta messages client that behaves like the real one
+    mock_beta_messages = Mock()
+    mock_beta_messages.create = AsyncMock(return_value=mock_response)
+
+    # Patch the beta messages client
+    with patch.object(async_anthropic_bedrock_client_beta, "beta", Mock()) as mock_beta:
+        mock_beta.messages = mock_beta_messages
+
+        # Simulate what LangGraph would do - multiple rapid calls in sequence
+        # This is where the agent gets stuck according to the trace
+        tasks = []
+        for i in range(3):
+            task = asyncio.create_task(
+                mock_beta.messages.create(
+                    max_tokens=1024,
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": f"Task {i}: Use the computer to complete this action",
+                        }
+                    ],
+                    model="anthropic.claude-3-sonnet-20241022-v2:0",
+                    tools=[
+                        {
+                            "type": "computer_20241022",
+                            "name": "computer",
+                            "display_width_px": 1024,
+                            "display_height_px": 768,
+                        }
+                    ],
+                )
+            )
+            tasks.append(task)
+            # Small delay to simulate real workflow timing
+            await asyncio.sleep(0.1)
+
+        # Wait for all tasks to complete - this should not hang
+        try:
+            results = await asyncio.wait_for(asyncio.gather(*tasks), timeout=5.0)
+            assert len(results) == 3
+        except asyncio.TimeoutError:
+            pytest.fail(
+                "Tasks timed out - this indicates the beta API instrumentation is causing a hang"
+            )
+
+
+@pytest.mark.asyncio
+async def test_beta_api_response_corruption(
+    instrument_legacy,
+    async_anthropic_bedrock_client_beta,
+    span_exporter,
+):
+    """
+    Test that checks if beta API instrumentation corrupts the response context
+    """
+
+    # Create a mock response with computer use tool result
+    mock_content_item = Mock()
+    mock_content_item.type = "tool_use"
+    mock_content_item.id = "tool_123"
+    mock_content_item.name = "computer"
+    mock_content_item.input = {"action": "click", "coordinate": [100, 200]}
+
+    mock_usage = Mock()
+    mock_usage.input_tokens = 100
+    mock_usage.output_tokens = 50
+
+    mock_response = Mock()
+    mock_response.content = [mock_content_item]
+    mock_response.usage = mock_usage
+    mock_response.stop_reason = "tool_use"
+
+    original_response = mock_response
+
+    # Patch beta API
+    with patch.object(async_anthropic_bedrock_client_beta, "beta", Mock()) as mock_beta:
+        mock_beta.messages = Mock()
+        mock_beta.messages.create = AsyncMock(return_value=mock_response)
+
+        # Make the call
+        response = await mock_beta.messages.create(
+            max_tokens=1024,
+            messages=[{"role": "user", "content": "Take a screenshot"}],
+            model="anthropic.claude-3-sonnet-20241022-v2:0",
+            tools=[
+                {
+                    "type": "computer_20241022",
+                    "name": "computer",
+                    "display_width_px": 1024,
+                    "display_height_px": 768,
+                }
+            ],
+        )
+
+        # Verify the response wasn't corrupted by instrumentation
+        assert response is not None
+        assert response.content[0].type == "tool_use"
+        assert response.content[0].name == "computer"
+        assert response.stop_reason == "tool_use"
+
+        # Check that the response object identity is preserved
+        # If instrumentation wraps the response incorrectly, this could break LangGraph
+        assert response is original_response or hasattr(response, "__wrapped__")
+
+
+def test_beta_api_method_signature_preservation():
+    """
+    Test that beta API method signatures are preserved after instrumentation.
+    LangGraph might depend on specific method signatures or attributes.
+    """
+    # This test would check that the wrapped methods preserve their original signatures
+    # and don't introduce unexpected parameters or return types
+    pass