diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py
index ba135e7487..4687832763 100644
--- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py
+++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py
@@ -1,6 +1,7 @@
 import copy
 import json
 import logging
+import threading
 import time
 from functools import singledispatch
 from typing import List, Optional, Union
@@ -269,7 +270,8 @@ async def _handle_request(span, kwargs, instance):
                     MessageEvent(
                         content=message.get("content"),
                         role=message.get("role"),
-                        tool_calls=_parse_tool_calls(message.get("tool_calls", None)),
+                        tool_calls=_parse_tool_calls(
+                            message.get("tool_calls", None)),
                     )
                 )
         else:
@@ -292,6 +294,7 @@ def _handle_response(
     choice_counter=None,
     duration_histogram=None,
     duration=None,
+    is_streaming: bool = False,
 ):
     if is_openai_v1():
         response_dict = model_as_dict(response)
@@ -306,6 +309,7 @@ def _handle_response(
             duration_histogram,
             response_dict,
             duration,
+            is_streaming,
         )

     # span attributes
@@ -323,13 +327,19 @@ def _handle_response(


 def _set_chat_metrics(
-    instance, token_counter, choice_counter, duration_histogram, response_dict, duration
+    instance,
+    token_counter,
+    choice_counter,
+    duration_histogram,
+    response_dict,
+    duration,
+    is_streaming: bool = False,
 ):
     shared_attributes = metric_shared_attributes(
         response_model=response_dict.get("model") or None,
         operation="chat",
         server_address=_get_openai_base_url(instance),
-        is_streaming=False,
+        is_streaming=is_streaming,
     )

     # token metrics
@@ -420,7 +430,8 @@ async def _set_prompts(span, messages):
             content = json.dumps(content)
         _set_span_attribute(span, f"{prefix}.content", content)
         if msg.get("tool_call_id"):
-            _set_span_attribute(span, f"{prefix}.tool_call_id", msg.get("tool_call_id"))
+            _set_span_attribute(
+                span, f"{prefix}.tool_call_id", msg.get("tool_call_id"))
         tool_calls = msg.get("tool_calls")
         if tool_calls:
             for i, tool_call in enumerate(tool_calls):
@@ -476,9 +487,11 @@ def _set_completions(span, choices):
         _set_span_attribute(span, f"{prefix}.role", message.get("role"))

         if message.get("refusal"):
-            _set_span_attribute(span, f"{prefix}.refusal", message.get("refusal"))
+            _set_span_attribute(
+                span, f"{prefix}.refusal", message.get("refusal"))
         else:
-            _set_span_attribute(span, f"{prefix}.content", message.get("content"))
+            _set_span_attribute(
+                span, f"{prefix}.content", message.get("content"))

         function_call = message.get("function_call")
         if function_call:
@@ -533,7 +546,8 @@ def _set_streaming_token_metrics(
     # If API response doesn't have usage, fallback to tiktoken calculation
     if prompt_usage == -1 or completion_usage == -1:
         model_name = (
-            complete_response.get("model") or request_kwargs.get("model") or "gpt-4"
+            complete_response.get("model") or request_kwargs.get(
+                "model") or "gpt-4"
         )

         # Calculate prompt tokens if not available from API
@@ -543,7 +557,8 @@ def _set_streaming_token_metrics(
                 if msg.get("content"):
                     prompt_content += msg.get("content")
             if model_name and should_record_stream_token_usage():
-                prompt_usage = get_token_count_from_string(prompt_content, model_name)
+                prompt_usage = get_token_count_from_string(
+                    prompt_content, model_name)

         # Calculate completion tokens if not available from API
         if completion_usage == -1 and complete_response.get("choices"):
@@ -566,7 +581,8 @@ def _set_streaming_token_metrics(
             **shared_attributes,
             SpanAttributes.LLM_TOKEN_TYPE: "input",
         }
-        token_counter.record(prompt_usage, attributes=attributes_with_token_type)
+        token_counter.record(
+            prompt_usage, attributes=attributes_with_token_type)

     if isinstance(completion_usage, int) and completion_usage >= 0:
         attributes_with_token_type = {
@@ -619,11 +635,34 @@ def __init__(
         self._time_of_first_token = self._start_time
         self._complete_response = {"choices": [], "model": ""}

+        # Cleanup state tracking to prevent duplicate operations
+        self._cleanup_completed = False
+        self._cleanup_lock = threading.Lock()
+
+    def __del__(self):
+        """Cleanup when object is garbage collected"""
+        if hasattr(self, '_cleanup_completed') and not self._cleanup_completed:
+            self._ensure_cleanup()
+
     def __enter__(self):
         return self

     def __exit__(self, exc_type, exc_val, exc_tb):
-        self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+        cleanup_exception = None
+        try:
+            self._ensure_cleanup()
+        except Exception as e:
+            cleanup_exception = e
+            # Don't re-raise to avoid masking original exception
+
+        result = self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+
+        if cleanup_exception:
+            # Log cleanup exception but don't affect context manager behavior
+            logger.debug(
+                "Error during ChatStream cleanup in __exit__: %s", cleanup_exception)
+
+        return result

     async def __aenter__(self):
         return self
@@ -643,6 +682,11 @@ def __next__(self):
         except Exception as e:
             if isinstance(e, StopIteration):
                 self._process_complete_response()
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
             raise
         else:
             self._process_item(chunk)
@@ -654,13 +698,19 @@ async def __anext__(self):
         except Exception as e:
             if isinstance(e, StopAsyncIteration):
                 self._process_complete_response()
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
             raise
         else:
             self._process_item(chunk)
             return chunk

     def _process_item(self, item):
-        self._span.add_event(name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")
+        self._span.add_event(
+            name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")

         if self._first_token and self._streaming_time_to_first_token:
             self._time_of_first_token = time.time()
@@ -721,10 +771,82 @@ def _process_complete_response(self):
                 emit_event(_parse_choice_event(choice))
         else:
             if should_send_prompts():
-                _set_completions(self._span, self._complete_response.get("choices"))
+                _set_completions(
+                    self._span, self._complete_response.get("choices"))

         self._span.set_status(Status(StatusCode.OK))
         self._span.end()
+        self._cleanup_completed = True
+
+    @dont_throw
+    def _ensure_cleanup(self):
+        """Thread-safe cleanup method that handles different cleanup scenarios"""
+        with self._cleanup_lock:
+            if self._cleanup_completed:
+                logger.debug("ChatStream cleanup already completed, skipping")
+                return
+
+            try:
+                logger.debug("Starting ChatStream cleanup")
+
+                # Calculate partial metrics based on available data
+                self._record_partial_metrics()
+
+                # Set span status and close it
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.OK))
+                    self._span.end()
+                    logger.debug("ChatStream span closed successfully")
+
+                self._cleanup_completed = True
+                logger.debug("ChatStream cleanup completed successfully")
+
+            except Exception as e:
+                # Log cleanup errors but don't propagate to avoid masking original issues
+                logger.debug("Error during ChatStream cleanup: %s", str(e))
+
+                # Still try to close the span even if metrics recording failed
+                try:
+                    if self._span and self._span.is_recording():
+                        self._span.set_status(
+                            Status(StatusCode.ERROR, "Cleanup failed"))
+                        self._span.end()
+                    self._cleanup_completed = True
+                except Exception:
+                    # Final fallback - just mark as completed to prevent infinite loops
+                    self._cleanup_completed = True
+
+    @dont_throw
+    def _record_partial_metrics(self):
+        """Record metrics based on available partial data"""
+        # Always record duration if we have start time
+        if self._start_time and isinstance(self._start_time, (float, int)) and self._duration_histogram:
+            duration = time.time() - self._start_time
+            self._duration_histogram.record(
+                duration, attributes=self._shared_attributes()
+            )
+
+        # Record basic span attributes even without complete response
+        if self._span and self._span.is_recording():
+            _set_response_attributes(self._span, self._complete_response)
+
+        # Record partial token metrics if we have any data
+        if self._complete_response.get("choices") or self._request_kwargs:
+            _set_streaming_token_metrics(
+                self._request_kwargs,
+                self._complete_response,
+                self._span,
+                self._token_counter,
+                self._shared_attributes(),
+            )
+
+        # Record choice metrics if we have any choices processed
+        if self._choice_counter and self._complete_response.get("choices"):
+            _set_choice_counter_metrics(
+                self._choice_counter,
+                self._complete_response.get("choices"),
+                self._shared_attributes(),
+            )


 # Backward compatibility with OpenAI v0
@@ -755,7 +877,8 @@ def _build_from_streaming_response(
         if first_token and streaming_time_to_first_token:
             time_of_first_token = time.time()
-            streaming_time_to_first_token.record(time_of_first_token - start_time)
+            streaming_time_to_first_token.record(
+                time_of_first_token - start_time)
             first_token = False

         _accumulate_stream_items(item, complete_response)
@@ -825,7 +948,8 @@ async def _abuild_from_streaming_response(
         if first_token and streaming_time_to_first_token:
             time_of_first_token = time.time()
-            streaming_time_to_first_token.record(time_of_first_token - start_time)
+            streaming_time_to_first_token.record(
+                time_of_first_token - start_time)
             first_token = False

         _accumulate_stream_items(item, complete_response)
@@ -943,7 +1067,8 @@ def _(choice: dict) -> ChoiceEvent:
     content = choice.get("message").get("content", "") if has_message else None
     role = choice.get("message").get("role") if has_message else "unknown"
-    finish_reason = choice.get("finish_reason") if has_finish_reason else "unknown"
+    finish_reason = choice.get(
+        "finish_reason") if has_finish_reason else "unknown"

     if has_tool_calls and has_function_call:
         tool_calls = message.get("tool_calls") + [message.get("function_call")]
@@ -982,7 +1107,8 @@ def _accumulate_stream_items(item, complete_response):

     # prompt filter results
     if item.get("prompt_filter_results"):
-        complete_response["prompt_filter_results"] = item.get("prompt_filter_results")
+        complete_response["prompt_filter_results"] = item.get(
+            "prompt_filter_results")

     for choice in item.get("choices"):
         index = choice.get("index")
@@ -1029,4 +1155,5 @@ def _accumulate_stream_items(item, complete_response):
         if tool_call_function and tool_call_function.get("name"):
             span_function["name"] = tool_call_function.get("name")
         if tool_call_function and tool_call_function.get("arguments"):
-            span_function["arguments"] += 
tool_call_function.get("arguments") + span_function["arguments"] += tool_call_function.get( + "arguments") diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_streaming_exception_during_consumption.yaml b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_streaming_exception_during_consumption.yaml new file mode 100644 index 0000000000..ce779347c4 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_streaming_exception_during_consumption.yaml @@ -0,0 +1,188 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Tell me a joke about opentelemetry"}], + "model": "gpt-3.5-turbo", "stream": true}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '123' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.12.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.12.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.9.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"Why"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + did"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + the"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + op"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"ente"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"lemetry"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + developer"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + go"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + broke"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + \n"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"Because"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + they"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + kept"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + trying"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + trace"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + their"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + steps"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + back"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + too"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + far"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + + + data: [DONE] + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 860b0753cd7c3746-MXP + Cache-Control: + - no-cache, must-revalidate + Connection: + - keep-alive + Content-Type: + - text/event-stream + Date: + - Thu, 07 Mar 2024 13:45:54 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=wD9Wagkc4Ap6.Q.aRUAbplsVVDXvdRODC6LKYCogQtg-1709819154-1.0.1.1-punvabozLuqeJrmHjy5TKsBk1OL.bCye2zgkpi0BNtU_J0JYlCoNstsXx0153yhT2Hnz65cmtJnDF9DOnxm0_w; + path=/; expires=Thu, 07-Mar-24 14:15:54 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=sL6NmGZ74pNMoGITQlMm5yxRjRJjCZSAEQGvmqbQ.Ww-1709819154859-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + access-control-allow-origin: + - '*' + alt-svc: + - h3=":443"; ma=86400 + openai-model: + - gpt-3.5-turbo-0125 + openai-organization: + - traceloop + openai-processing-ms: + - '107' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15724800; includeSubDomains + x-ratelimit-limit-requests: + - '5000' + x-ratelimit-limit-tokens: + - '160000' + x-ratelimit-remaining-requests: + - '4999' + x-ratelimit-remaining-tokens: + - '159974' + x-ratelimit-reset-requests: + - 12ms + x-ratelimit-reset-tokens: + - 9ms + x-request-id: + - req_6edd136d6f0ccca4685f155ffffda6b3 + status: + code: 200 + message: OK +version: 1 diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_streaming_memory_leak_prevention.yaml b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_streaming_memory_leak_prevention.yaml new file mode 100644 index 0000000000..ce779347c4 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_streaming_memory_leak_prevention.yaml @@ -0,0 +1,188 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Tell me a joke about opentelemetry"}], + "model": "gpt-3.5-turbo", "stream": true}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '123' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.12.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.12.0 + x-stainless-runtime: + - CPython + 
x-stainless-runtime-version: + - 3.9.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"Why"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + did"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + the"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + op"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"ente"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"lemetry"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + developer"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + go"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + broke"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + \n"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"Because"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + they"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + kept"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + trying"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + trace"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + their"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + steps"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + back"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + too"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + far"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + + + data: [DONE] + + + ' + headers: + CF-Cache-Status: + - 
DYNAMIC + CF-RAY: + - 860b0753cd7c3746-MXP + Cache-Control: + - no-cache, must-revalidate + Connection: + - keep-alive + Content-Type: + - text/event-stream + Date: + - Thu, 07 Mar 2024 13:45:54 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=wD9Wagkc4Ap6.Q.aRUAbplsVVDXvdRODC6LKYCogQtg-1709819154-1.0.1.1-punvabozLuqeJrmHjy5TKsBk1OL.bCye2zgkpi0BNtU_J0JYlCoNstsXx0153yhT2Hnz65cmtJnDF9DOnxm0_w; + path=/; expires=Thu, 07-Mar-24 14:15:54 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=sL6NmGZ74pNMoGITQlMm5yxRjRJjCZSAEQGvmqbQ.Ww-1709819154859-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + access-control-allow-origin: + - '*' + alt-svc: + - h3=":443"; ma=86400 + openai-model: + - gpt-3.5-turbo-0125 + openai-organization: + - traceloop + openai-processing-ms: + - '107' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15724800; includeSubDomains + x-ratelimit-limit-requests: + - '5000' + x-ratelimit-limit-tokens: + - '160000' + x-ratelimit-remaining-requests: + - '4999' + x-ratelimit-remaining-tokens: + - '159974' + x-ratelimit-reset-requests: + - 12ms + x-ratelimit-reset-tokens: + - 9ms + x-request-id: + - req_6edd136d6f0ccca4685f155ffffda6b3 + status: + code: 200 + message: OK +version: 1 diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_streaming_not_consumed.yaml b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_streaming_not_consumed.yaml new file mode 100644 index 0000000000..ce779347c4 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_streaming_not_consumed.yaml @@ -0,0 +1,188 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Tell me a joke about opentelemetry"}], + "model": "gpt-3.5-turbo", "stream": true}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '123' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.12.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.12.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.9.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"Why"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + did"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + the"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + op"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"ente"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"lemetry"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + developer"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + go"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + broke"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + \n"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"Because"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + they"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + kept"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + trying"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + trace"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + their"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + steps"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + back"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + too"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + far"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + + + data: [DONE] + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 860b0753cd7c3746-MXP + Cache-Control: + - no-cache, must-revalidate + Connection: + - keep-alive + Content-Type: + - text/event-stream + Date: + - Thu, 07 Mar 2024 13:45:54 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=wD9Wagkc4Ap6.Q.aRUAbplsVVDXvdRODC6LKYCogQtg-1709819154-1.0.1.1-punvabozLuqeJrmHjy5TKsBk1OL.bCye2zgkpi0BNtU_J0JYlCoNstsXx0153yhT2Hnz65cmtJnDF9DOnxm0_w; + path=/; expires=Thu, 07-Mar-24 14:15:54 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=sL6NmGZ74pNMoGITQlMm5yxRjRJjCZSAEQGvmqbQ.Ww-1709819154859-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + access-control-allow-origin: + - '*' + alt-svc: + - h3=":443"; ma=86400 + openai-model: + - gpt-3.5-turbo-0125 + openai-organization: + - traceloop + openai-processing-ms: + - '107' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15724800; includeSubDomains + x-ratelimit-limit-requests: + - '5000' + x-ratelimit-limit-tokens: + - '160000' + 
x-ratelimit-remaining-requests: + - '4999' + x-ratelimit-remaining-tokens: + - '159974' + x-ratelimit-reset-requests: + - 12ms + x-ratelimit-reset-tokens: + - 9ms + x-request-id: + - req_6edd136d6f0ccca4685f155ffffda6b3 + status: + code: 200 + message: OK +version: 1 diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_streaming_partial_consumption.yaml b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_streaming_partial_consumption.yaml new file mode 100644 index 0000000000..ce779347c4 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_streaming_partial_consumption.yaml @@ -0,0 +1,188 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Tell me a joke about opentelemetry"}], + "model": "gpt-3.5-turbo", "stream": true}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '123' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.12.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.12.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.9.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"Why"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + did"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + the"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + op"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"ente"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"lemetry"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + developer"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + go"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + broke"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + \n"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"Because"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + they"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + kept"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + trying"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + trace"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + their"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + steps"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + back"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + too"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":" + far"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-908MECg5dMyTTbJEltubwQXeeWlBA","object":"chat.completion.chunk","created":1709819154,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_2b778c6b35","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + + + data: [DONE] + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 860b0753cd7c3746-MXP + Cache-Control: + - no-cache, must-revalidate + Connection: + - keep-alive + Content-Type: + - text/event-stream + Date: + - Thu, 07 Mar 2024 13:45:54 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=wD9Wagkc4Ap6.Q.aRUAbplsVVDXvdRODC6LKYCogQtg-1709819154-1.0.1.1-punvabozLuqeJrmHjy5TKsBk1OL.bCye2zgkpi0BNtU_J0JYlCoNstsXx0153yhT2Hnz65cmtJnDF9DOnxm0_w; + path=/; expires=Thu, 07-Mar-24 14:15:54 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=sL6NmGZ74pNMoGITQlMm5yxRjRJjCZSAEQGvmqbQ.Ww-1709819154859-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + access-control-allow-origin: + - '*' + alt-svc: + - h3=":443"; ma=86400 + openai-model: + - gpt-3.5-turbo-0125 + openai-organization: + - traceloop + openai-processing-ms: + - '107' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15724800; includeSubDomains + x-ratelimit-limit-requests: + - '5000' + x-ratelimit-limit-tokens: + - '160000' + x-ratelimit-remaining-requests: + - '4999' + x-ratelimit-remaining-tokens: + - '159974' + x-ratelimit-reset-requests: + - 12ms + x-ratelimit-reset-tokens: + - 9ms + x-request-id: + - req_6edd136d6f0ccca4685f155ffffda6b3 + status: + code: 200 + message: OK +version: 1 diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/test_chat.py b/packages/opentelemetry-instrumentation-openai/tests/traces/test_chat.py index abd546db77..b4034f4601 100644 --- a/packages/opentelemetry-instrumentation-openai/tests/traces/test_chat.py +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/test_chat.py @@ -24,7 +24,8 @@ def test_chat(instrument_legacy, span_exporter, log_exporter, openai_client): openai_client.chat.completions.create( model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "Tell me a joke about opentelemetry"}], + messages=[ + {"role": "user", "content": "Tell me a joke about opentelemetry"}], ) spans = span_exporter.get_finished_spans() @@ -37,7 +38,8 @@ def test_chat(instrument_legacy, span_exporter, log_exporter, openai_client): 
open_ai_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.0.content"] == "Tell me a joke about opentelemetry" ) - assert open_ai_span.attributes.get(f"{SpanAttributes.LLM_COMPLETIONS}.0.content") + assert open_ai_span.attributes.get( + f"{SpanAttributes.LLM_COMPLETIONS}.0.content") assert ( open_ai_span.attributes.get(SpanAttributes.LLM_OPENAI_API_BASE) == "https://api.openai.com/v1/" @@ -48,7 +50,8 @@ def test_chat(instrument_legacy, span_exporter, log_exporter, openai_client): ) == "fp_2b778c6b35" ) - assert open_ai_span.attributes.get(SpanAttributes.LLM_IS_STREAMING) is False + assert open_ai_span.attributes.get( + SpanAttributes.LLM_IS_STREAMING) is False assert ( open_ai_span.attributes.get("gen_ai.response.id") == "chatcmpl-908MD9ivBBLb6EaIjlqwFokntayQK" @@ -66,7 +69,8 @@ def test_chat_with_events_with_content( ): response = openai_client.chat.completions.create( model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "Tell me a joke about opentelemetry"}], + messages=[ + {"role": "user", "content": "Tell me a joke about opentelemetry"}], ) spans = span_exporter.get_finished_spans() @@ -86,7 +90,8 @@ def test_chat_with_events_with_content( ) == "fp_2b778c6b35" ) - assert open_ai_span.attributes.get(SpanAttributes.LLM_IS_STREAMING) is False + assert open_ai_span.attributes.get( + SpanAttributes.LLM_IS_STREAMING) is False assert ( open_ai_span.attributes.get("gen_ai.response.id") == "chatcmpl-908MD9ivBBLb6EaIjlqwFokntayQK" @@ -120,7 +125,8 @@ def test_chat_with_events_with_no_content( ): openai_client.chat.completions.create( model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "Tell me a joke about opentelemetry"}], + messages=[ + {"role": "user", "content": "Tell me a joke about opentelemetry"}], ) spans = span_exporter.get_finished_spans() @@ -139,7 +145,8 @@ def test_chat_with_events_with_no_content( ) == "fp_2b778c6b35" ) - assert open_ai_span.attributes.get(SpanAttributes.LLM_IS_STREAMING) is False + assert open_ai_span.attributes.get( + SpanAttributes.LLM_IS_STREAMING) is False assert ( open_ai_span.attributes.get("gen_ai.response.id") == "chatcmpl-908MD9ivBBLb6EaIjlqwFokntayQK" @@ -278,7 +285,8 @@ def test_chat_tool_calls_with_events_with_content( } ], } - assert_message_in_logs(logs[0], "gen_ai.assistant.message", assistant_event) + assert_message_in_logs( + logs[0], "gen_ai.assistant.message", assistant_event) # Validate the tool message Event tool_event = { @@ -351,7 +359,8 @@ def test_chat_tool_calls_with_events_with_no_content( } ] } - assert_message_in_logs(logs[0], "gen_ai.assistant.message", assistant_event) + assert_message_in_logs( + logs[0], "gen_ai.assistant.message", assistant_event) # Validate the tool message Event tool_event = {} @@ -485,7 +494,8 @@ def test_chat_pydantic_based_tool_calls_with_events_with_content( } ], } - assert_message_in_logs(logs[0], "gen_ai.assistant.message", assistant_event) + assert_message_in_logs( + logs[0], "gen_ai.assistant.message", assistant_event) # Validate the tool message Event tool_event = { @@ -558,7 +568,8 @@ def test_chat_pydantic_based_tool_calls_with_events_with_no_content( } ] } - assert_message_in_logs(logs[0], "gen_ai.assistant.message", assistant_event) + assert_message_in_logs( + logs[0], "gen_ai.assistant.message", assistant_event) # Validate the tool message Event tool_event = {} @@ -573,7 +584,8 @@ def test_chat_pydantic_based_tool_calls_with_events_with_no_content( def test_chat_streaming(instrument_legacy, span_exporter, log_exporter, openai_client): response = 
openai_client.chat.completions.create( model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "Tell me a joke about opentelemetry"}], + messages=[ + {"role": "user", "content": "Tell me a joke about opentelemetry"}], stream=True, ) @@ -591,7 +603,8 @@ def test_chat_streaming(instrument_legacy, span_exporter, log_exporter, openai_c open_ai_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.0.content"] == "Tell me a joke about opentelemetry" ) - assert open_ai_span.attributes.get(f"{SpanAttributes.LLM_COMPLETIONS}.0.content") + assert open_ai_span.attributes.get( + f"{SpanAttributes.LLM_COMPLETIONS}.0.content") assert ( open_ai_span.attributes.get(SpanAttributes.LLM_OPENAI_API_BASE) == "https://api.openai.com/v1/" @@ -605,8 +618,10 @@ def test_chat_streaming(instrument_legacy, span_exporter, log_exporter, openai_c completion_tokens = open_ai_span.attributes.get( SpanAttributes.LLM_USAGE_COMPLETION_TOKENS ) - prompt_tokens = open_ai_span.attributes.get(SpanAttributes.LLM_USAGE_PROMPT_TOKENS) - total_tokens = open_ai_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + prompt_tokens = open_ai_span.attributes.get( + SpanAttributes.LLM_USAGE_PROMPT_TOKENS) + total_tokens = open_ai_span.attributes.get( + SpanAttributes.LLM_USAGE_TOTAL_TOKENS) assert completion_tokens and prompt_tokens and total_tokens assert completion_tokens + prompt_tokens == total_tokens assert ( @@ -626,7 +641,8 @@ def test_chat_streaming_with_events_with_content( ): response = openai_client.chat.completions.create( model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "Tell me a joke about opentelemetry"}], + messages=[ + {"role": "user", "content": "Tell me a joke about opentelemetry"}], stream=True, ) @@ -653,8 +669,10 @@ def test_chat_streaming_with_events_with_content( completion_tokens = open_ai_span.attributes.get( SpanAttributes.LLM_USAGE_COMPLETION_TOKENS ) - prompt_tokens = open_ai_span.attributes.get(SpanAttributes.LLM_USAGE_PROMPT_TOKENS) - total_tokens = open_ai_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + prompt_tokens = open_ai_span.attributes.get( + SpanAttributes.LLM_USAGE_PROMPT_TOKENS) + total_tokens = open_ai_span.attributes.get( + SpanAttributes.LLM_USAGE_TOTAL_TOKENS) assert completion_tokens and prompt_tokens and total_tokens assert completion_tokens + prompt_tokens == total_tokens assert ( @@ -693,7 +711,8 @@ def test_chat_streaming_with_events_with_no_content( ): response = openai_client.chat.completions.create( model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "Tell me a joke about opentelemetry"}], + messages=[ + {"role": "user", "content": "Tell me a joke about opentelemetry"}], stream=True, ) @@ -720,8 +739,10 @@ def test_chat_streaming_with_events_with_no_content( completion_tokens = open_ai_span.attributes.get( SpanAttributes.LLM_USAGE_COMPLETION_TOKENS ) - prompt_tokens = open_ai_span.attributes.get(SpanAttributes.LLM_USAGE_PROMPT_TOKENS) - total_tokens = open_ai_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + prompt_tokens = open_ai_span.attributes.get( + SpanAttributes.LLM_USAGE_PROMPT_TOKENS) + total_tokens = open_ai_span.attributes.get( + SpanAttributes.LLM_USAGE_TOTAL_TOKENS) assert completion_tokens and prompt_tokens and total_tokens assert completion_tokens + prompt_tokens == total_tokens assert ( @@ -748,7 +769,8 @@ async def test_chat_async_streaming( ): response = await async_openai_client.chat.completions.create( model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "Tell me a joke about opentelemetry"}], + 
messages=[ + {"role": "user", "content": "Tell me a joke about opentelemetry"}], stream=True, ) @@ -766,7 +788,8 @@ async def test_chat_async_streaming( open_ai_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.0.content"] == "Tell me a joke about opentelemetry" ) - assert open_ai_span.attributes.get(f"{SpanAttributes.LLM_COMPLETIONS}.0.content") + assert open_ai_span.attributes.get( + f"{SpanAttributes.LLM_COMPLETIONS}.0.content") assert ( open_ai_span.attributes.get(SpanAttributes.LLM_OPENAI_API_BASE) == "https://api.openai.com/v1/" @@ -780,8 +803,10 @@ async def test_chat_async_streaming( completion_tokens = open_ai_span.attributes.get( SpanAttributes.LLM_USAGE_COMPLETION_TOKENS ) - prompt_tokens = open_ai_span.attributes.get(SpanAttributes.LLM_USAGE_PROMPT_TOKENS) - total_tokens = open_ai_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + prompt_tokens = open_ai_span.attributes.get( + SpanAttributes.LLM_USAGE_PROMPT_TOKENS) + total_tokens = open_ai_span.attributes.get( + SpanAttributes.LLM_USAGE_TOTAL_TOKENS) assert completion_tokens and prompt_tokens and total_tokens assert completion_tokens + prompt_tokens == total_tokens assert ( @@ -802,7 +827,8 @@ async def test_chat_async_streaming_with_events_with_content( ): response = await async_openai_client.chat.completions.create( model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "Tell me a joke about opentelemetry"}], + messages=[ + {"role": "user", "content": "Tell me a joke about opentelemetry"}], stream=True, ) @@ -829,8 +855,10 @@ async def test_chat_async_streaming_with_events_with_content( completion_tokens = open_ai_span.attributes.get( SpanAttributes.LLM_USAGE_COMPLETION_TOKENS ) - prompt_tokens = open_ai_span.attributes.get(SpanAttributes.LLM_USAGE_PROMPT_TOKENS) - total_tokens = open_ai_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + prompt_tokens = open_ai_span.attributes.get( + SpanAttributes.LLM_USAGE_PROMPT_TOKENS) + total_tokens = open_ai_span.attributes.get( + SpanAttributes.LLM_USAGE_TOTAL_TOKENS) assert completion_tokens and prompt_tokens and total_tokens assert completion_tokens + prompt_tokens == total_tokens assert ( @@ -868,7 +896,8 @@ async def test_chat_async_streaming_with_events_with_no_content( ): response = await async_openai_client.chat.completions.create( model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "Tell me a joke about opentelemetry"}], + messages=[ + {"role": "user", "content": "Tell me a joke about opentelemetry"}], stream=True, ) @@ -895,8 +924,10 @@ async def test_chat_async_streaming_with_events_with_no_content( completion_tokens = open_ai_span.attributes.get( SpanAttributes.LLM_USAGE_COMPLETION_TOKENS ) - prompt_tokens = open_ai_span.attributes.get(SpanAttributes.LLM_USAGE_PROMPT_TOKENS) - total_tokens = open_ai_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + prompt_tokens = open_ai_span.attributes.get( + SpanAttributes.LLM_USAGE_PROMPT_TOKENS) + total_tokens = open_ai_span.attributes.get( + SpanAttributes.LLM_USAGE_TOTAL_TOKENS) assert completion_tokens and prompt_tokens and total_tokens assert completion_tokens + prompt_tokens == total_tokens assert ( @@ -1289,7 +1320,8 @@ async def test_chat_async_context_propagation_with_events_with_no_content( def assert_message_in_logs(log: LogData, event_name: str, expected_content: dict): - assert log.log_record.attributes.get(EventAttributes.EVENT_NAME) == event_name + assert log.log_record.attributes.get( + EventAttributes.EVENT_NAME) == event_name assert ( 
@@ -1444,7 +1476,8 @@ def test_chat_exception(instrument_legacy, span_exporter, openai_client):
     with pytest.raises(Exception):
         openai_client.chat.completions.create(
             model="gpt-3.5-turbo",
-            messages=[{"role": "user", "content": "Tell me a joke about opentelemetry"}],
+            messages=[
+                {"role": "user", "content": "Tell me a joke about opentelemetry"}],
         )

     spans = span_exporter.get_finished_spans()
@@ -1461,7 +1494,8 @@ def test_chat_exception(instrument_legacy, span_exporter, openai_client):
         open_ai_span.attributes.get(SpanAttributes.LLM_OPENAI_API_BASE)
         == "https://api.openai.com/v1/"
     )
-    assert open_ai_span.attributes.get(SpanAttributes.LLM_IS_STREAMING) is False
+    assert open_ai_span.attributes.get(
+        SpanAttributes.LLM_IS_STREAMING) is False
     assert open_ai_span.status.status_code == StatusCode.ERROR
     assert open_ai_span.status.description.startswith("Error code: 401")
     events = open_ai_span.events
@@ -1482,7 +1516,8 @@ async def test_chat_async_exception(instrument_legacy, span_exporter, async_open
     with pytest.raises(Exception):
         await async_openai_client.chat.completions.create(
             model="gpt-3.5-turbo",
-            messages=[{"role": "user", "content": "Tell me a joke about opentelemetry"}],
+            messages=[
+                {"role": "user", "content": "Tell me a joke about opentelemetry"}],
         )

     spans = span_exporter.get_finished_spans()
@@ -1499,7 +1534,8 @@ async def test_chat_async_exception(instrument_legacy, span_exporter, async_open
         open_ai_span.attributes.get(SpanAttributes.LLM_OPENAI_API_BASE)
         == "https://api.openai.com/v1/"
     )
-    assert open_ai_span.attributes.get(SpanAttributes.LLM_IS_STREAMING) is False
+    assert open_ai_span.attributes.get(
+        SpanAttributes.LLM_IS_STREAMING) is False
     assert open_ai_span.status.status_code == StatusCode.ERROR
     assert open_ai_span.status.description.startswith("Error code: 401")
     events = open_ai_span.events
@@ -1512,3 +1548,240 @@ async def test_chat_async_exception(instrument_legacy, span_exporter, async_open
     assert "openai.AuthenticationError" in event.attributes["exception.stacktrace"]
     assert "invalid_api_key" in event.attributes["exception.stacktrace"]
     assert open_ai_span.attributes.get("error.type") == "AuthenticationError"
+
+
+@pytest.mark.vcr
+def test_chat_streaming_not_consumed(instrument_legacy, span_exporter, log_exporter, reader, openai_client):
+    """Test that streaming responses are properly instrumented even when not consumed"""
+
+    # Create streaming response but don't consume it
+    response = openai_client.chat.completions.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "user", "content": "Tell me a joke about opentelemetry"}],
+        stream=True,
+    )
+
+    # Don't consume the response - this should still create proper traces and metrics
+    del response
+
+    # Force garbage collection to trigger cleanup
+    import gc
+    gc.collect()
+
+    spans = span_exporter.get_finished_spans()
+
+    assert len(spans) == 1
+    open_ai_span = spans[0]
+    assert open_ai_span.name == "openai.chat"
+
+    # Verify span was properly closed
+    assert open_ai_span.status.status_code == StatusCode.OK
+    assert open_ai_span.end_time is not None
+    assert open_ai_span.end_time > open_ai_span.start_time
+
+    assert open_ai_span.attributes.get(
+        SpanAttributes.LLM_REQUEST_MODEL) == "gpt-3.5-turbo"
+    assert open_ai_span.attributes.get(SpanAttributes.LLM_IS_STREAMING) is True
+    assert open_ai_span.attributes.get(
+        SpanAttributes.LLM_REQUEST_TYPE) == "chat"
+
+    assert open_ai_span.attributes.get(
+        f"{SpanAttributes.LLM_PROMPTS}.0.content") == "Tell me a joke about opentelemetry"
+    assert open_ai_span.attributes.get(
+        f"{SpanAttributes.LLM_PROMPTS}.0.role") == "user"
+
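+    # How the above can pass without a single chunk being read (a sketch, not
+    # an assertion of the exact implementation): the stream wrapper presumably
+    # finalizes instrumentation from its cleanup path when the object is
+    # released, roughly:
+    #
+    #     def __del__(self):
+    #         self._ensure_cleanup()  # hypothetical helper: end the span and
+    #                                 # record the duration metric exactly once
+    #
+    # The explicit gc.collect() above makes that finalization deterministic
+    # rather than dependent on when the interpreter collects the object.
+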
f"{SpanAttributes.LLM_PROMPTS}.0.content") == "Tell me a joke about opentelemetry" + assert open_ai_span.attributes.get( + f"{SpanAttributes.LLM_PROMPTS}.0.role") == "user" + + # Verify duration metric was recorded even without consuming the stream + metrics_data = reader.get_metrics_data() + resource_metrics = metrics_data.resource_metrics + assert len(resource_metrics) > 0 + + scope_metrics = resource_metrics[0].scope_metrics + assert len(scope_metrics) > 0 + + # Find duration metric + duration_metrics = [ + metric for metric in scope_metrics[0].metrics + if metric.name == "gen_ai.client.operation.duration" + ] + + assert len(duration_metrics) == 1, "Duration metric should be recorded" + duration_metric = duration_metrics[0] + + # Verify metric data + assert duration_metric.data.data_points + data_point = duration_metric.data.data_points[0] + assert data_point.count >= 1, f"Expected count >= 1, got {data_point.count}" + assert data_point.sum > 0, f"Duration should be greater than 0, got {data_point.sum}" + assert data_point.min > 0, f"Min duration should be greater than 0, got {data_point.min}" + assert data_point.max > 0, f"Max duration should be greater than 0, got {data_point.max}" + + # Verify metric attributes + attributes = data_point.attributes + assert attributes.get( + "gen_ai.system") == "openai", f"Expected gen_ai.system=openai, got {attributes.get('gen_ai.system')}" + assert attributes.get( + "gen_ai.operation.name") == "chat", f"Expected operation=chat, got {attributes.get('gen_ai.operation.name')}" + + streaming_data_points = [ + dp for dp in duration_metric.data.data_points + if dp.attributes.get("stream") is True + ] + assert len(streaming_data_points) >= 1, ( + f"Expected at least one streaming data point, got data points with attributes: " + f"{[dict(dp.attributes) for dp in duration_metric.data.data_points]}" + ) + + +@pytest.mark.vcr +def test_chat_streaming_partial_consumption(instrument_legacy, span_exporter, log_exporter, reader, openai_client): + """Test that streaming responses are properly instrumented when partially consumed""" + + response = openai_client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Count to 5"}], + stream=True, + ) + + # Consume only the first chunk + first_chunk = next(iter(response)) + assert first_chunk is not None + + del response + + import gc + gc.collect() + + spans = span_exporter.get_finished_spans() + + assert len(spans) == 1 + open_ai_span = spans[0] + assert open_ai_span.name == "openai.chat" + + assert open_ai_span.status.status_code == StatusCode.OK + assert open_ai_span.end_time is not None + + assert open_ai_span.attributes.get( + SpanAttributes.LLM_REQUEST_MODEL) == "gpt-3.5-turbo" + assert open_ai_span.attributes.get(SpanAttributes.LLM_IS_STREAMING) is True + + # Should have at least one event from the consumed chunk + events = open_ai_span.events + assert len(events) >= 1 + + metrics_data = reader.get_metrics_data() + resource_metrics = metrics_data.resource_metrics + assert len(resource_metrics) > 0, "Should have resource metrics" + + scope_metrics = resource_metrics[0].scope_metrics + assert len(scope_metrics) > 0, "Should have scope metrics" + + # Find duration metric + duration_metrics = [ + metric for metric in scope_metrics[0].metrics + if metric.name == "gen_ai.client.operation.duration" + ] + + assert len(duration_metrics) == 1, ( + f"Duration metric should be recorded, found metrics: " + f"{[m.name for m in scope_metrics[0].metrics]}" + ) + duration_metric = 
+
+
+@pytest.mark.vcr
+def test_chat_streaming_exception_during_consumption(instrument_legacy, span_exporter, log_exporter, openai_client):
+    """Test that streaming responses handle exceptions during consumption properly"""
+
+    response = openai_client.chat.completions.create(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": "Tell me a short story"}],
+        stream=True,
+    )
+
+    # Simulate an exception during consumption
+    count = 0
+    try:
+        for chunk in response:
+            count += 1
+            if count == 2:  # Interrupt after the second chunk
+                raise Exception("Simulated interruption")
+    except Exception as e:
+        # Force cleanup by deleting the response object
+        del response
+        import gc
+        gc.collect()
+        # Verify the caught exception is the simulated one
+        assert "Simulated interruption" in str(e)
+
+    spans = span_exporter.get_finished_spans()
+
+    assert len(spans) == 1
+    open_ai_span = spans[0]
+    assert open_ai_span.name == "openai.chat"
+
+    # Verify the span was properly closed (status should be OK since the
+    # exception was raised in user code, not inside the instrumented iterator)
+    assert open_ai_span.status.status_code == StatusCode.OK
+    assert open_ai_span.end_time is not None
+
+    # Should have events from the chunks consumed before the exception
+    events = open_ai_span.events
+    assert len(events) >= 2  # At least 2 chunk events before the exception
+
+
+@pytest.mark.vcr
+def test_chat_streaming_memory_leak_prevention(instrument_legacy, span_exporter, log_exporter, openai_client):
+    """Test that a stream created and dropped without being consumed doesn't leak the stream object"""
+    import gc
+    import weakref
+
+    initial_spans = len(span_exporter.get_finished_spans())
+
+    # Create a stream without consuming it
+    response = openai_client.chat.completions.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "user", "content": "Tell me a joke about opentelemetry"}],
+        stream=True,
+    )
+
+    # Create a weak reference to track whether the object is garbage collected
+    weak_ref = weakref.ref(response)
+
+    del response
+
+    gc.collect()
+
+    # Verify the object was garbage collected
+    assert weak_ref() is None, "Stream object was not garbage collected"
+
+    # Verify exactly one new span was produced
+    final_spans = span_exporter.get_finished_spans()
+    new_spans = len(final_spans) - initial_spans
+    assert new_spans == 1, f"Expected 1 new span, got {new_spans}"
+
+    # Verify the span is properly closed
+    span = final_spans[-1]
+    assert span.name == "openai.chat"
+    assert span.status.status_code == StatusCode.OK
+    assert span.end_time is not None
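+
+    # Implementation note (an assumption, for future maintainers): for the
+    # weakref assertion above to hold, the instrumentation must not keep a
+    # strong reference to the wrapped stream, e.g. in a module-level registry:
+    #
+    #     _active_streams = []          # anti-pattern: would keep weak_ref()
+    #     _active_streams.append(self)  # alive and leak every stream
+    #
+    # A threading.Lock plus a cleanup-done flag held on the wrapper itself
+    # (note the new threading import at the top of this patch) is one way to
+    # guarantee the span is ended exactly once whether the stream is fully
+    # consumed, partially consumed, or dropped.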