Merged

Changes from all commits
@@ -48,6 +48,7 @@
SpanAttributes,
)
from opentelemetry.trace import SpanKind, Tracer
from opentelemetry import trace
from opentelemetry.trace.status import Status, StatusCode
from wrapt import ObjectProxy

@@ -86,75 +87,77 @@ def chat_wrapper(
attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
)

run_async(_handle_request(span, kwargs, instance))
try:
start_time = time.time()
response = wrapped(*args, **kwargs)
end_time = time.time()
except Exception as e: # pylint: disable=broad-except
end_time = time.time()
duration = end_time - start_time if "start_time" in locals() else 0

attributes = {
"error.type": e.__class__.__name__,
}

if duration > 0 and duration_histogram:
duration_histogram.record(duration, attributes=attributes)
if exception_counter:
exception_counter.add(1, attributes=attributes)

span.set_attribute(ERROR_TYPE, e.__class__.__name__)
span.record_exception(e)
span.set_status(Status(StatusCode.ERROR, str(e)))
span.end()
# Use the span as current context to ensure events get proper trace context
with trace.use_span(span, end_on_exit=False):
run_async(_handle_request(span, kwargs, instance))
try:
start_time = time.time()
response = wrapped(*args, **kwargs)
Comment on lines +90 to +95

🛠️ Refactor suggestion

⚠️ Potential issue
Context not propagated into new thread used by run_async; emit_event may still miss span_id

run_async executes the coroutine in a new thread when a loop is running. The current runtime context (and thus the current span set by trace.use_span) is not automatically propagated to that thread. emit_event inside _handle_request will then lack a current span.
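A minimal repro of that behavior (sketch, not PR code; assumes the OTel SDK is installed — OTel context lives in a ContextVar, and threading.Thread does not copy contextvars into the new thread):

import threading

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

trace.set_tracer_provider(TracerProvider())
tracer = trace.get_tracer(__name__)

def worker():
    # No context was attached in this thread, so the current span is the
    # invalid NonRecordingSpan (span_id == 0), not "parent".
    span_ctx = trace.get_current_span().get_span_context()
    print(f"in thread: span_id={span_ctx.span_id:x}")

with tracer.start_as_current_span("parent"):
    t = threading.Thread(target=worker)
    t.start()
    t.join()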

Fix either by propagating the context into the thread here, or by updating utils.run_async to attach a captured context.

Minimal in-place fix:

-        run_async(_handle_request(span, kwargs, instance))
+        # Ensure current runtime context (with the span) flows into the helper thread
+        current_ctx = context_api.get_current()
+        def _runner():
+            token = context_api.attach(current_ctx)
+            try:
+                import asyncio
+                asyncio.run(_handle_request(span, kwargs, instance))
+            finally:
+                context_api.detach(token)
+        import threading
+        t = threading.Thread(target=_runner)
+        t.start()
+        t.join()

Alternatively, enhance utils.run_async to accept and attach a context (preferred, single place to maintain).
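A sketch of that preferred shape (the signature and the thread/loop handling here are assumptions about utils.run_async, not its actual code — only the attach/detach pattern is the point):

import asyncio
import threading

from opentelemetry import context as context_api

def run_async(coro, ctx=None):
    # Capture the caller's context (including the current span) by default.
    ctx = ctx if ctx is not None else context_api.get_current()

    def _runner():
        token = context_api.attach(ctx)  # make the caller's span current here
        try:
            asyncio.run(coro)
        finally:
            context_api.detach(token)

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        _runner()  # no loop running: safe to run inline
    else:
        # A loop is already running: block in a helper thread rather than
        # deadlocking the running loop.
        t = threading.Thread(target=_runner)
        t.start()
        t.join()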

Committable suggestion skipped: line range outside the PR's diff.

end_time = time.time()
except Exception as e: # pylint: disable=broad-except
end_time = time.time()
duration = end_time - start_time if "start_time" in locals() else 0

attributes = {
"error.type": e.__class__.__name__,
}

raise
if duration > 0 and duration_histogram:
duration_histogram.record(duration, attributes=attributes)
if exception_counter:
exception_counter.add(1, attributes=attributes)

if is_streaming_response(response):
# span will be closed after the generator is done
if is_openai_v1():
return ChatStream(
span,
response,
instance,
token_counter,
choice_counter,
duration_histogram,
streaming_time_to_first_token,
streaming_time_to_generate,
start_time,
kwargs,
)
else:
return _build_from_streaming_response(
span,
response,
instance,
token_counter,
choice_counter,
duration_histogram,
streaming_time_to_first_token,
streaming_time_to_generate,
start_time,
kwargs,
)
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
span.record_exception(e)
span.set_status(Status(StatusCode.ERROR, str(e)))
span.end()

duration = end_time - start_time
raise

_handle_response(
response,
span,
instance,
token_counter,
choice_counter,
duration_histogram,
duration,
)
if is_streaming_response(response):
# span will be closed after the generator is done
if is_openai_v1():
return ChatStream(
span,
response,
instance,
token_counter,
choice_counter,
duration_histogram,
streaming_time_to_first_token,
streaming_time_to_generate,
start_time,
kwargs,
)
else:
return _build_from_streaming_response(
span,
response,
instance,
token_counter,
choice_counter,
duration_histogram,
streaming_time_to_first_token,
streaming_time_to_generate,
start_time,
kwargs,
)

span.end()
duration = end_time - start_time

return response
_handle_response(
response,
span,
instance,
token_counter,
choice_counter,
duration_histogram,
duration,
)

span.end()

return response


@_with_chat_telemetry_wrapper
@@ -182,78 +185,80 @@ async def achat_wrapper(
attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
)

await _handle_request(span, kwargs, instance)
# Use the span as current context to ensure events get proper trace context
with trace.use_span(span, end_on_exit=False):
await _handle_request(span, kwargs, instance)

try:
start_time = time.time()
response = await wrapped(*args, **kwargs)
end_time = time.time()
except Exception as e: # pylint: disable=broad-except
end_time = time.time()
duration = end_time - start_time if "start_time" in locals() else 0

common_attributes = Config.get_common_metrics_attributes()
attributes = {
**common_attributes,
"error.type": e.__class__.__name__,
}

if duration > 0 and duration_histogram:
duration_histogram.record(duration, attributes=attributes)
if exception_counter:
exception_counter.add(1, attributes=attributes)

span.set_attribute(ERROR_TYPE, e.__class__.__name__)
span.record_exception(e)
span.set_status(Status(StatusCode.ERROR, str(e)))
span.end()
try:
start_time = time.time()
response = await wrapped(*args, **kwargs)
end_time = time.time()
except Exception as e: # pylint: disable=broad-except
end_time = time.time()
duration = end_time - start_time if "start_time" in locals() else 0

common_attributes = Config.get_common_metrics_attributes()
attributes = {
**common_attributes,
"error.type": e.__class__.__name__,
}

raise
if duration > 0 and duration_histogram:
duration_histogram.record(duration, attributes=attributes)
if exception_counter:
exception_counter.add(1, attributes=attributes)

if is_streaming_response(response):
# span will be closed after the generator is done
if is_openai_v1():
return ChatStream(
span,
response,
instance,
token_counter,
choice_counter,
duration_histogram,
streaming_time_to_first_token,
streaming_time_to_generate,
start_time,
kwargs,
)
else:
return _abuild_from_streaming_response(
span,
response,
instance,
token_counter,
choice_counter,
duration_histogram,
streaming_time_to_first_token,
streaming_time_to_generate,
start_time,
kwargs,
)
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
span.record_exception(e)
span.set_status(Status(StatusCode.ERROR, str(e)))
span.end()

duration = end_time - start_time
raise

_handle_response(
response,
span,
instance,
token_counter,
choice_counter,
duration_histogram,
duration,
)
if is_streaming_response(response):
# span will be closed after the generator is done
if is_openai_v1():
return ChatStream(
span,
response,
instance,
token_counter,
choice_counter,
duration_histogram,
streaming_time_to_first_token,
streaming_time_to_generate,
start_time,
kwargs,
)
else:
return _abuild_from_streaming_response(
span,
response,
instance,
token_counter,
choice_counter,
duration_histogram,
streaming_time_to_first_token,
streaming_time_to_generate,
start_time,
kwargs,
)

span.end()
duration = end_time - start_time

return response
_handle_response(
response,
span,
instance,
token_counter,
choice_counter,
duration_histogram,
duration,
)

span.end()

return response


@dont_throw
@@ -1,6 +1,7 @@
import logging

from opentelemetry import context as context_api
from opentelemetry import trace
from opentelemetry.instrumentation.openai.shared import (
_set_client_attributes,
_set_functions_attributes,
@@ -55,25 +56,27 @@ def completion_wrapper(tracer, wrapped, instance, args, kwargs):
attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
)

_handle_request(span, kwargs, instance)
# Use the span as current context to ensure events get proper trace context
with trace.use_span(span, end_on_exit=False):
_handle_request(span, kwargs, instance)

try:
response = wrapped(*args, **kwargs)
except Exception as e:
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
span.record_exception(e)
span.set_status(Status(StatusCode.ERROR, str(e)))
span.end()
raise

if is_streaming_response(response):
# span will be closed after the generator is done
return _build_from_streaming_response(span, kwargs, response)
else:
_handle_response(response, span, instance)

Comment on lines +72 to 77

💡 Verification agent

🧩 Analysis chain

Streaming path loses the current context for downstream events; wrap generator bodies with trace.use_span

When you return the streaming generator, the with trace.use_span context is exited. emit_event calls inside _build_from_streaming_response/_abuild_from_streaming_response will then run without a current span, reintroducing the missing span_id problem for streaming events.
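The root cause is plain generator laziness — the body only runs when iterated, after the caller's with block has already exited (standard-library sketch, not PR code):

from contextlib import contextmanager

@contextmanager
def ctx():
    print("enter")
    yield
    print("exit")

def gen():
    print("body runs now")  # executes on first next(), not at call time
    yield 1

def caller():
    with ctx():
        return gen()  # returning exits ctx() before the body ever runs

g = caller()  # prints: enter, exit
next(g)       # prints: body runs now -- outside ctx()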

Wrap the generator bodies with trace.use_span(span, end_on_exit=False) to keep the span current throughout iteration and finalization.

Apply:

 def _build_from_streaming_response(span, request_kwargs, response):
-    complete_response = {"choices": [], "model": "", "id": ""}
-    for item in response:
-        yield item
-        _accumulate_streaming_response(complete_response, item)
-    _set_response_attributes(span, complete_response)
-    _set_token_usage(span, request_kwargs, complete_response)
-    if should_emit_events():
-        _emit_streaming_response_events(complete_response)
-    else:
-        if should_send_prompts():
-            _set_completions(span, complete_response.get("choices"))
-    span.set_status(Status(StatusCode.OK))
-    span.end()
+    # Keep span as current during the whole streaming lifecycle
+    with trace.use_span(span, end_on_exit=False):
+        complete_response = {"choices": [], "model": "", "id": ""}
+        for item in response:
+            yield item
+            _accumulate_streaming_response(complete_response, item)
+        _set_response_attributes(span, complete_response)
+        _set_token_usage(span, request_kwargs, complete_response)
+        if should_emit_events():
+            _emit_streaming_response_events(complete_response)
+        else:
+            if should_send_prompts():
+                _set_completions(span, complete_response.get("choices"))
+        span.set_status(Status(StatusCode.OK))
+        span.end()
 
 async def _abuild_from_streaming_response(span, request_kwargs, response):
-    complete_response = {"choices": [], "model": "", "id": ""}
-    async for item in response:
-        yield item
-        _accumulate_streaming_response(complete_response, item)
-    _set_response_attributes(span, complete_response)
-    _set_token_usage(span, request_kwargs, complete_response)
-    if should_emit_events():
-        _emit_streaming_response_events(complete_response)
-    else:
-        if should_send_prompts():
-            _set_completions(span, complete_response.get("choices"))
-    span.set_status(Status(StatusCode.OK))
-    span.end()
+    # Keep span as current during the whole streaming lifecycle
+    with trace.use_span(span, end_on_exit=False):
+        complete_response = {"choices": [], "model": "", "id": ""}
+        async for item in response:
+            yield item
+            _accumulate_streaming_response(complete_response, item)
+        _set_response_attributes(span, complete_response)
+        _set_token_usage(span, request_kwargs, complete_response)
+        if should_emit_events():
+            _emit_streaming_response_events(complete_response)
+        else:
+            if should_send_prompts():
+                _set_completions(span, complete_response.get("choices"))
+        span.set_status(Status(StatusCode.OK))
+        span.end()

Run this quick grep to spot any remaining emit_event sites in streaming code that run outside a use_span context:

Also applies to: 108-115, 183-201, 204-222


🏁 Script executed:

#!/bin/bash
rg -n "emit_event\(" -A 4 -B 6 packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/completion_wrappers.py

Length of output: 1359


I’d like to verify whether any trace.use_span wrappers already surround non-streaming paths or other emit sites. Let’s search for existing use_span usages in this file:


🏁 Script executed:

rg -n "use_span" -A 3 -B 3 packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/completion_wrappers.py

Length of output: 582


Wrap streaming generators in trace.use_span to preserve current span context

Returning the raw generator exits the outer use_span (though the span remains open), so any emit_event calls inside the streaming builders run without a current span. Enclose the bodies of both streaming helpers in a nested with trace.use_span(span, end_on_exit=False): block so that events, status updates, and span.end() execute under the correct context.

Affected locations in
packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/completion_wrappers.py:

  • _build_from_streaming_response
  • _abuild_from_streaming_response

Suggested patch:

 def _build_from_streaming_response(span, request_kwargs, response):
-    complete_response = {"choices": [], "model": "", "id": ""}
-    for item in response:
-        yield item
-        _accumulate_streaming_response(complete_response, item)
-    _set_response_attributes(span, complete_response)
-    _set_token_usage(span, request_kwargs, complete_response)
-    if should_emit_events():
-        _emit_streaming_response_events(complete_response)
-    else:
-        if should_send_prompts():
-            _set_completions(span, complete_response.get("choices"))
-    span.set_status(Status(StatusCode.OK))
-    span.end()
+    # Keep span current for events and finalization
+    with trace.use_span(span, end_on_exit=False):
+        complete_response = {"choices": [], "model": "", "id": ""}
+        for item in response:
+            yield item
+            _accumulate_streaming_response(complete_response, item)
+        _set_response_attributes(span, complete_response)
+        _set_token_usage(span, request_kwargs, complete_response)
+        if should_emit_events():
+            _emit_streaming_response_events(complete_response)
+        elif should_send_prompts():
+            _set_completions(span, complete_response.get("choices"))
+        span.set_status(Status(StatusCode.OK))
+        span.end()
 
 async def _abuild_from_streaming_response(span, request_kwargs, response):
-    complete_response = {"choices": [], "model": "", "id": ""}
-    async for item in response:
-        yield item
-        _accumulate_streaming_response(complete_response, item)
-    _set_response_attributes(span, complete_response)
-    _set_token_usage(span, request_kwargs, complete_response)
-    if should_emit_events():
-        _emit_streaming_response_events(complete_response)
-    else:
-        if should_send_prompts():
-            _set_completions(span, complete_response.get("choices"))
-    span.set_status(Status(StatusCode.OK))
-    span.end()
+    # Keep span current for events and finalization
+    with trace.use_span(span, end_on_exit=False):
+        complete_response = {"choices": [], "model": "", "id": ""}
+        async for item in response:
+            yield item
+            _accumulate_streaming_response(complete_response, item)
+        _set_response_attributes(span, complete_response)
+        _set_token_usage(span, request_kwargs, complete_response)
+        if should_emit_events():
+            _emit_streaming_response_events(complete_response)
+        elif should_send_prompts():
+            _set_completions(span, complete_response.get("choices"))
+        span.set_status(Status(StatusCode.OK))
+        span.end()
🤖 Prompt for AI Agents
In
packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/completion_wrappers.py
around lines 72 to 77, the streaming generator helpers
(_build_from_streaming_response and _abuild_from_streaming_response) return raw
generators which causes code executed inside them (emit_event, set_status,
span.end()) to run without the current span context; wrap the entire body of
each streaming helper in a nested with trace.use_span(span, end_on_exit=False):
block so the yielded generator executes with the span active, ensuring
emit_event/status/span.end() run under the correct context (apply the same
change to both sync and async variants).
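
A quick way to sanity-check the fixed helpers (sketch, not part of the PR; fake_stream and the inline build helper are stand-ins for the real streaming builder, and an SDK TracerProvider is assumed):

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

trace.set_tracer_provider(TracerProvider())
tracer = trace.get_tracer(__name__)

def fake_stream():
    yield {"choices": []}

def build(span, response):
    # Same shape as the patched _build_from_streaming_response.
    with trace.use_span(span, end_on_exit=False):
        for item in response:
            # emit_event called here would now see `span` as current.
            assert trace.get_current_span() is span
            yield item
    span.end()

span = tracer.start_span("openai.chat")
list(build(span, fake_stream()))  # asserts pass; span ends afterwards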

try:
response = wrapped(*args, **kwargs)
except Exception as e:
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
span.record_exception(e)
span.set_status(Status(StatusCode.ERROR, str(e)))
span.end()
raise

if is_streaming_response(response):
# span will be closed after the generator is done
return _build_from_streaming_response(span, kwargs, response)
else:
_handle_response(response, span, instance)

span.end()
return response
return response


@_with_tracer_wrapper
@@ -89,25 +92,27 @@ async def acompletion_wrapper(tracer, wrapped, instance, args, kwargs):
attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
)

_handle_request(span, kwargs, instance)
# Use the span as current context to ensure events get proper trace context
with trace.use_span(span, end_on_exit=False):
_handle_request(span, kwargs, instance)

try:
response = await wrapped(*args, **kwargs)
except Exception as e:
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
span.record_exception(e)
span.set_status(Status(StatusCode.ERROR, str(e)))
span.end()
raise

if is_streaming_response(response):
# span will be closed after the generator is done
return _abuild_from_streaming_response(span, kwargs, response)
else:
_handle_response(response, span, instance)

try:
response = await wrapped(*args, **kwargs)
except Exception as e:
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
span.record_exception(e)
span.set_status(Status(StatusCode.ERROR, str(e)))
span.end()
raise

if is_streaming_response(response):
# span will be closed after the generator is done
return _abuild_from_streaming_response(span, kwargs, response)
else:
_handle_response(response, span, instance)

span.end()
return response
return response


@dont_throw