traceloop · nirga · Aug 24, 2025 · Aug 23, 2025 · Aug 23, 2025 · Aug 23, 2025
diff --git a/...ntation-google-generativeai/opentelemetry/instrumentation/google_generativeai/__init__.py b/...ntation-google-generativeai/opentelemetry/instrumentation/google_generativeai/__init__.py
@@ -13,7 +13,7 @@
     emit_message_events,
 )
 from opentelemetry.instrumentation.google_generativeai.span_utils import (
-    set_input_attributes,
+    set_input_attributes_sync,
     set_model_request_attributes,
     set_model_response_attributes,
     set_response_attributes,
@@ -103,7 +103,7 @@ def _handle_request(span, args, kwargs, llm_model, event_logger):
     if should_emit_events() and event_logger:
         emit_message_events(args, kwargs, event_logger)
     else:
-        set_input_attributes(span, args, kwargs, llm_model)
+        set_input_attributes_sync(span, args, kwargs, llm_model)
 
     set_model_request_attributes(span, kwargs, llm_model)
 
@@ -249,10 +249,12 @@ def _wrap(
 class GoogleGenerativeAiInstrumentor(BaseInstrumentor):
     """An instrumentor for Google Generative AI's client library."""
 
-    def __init__(self, exception_logger=None, use_legacy_attributes=True):
+    def __init__(self, exception_logger=None, use_legacy_attributes=True, upload_base64_image=None):
+        """Store the instrumentor options on the shared ``Config`` class.
+
+        ``exception_logger`` and ``use_legacy_attributes`` are always written
+        to ``Config``. ``upload_base64_image`` replaces the ``Config`` value
+        only when a truthy value is passed, so omitting it (or passing
+        ``None``) leaves the currently configured uploader in place.
+        """
         super().__init__()
+        # These are class attributes of Config, so the assignments below are
+        # visible to every reader of Config, not just this instance.
         Config.exception_logger = exception_logger
         Config.use_legacy_attributes = use_legacy_attributes
+        if upload_base64_image:
+            Config.upload_base64_image = upload_base64_image
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return ("google-genai >= 1.0.0",)

diff --git a/...mentation-google-generativeai/opentelemetry/instrumentation/google_generativeai/config.py b/...mentation-google-generativeai/opentelemetry/instrumentation/google_generativeai/config.py
@@ -1,3 +1,9 @@
+from typing import Callable
+
+
 class Config:
     exception_logger = None
     use_legacy_attributes = True
+    # Hook for off-loading inline image data. It is called with
+    # (trace_id, span_id, image_name, base64_string) and its result is
+    # recorded on the span as the image URL. The default uploads nothing
+    # and returns an empty URL string (the earlier default returned the
+    # ``str`` type object itself rather than a string).
+    upload_base64_image: Callable[[str, str, str, str], str] = (
+        lambda trace_id, span_id, image_name, base64_string: ""
+    )
diff --git a/...ation-google-generativeai/opentelemetry/instrumentation/google_generativeai/span_utils.py b/...ation-google-generativeai/opentelemetry/instrumentation/google_generativeai/span_utils.py
@@ -1,22 +1,113 @@
+import json
+import base64
+import logging
+import asyncio
 from opentelemetry.instrumentation.google_generativeai.utils import (
     dont_throw,
     should_send_prompts,
 )
+from opentelemetry.instrumentation.google_generativeai.config import Config
 from opentelemetry.semconv_ai import (
     SpanAttributes,
 )
 from opentelemetry.trace.status import Status, StatusCode
 
 
+logger = logging.getLogger(__name__)
+
+
 def _set_span_attribute(span, name, value):
+    """Set ``name`` on ``span`` unless ``value`` is ``None`` or ``""``."""
     if value is not None:
         if value != "":
             span.set_attribute(name, value)
     return
 
 
+def _is_image_part(item):
+    """Return True when ``item`` carries inline image data.
+
+    An item qualifies when it has a non-None ``inline_data`` whose
+    ``mime_type`` is a string starting with ``image/`` and whose ``data``
+    is non-empty. Any error raised while inspecting the object is treated
+    as "not an image", so a malformed object is reported as False rather
+    than raising.
+    """
+    try:
+        inline_data = getattr(item, 'inline_data', None)
+        if inline_data is None:
+            return False
+
+        mime_type = getattr(inline_data, 'mime_type', None)
+        # A MIME type names its top-level type first, so test the prefix; a
+        # substring test would also accept e.g. 'text/x-image/foo'.
+        if not isinstance(mime_type, str) or not mime_type.startswith('image/'):
+            return False
+
+        return bool(getattr(inline_data, 'data', None))
+    except Exception:
+        return False
+
+
+async def _process_image_part(item, trace_id, span_id, content_index):
+    """Upload an image part and return an OpenAI-style ``image_url`` entry.
+
+    The part's ``inline_data.data`` is base64-encoded and handed to
+    ``Config.upload_base64_image`` under the name
+    ``content_<content_index>.<format>``, where ``<format>`` is the subtype
+    of the part's MIME type. The uploader may be a plain function or a
+    coroutine function: its result is awaited only when it is awaitable,
+    so a synchronous uploader no longer ends up in the error fallback.
+
+    Returns ``item`` unchanged when no uploader is configured; otherwise a
+    dict ``{"type": "image_url", "image_url": {"url": ...}}``. Any failure
+    is logged as a warning and yields the ``/fallback/async_image`` URL.
+    """
+    import inspect
+
+    if not Config.upload_base64_image:
+        return item
+
+    try:
+        # Extract format from mime type (e.g., 'image/jpeg' -> 'jpeg')
+        image_format = item.inline_data.mime_type.split('/')[1] if item.inline_data.mime_type else 'unknown'
+        image_name = f"content_{content_index}.{image_format}"
+
+        # Convert binary data to base64 string for upload
+        binary_data = item.inline_data.data
+        base64_string = base64.b64encode(binary_data).decode('utf-8')
+
+        # Upload the base64 data; only await when the uploader is async.
+        url = Config.upload_base64_image(trace_id, span_id, image_name, base64_string)
+        if inspect.isawaitable(url):
+            url = await url
+
+        # Return OpenAI-compatible format for consistency across LLM providers
+        return {
+            "type": "image_url",
+            "image_url": {"url": url}
+        }
+    except Exception as e:
+        logger.warning(f"Failed to process image part: {e}")
+        # Return fallback in OpenAI-compatible format
+        return {
+            "type": "image_url",
+            "image_url": {"url": "/fallback/async_image"}
+        }
+
+
+def _process_image_part_sync(item, trace_id, span_id, content_index):
+    """Synchronous counterpart of ``_process_image_part``.
+
+    The part's ``inline_data.data`` is base64-encoded and handed to
+    ``Config.upload_base64_image`` under the name
+    ``content_<content_index>.<format>``. A plain (synchronous) uploader's
+    return value is used directly. An awaitable result is driven to
+    completion with ``asyncio.run``: in this thread when no event loop is
+    running, otherwise on a short-lived worker thread, because
+    ``asyncio.run`` cannot be called from inside a running loop.
+
+    Returns ``item`` unchanged when no uploader is configured; otherwise a
+    dict ``{"type": "image_url", "image_url": {"url": ...}}``. An upload
+    failure is logged and replaced by ``/image/<image_name>``; any other
+    failure is logged and yields the ``/fallback/sync_image`` URL.
+    """
+    import concurrent.futures
+    import inspect
+
+    if not Config.upload_base64_image:
+        return item
+
+    try:
+        # Extract format from mime type (e.g., 'image/jpeg' -> 'jpeg')
+        image_format = item.inline_data.mime_type.split('/')[1] if item.inline_data.mime_type else 'unknown'
+        image_name = f"content_{content_index}.{image_format}"
+
+        # Convert binary data to base64 string for upload
+        binary_data = item.inline_data.data
+        base64_string = base64.b64encode(binary_data).decode('utf-8')
+
+        try:
+            url = Config.upload_base64_image(trace_id, span_id, image_name, base64_string)
+            if inspect.isawaitable(url):
+                pending = url
+
+                async def _await_upload():
+                    # Wrap in a coroutine so asyncio.run accepts any awaitable.
+                    return await pending
+
+                try:
+                    asyncio.get_running_loop()
+                except RuntimeError:
+                    # No loop is running in this thread: drive the upload here.
+                    url = asyncio.run(_await_upload())
+                else:
+                    # A loop is already running in this thread, where
+                    # asyncio.run() would raise; use a worker thread instead.
+                    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+                        url = pool.submit(asyncio.run, _await_upload()).result()
+        except Exception as upload_error:
+            logger.warning(f"Failed to upload image: {upload_error}")
+            url = f"/image/{image_name}"  # Fallback URL
+
+        return {
+            "type": "image_url",
+            "image_url": {"url": url}
+        }
+    except Exception as e:
+        logger.warning(f"Failed to process image part sync: {e}")
+        # Return fallback in OpenAI-compatible format
+        return {
+            "type": "image_url",
+            "image_url": {"url": "/fallback/sync_image"}
+        }
+
+
 @dont_throw
-def set_input_attributes(span, args, kwargs, llm_model):
+async def set_input_attributes(span, args, kwargs, llm_model):
+    """Process input arguments, handling both text and image content"""
     if not span.is_recording():
         return
 
@@ -26,53 +117,211 @@ def set_input_attributes(span, args, kwargs, llm_model):
     if "contents" in kwargs:
         contents = kwargs["contents"]
         if isinstance(contents, str):
+            # Simple string content in OpenAI format
             _set_span_attribute(
                 span,
                 f"{SpanAttributes.LLM_PROMPTS}.0.content",
-                contents,
+                json.dumps([{"type": "text", "text": contents}]),
             )
             _set_span_attribute(
                 span,
                 f"{SpanAttributes.LLM_PROMPTS}.0.role",
                 "user",
             )
         elif isinstance(contents, list):
+            # Process content list - could be mixed text and Part objects
             for i, content in enumerate(contents):
+                processed_content = []
+
                 if hasattr(content, "parts"):
-                    for part in content.parts:
-                        if hasattr(part, "text"):
-                            _set_span_attribute(
-                                span,
-                                f"{SpanAttributes.LLM_PROMPTS}.{i}.content",
-                                part.text,
-                            )
-                            _set_span_attribute(
-                                span,
-                                f"{SpanAttributes.LLM_PROMPTS}.{i}.role",
-                                getattr(content, "role", "user"),
+                    # Content with parts (Google GenAI Content object)
+                    for j, part in enumerate(content.parts):
+                        if hasattr(part, "text") and part.text:
+                            processed_content.append({"type": "text", "text": part.text})
+                        elif _is_image_part(part):
+                            processed_image = await _process_image_part(
+                                part, span.context.trace_id, span.context.span_id, j
                             )
+                            processed_content.append(processed_image)
+                        else:
+                            # Other part types
+                            processed_content.append({"type": "text", "text": str(part)})
+                elif isinstance(content, str):
+                    # Direct string in the list
+                    processed_content.append({"type": "text", "text": content})
+                elif _is_image_part(content):
+                    # Direct Part object that's an image
+                    processed_image = await _process_image_part(
+                        content, span.context.trace_id, span.context.span_id, 0
+                    )
+                    processed_content.append(processed_image)
+                else:
+                    # Other content types
+                    processed_content.append({"type": "text", "text": str(content)})
+
+                if processed_content:
+                    _set_span_attribute(
+                        span,
+                        f"{SpanAttributes.LLM_PROMPTS}.{i}.content",
+                        json.dumps(processed_content),
+                    )
+                    _set_span_attribute(
+                        span,
+                        f"{SpanAttributes.LLM_PROMPTS}.{i}.role",
+                        getattr(content, "role", "user"),
+                    )
     elif args and len(args) > 0:
-        prompt = ""
-        for arg in args:
+        # Handle args - process each argument
+        for i, arg in enumerate(args):
+            processed_content = []
+
             if isinstance(arg, str):
-                prompt = f"{prompt}{arg}\n"
+                processed_content.append({"type": "text", "text": arg})
             elif isinstance(arg, list):
-                for subarg in arg:
-                    prompt = f"{prompt}{subarg}\n"
-        if prompt:
+                for j, subarg in enumerate(arg):
+                    if isinstance(subarg, str):
+                        processed_content.append({"type": "text", "text": subarg})
+                    elif _is_image_part(subarg):
+                        processed_image = await _process_image_part(
+                            subarg, span.context.trace_id, span.context.span_id, j
+                        )
+                        processed_content.append(processed_image)
+                    else:
+                        processed_content.append({"type": "text", "text": str(subarg)})
+            elif _is_image_part(arg):
+                processed_image = await _process_image_part(
+                    arg, span.context.trace_id, span.context.span_id, 0
+                )
+                processed_content.append(processed_image)
+            else:
+                processed_content.append({"type": "text", "text": str(arg)})
+
+            if processed_content:
+                _set_span_attribute(
+                    span,
+                    f"{SpanAttributes.LLM_PROMPTS}.{i}.content",
+                    json.dumps(processed_content),
+                )
+                _set_span_attribute(
+                    span,
+                    f"{SpanAttributes.LLM_PROMPTS}.{i}.role",
+                    "user",
+                )
+    elif "prompt" in kwargs:
+        _set_span_attribute(
+            span, f"{SpanAttributes.LLM_PROMPTS}.0.content",
+            json.dumps([{"type": "text", "text": kwargs["prompt"]}])
+        )
+        _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.0.role", "user")
+
+
+# Synchronous variant of set_input_attributes; _handle_request calls this one.
+@dont_throw
+def set_input_attributes_sync(span, args, kwargs, llm_model):
+    """Record the request's prompt content on ``span`` (synchronous path).
+
+    Nothing is recorded unless ``span.is_recording()`` and
+    ``should_send_prompts()`` are both true. The input is taken from the
+    first source that is present, in this order: ``kwargs["contents"]``,
+    the positional ``args``, then ``kwargs["prompt"]``.
+
+    For prompt index ``i`` the function sets
+    ``{SpanAttributes.LLM_PROMPTS}.{i}.content`` to a JSON-encoded list of
+    entries (``{"type": "text", "text": ...}`` for text, or whatever
+    ``_process_image_part_sync`` returns for image parts) and
+    ``{SpanAttributes.LLM_PROMPTS}.{i}.role`` to the role.
+
+    ``llm_model`` is accepted but not used by this function.
+    """
+    if not span.is_recording():
+        return
+
+    if not should_send_prompts():
+        return
+
+    # NOTE(review): when "contents" is present but is neither a str nor a
+    # list, no prompt attribute is recorded; the args/"prompt" branches are
+    # not tried because they sit on the same if/elif chain.
+    if "contents" in kwargs:
+        contents = kwargs["contents"]
+        if isinstance(contents, str):
+            # Simple string content in OpenAI format
             _set_span_attribute(
                 span,
                 f"{SpanAttributes.LLM_PROMPTS}.0.content",
-                prompt,
+                json.dumps([{"type": "text", "text": contents}]),
             )
             _set_span_attribute(
                 span,
                 f"{SpanAttributes.LLM_PROMPTS}.0.role",
                 "user",
             )
+        elif isinstance(contents, list):
+            # Process content list - could be mixed text and Part objects
+            for i, content in enumerate(contents):
+                processed_content = []
+
+                if hasattr(content, "parts"):
+                    # Content with parts (Google GenAI Content object)
+                    # j is passed on as the content index used to name the image.
+                    for j, part in enumerate(content.parts):
+                        if hasattr(part, "text") and part.text:
+                            processed_content.append({"type": "text", "text": part.text})
+                        elif _is_image_part(part):
+                            processed_image = _process_image_part_sync(
+                                part, span.context.trace_id, span.context.span_id, j
+                            )
+                            processed_content.append(processed_image)
+                        else:
+                            # Other part types
+                            processed_content.append({"type": "text", "text": str(part)})
+                elif isinstance(content, str):
+                    # Direct string in the list
+                    processed_content.append({"type": "text", "text": content})
+                elif _is_image_part(content):
+                    # Direct Part object that's an image
+                    processed_image = _process_image_part_sync(
+                        content, span.context.trace_id, span.context.span_id, 0
+                    )
+                    processed_content.append(processed_image)
+                else:
+                    # Other content types
+                    processed_content.append({"type": "text", "text": str(content)})
+
+                # An empty list (e.g. a Content object with no parts) records nothing.
+                if processed_content:
+                    _set_span_attribute(
+                        span,
+                        f"{SpanAttributes.LLM_PROMPTS}.{i}.content",
+                        json.dumps(processed_content),
+                    )
+                    # Falls back to "user" only when the item has no "role"
+                    # attribute at all.
+                    _set_span_attribute(
+                        span,
+                        f"{SpanAttributes.LLM_PROMPTS}.{i}.role",
+                        getattr(content, "role", "user"),
+                    )
+    elif args and len(args) > 0:
+        # Handle args - process each argument
+        # Each positional argument becomes its own prompt index i.
+        for i, arg in enumerate(args):
+            processed_content = []
+
+            if isinstance(arg, str):
+                processed_content.append({"type": "text", "text": arg})
+            elif isinstance(arg, list):
+                for j, subarg in enumerate(arg):
+                    if isinstance(subarg, str):
+                        processed_content.append({"type": "text", "text": subarg})
+                    elif _is_image_part(subarg):
+                        processed_image = _process_image_part_sync(
+                            subarg, span.context.trace_id, span.context.span_id, j
+                        )
+                        processed_content.append(processed_image)
+                    else:
+                        processed_content.append({"type": "text", "text": str(subarg)})
+            elif _is_image_part(arg):
+                processed_image = _process_image_part_sync(
+                    arg, span.context.trace_id, span.context.span_id, 0
+                )
+                processed_content.append(processed_image)
+            else:
+                processed_content.append({"type": "text", "text": str(arg)})
+
+            if processed_content:
+                _set_span_attribute(
+                    span,
+                    f"{SpanAttributes.LLM_PROMPTS}.{i}.content",
+                    json.dumps(processed_content),
+                )
+                _set_span_attribute(
+                    span,
+                    f"{SpanAttributes.LLM_PROMPTS}.{i}.role",
+                    "user",
+                )
     elif "prompt" in kwargs:
         _set_span_attribute(
-            span, f"{SpanAttributes.LLM_PROMPTS}.0.content", kwargs["prompt"]
+            span, f"{SpanAttributes.LLM_PROMPTS}.0.content",
+            json.dumps([{"type": "text", "text": kwargs["prompt"]}])
         )
         _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.0.role", "user")