-
Notifications
You must be signed in to change notification settings - Fork 1k
fix(google-generativeai,vertexai): image support for Gemini models #3340
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
20231b2
535e612
29bbfde
144929b
009db80
ef1b148
0c80bc9
544322e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,3 +1,9 @@ | ||||||||||||||||
| from typing import Callable | ||||||||||||||||
|
|
||||||||||||||||
|
|
||||||||||||||||
| class Config: | ||||||||||||||||
| exception_logger = None | ||||||||||||||||
| use_legacy_attributes = True | ||||||||||||||||
| upload_base64_image: Callable[[str, str, str, str], str] = ( | ||||||||||||||||
| lambda trace_id, span_id, image_name, base64_string: str | ||||||||||||||||
| ) | ||||||||||||||||
|
Comment on lines
+7
to
+9
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Fix default upload_base64_image: current lambda returns the The default should return a valid string (ideally a harmless placeholder URL) rather than the Apply this diff: - upload_base64_image: Callable[[str, str, str, str], str] = (
- lambda trace_id, span_id, image_name, base64_string: str
- )
+ # Default: do not upload, but return a harmless placeholder URL.
+ upload_base64_image: Callable[[str, str, str, str], str] = (
+ lambda trace_id, span_id, image_name, base64_string: "about:blank"
+ )Optionally, consider letting this be Optional[Callable] with None default and handling the fallback in span_utils for more explicit behavior. I can draft that change across both google_generativeai and vertexai configs to keep them consistent. 📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,22 +1,103 @@ | ||
| import json | ||
| import base64 | ||
| import logging | ||
| import asyncio | ||
| from opentelemetry.instrumentation.google_generativeai.utils import ( | ||
| dont_throw, | ||
| should_send_prompts, | ||
| ) | ||
| from opentelemetry.instrumentation.google_generativeai.config import Config | ||
| from opentelemetry.semconv_ai import ( | ||
| SpanAttributes, | ||
| ) | ||
| from opentelemetry.trace.status import Status, StatusCode | ||
|
|
||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| def _set_span_attribute(span, name, value): | ||
| if value is not None: | ||
| if value != "": | ||
| span.set_attribute(name, value) | ||
| return | ||
|
|
||
|
|
||
| def _is_image_part(item): | ||
| """Check if item is a Google GenAI Part object containing image data""" | ||
| try: | ||
| # Check if it has the Part attributes we expect for new Google GenAI SDK | ||
| if hasattr(item, 'inline_data') and item.inline_data is not None: | ||
| # Check if it's an image mime type and has data | ||
| if (hasattr(item.inline_data, 'mime_type') and | ||
| item.inline_data.mime_type and | ||
| 'image/' in item.inline_data.mime_type and | ||
| hasattr(item.inline_data, 'data') and | ||
| item.inline_data.data): | ||
| return True | ||
| return False | ||
| except Exception: | ||
| return False | ||
|
nirga marked this conversation as resolved.
|
||
|
|
||
|
|
||
| async def _process_image_part(item, trace_id, span_id, content_index): | ||
| """Process a Google GenAI Part object containing image data""" | ||
| if not Config.upload_base64_image: | ||
| return None | ||
|
|
||
| try: | ||
| # Extract format from mime type (e.g., 'image/jpeg' -> 'jpeg') | ||
| image_format = item.inline_data.mime_type.split('/')[1] if item.inline_data.mime_type else 'unknown' | ||
| image_name = f"content_{content_index}.{image_format}" | ||
|
|
||
| # Convert binary data to base64 string for upload | ||
| binary_data = item.inline_data.data | ||
| base64_string = base64.b64encode(binary_data).decode('utf-8') | ||
|
|
||
| # Upload the base64 data | ||
| url = await Config.upload_base64_image(trace_id, span_id, image_name, base64_string) | ||
|
|
||
| # Return OpenAI-compatible format for consistency across LLM providers | ||
| return { | ||
| "type": "image_url", | ||
| "image_url": {"url": url} | ||
| } | ||
| except Exception as e: | ||
| logger.warning(f"Failed to process image part: {e}") | ||
| # Return None to skip adding this image to the span | ||
| return None | ||
|
Comment on lines
+66
to
+69
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Return a configurable fallback image URL on upload failure (aligns with PR intent and prior review) On failures you currently return except Exception as e:
logger.warning(f"Failed to process image part: {e}")
- # Return None to skip adding this image to the span
- return None
+ # Graceful fallback
+ fallback_url = getattr(
+ Config,
+ "fallback_async_image_url",
+ getattr(Config, "fallback_image_url", None),
+ )
+ if fallback_url:
+ return {"type": "image_url", "image_url": {"url": fallback_url}}
+ return None except Exception as e:
logger.warning(f"Failed to process image part sync: {e}")
- # Return None to skip adding this image to the span
- return None
+ # Graceful fallback
+ fallback_url = getattr(
+ Config,
+ "fallback_sync_image_url",
+ getattr(Config, "fallback_image_url", None),
+ )
+ if fallback_url:
+ return {"type": "image_url", "image_url": {"url": fallback_url}}
+ return NoneNote: This keeps fallback configuration centralized in Also applies to: 92-95 🤖 Prompt for AI Agents |
||
|
|
||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| def _process_image_part_sync(item, trace_id, span_id, content_index): | ||
| """Synchronous version of image part processing""" | ||
| if not Config.upload_base64_image: | ||
| return None | ||
|
|
||
| try: | ||
| # Extract format from mime type (e.g., 'image/jpeg' -> 'jpeg') | ||
| image_format = item.inline_data.mime_type.split('/')[1] if item.inline_data.mime_type else 'unknown' | ||
| image_name = f"content_{content_index}.{image_format}" | ||
|
|
||
| # Convert binary data to base64 string for upload | ||
| binary_data = item.inline_data.data | ||
| base64_string = base64.b64encode(binary_data).decode('utf-8') | ||
|
|
||
| # Use asyncio.run to call the async upload function in sync context | ||
| url = asyncio.run(Config.upload_base64_image(trace_id, span_id, image_name, base64_string)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removing the try/except block around asyncio.run() may lead to RuntimeError if an event loop is already running. Consider reintroducing proper error handling or using a workaround for nested event loops. |
||
|
|
||
| return { | ||
| "type": "image_url", | ||
| "image_url": {"url": url} | ||
| } | ||
| except Exception as e: | ||
| logger.warning(f"Failed to process image part sync: {e}") | ||
| # Return None to skip adding this image to the span | ||
| return None | ||
|
|
||
|
|
||
| @dont_throw | ||
| def set_input_attributes(span, args, kwargs, llm_model): | ||
| async def set_input_attributes(span, args, kwargs, llm_model): | ||
| """Process input arguments, handling both text and image content""" | ||
|
Comment on lines
204
to
+206
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. dont_throw on an async function won’t catch in-coroutine exceptions
Two viable fixes (A preferred):
A) Example (utils.py): import functools, inspect, logging, traceback
def dont_throw(func):
logger = logging.getLogger(func.__module__)
if inspect.iscoroutinefunction(func):
@functools.wraps(func)
async def async_wrapper(*args, **kwargs):
try:
return await func(*args, **kwargs)
except Exception as e:
logger.debug(
"OpenLLMetry failed to trace in %s, error: %s",
func.__name__,
traceback.format_exc(),
)
if Config.exception_logger:
Config.exception_logger(e)
return async_wrapper
@functools.wraps(func)
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except Exception as e:
logger.debug(
"OpenLLMetry failed to trace in %s, error: %s",
func.__name__,
traceback.format_exc(),
)
if Config.exception_logger:
Config.exception_logger(e)
return wrapperWould you like me to open a follow-up PR to implement (A) and sweep for other async uses of |
||
| if not span.is_recording(): | ||
|
Comment on lines
204
to
207
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. dont_throw wraps an async function incorrectly — exceptions inside the coroutine won’t be caught The current Two viable fixes (prefer A for reuse):
A) Update decorator (utils.py), then keep using # utils.py
import functools, inspect, logging, traceback
def dont_throw(func):
logger = logging.getLogger(func.__module__)
if inspect.iscoroutinefunction(func):
@functools.wraps(func)
async def async_wrapper(*args, **kwargs):
try:
return await func(*args, **kwargs)
except Exception as e:
logger.debug(
"OpenLLMetry failed to trace in %s, error: %s",
func.__name__,
traceback.format_exc(),
)
if Config.exception_logger:
Config.exception_logger(e)
return async_wrapper
@functools.wraps(func)
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except Exception as e:
logger.debug(
"OpenLLMetry failed to trace in %s, error: %s",
func.__name__,
traceback.format_exc(),
)
if Config.exception_logger:
Config.exception_logger(e)
return wrapperB) Alternatively, drop the decorator here and wrap the function body with try/except. 🤖 Prompt for AI Agents
Comment on lines
+205
to
207
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💡 Verification agent 🧩 Analysis chainSignature change to async: audit call sites to ensure proper awaiting or use of the sync wrapper
Run: Expected: All real invocations in async contexts are awaited; sync contexts use Also applies to: 212-216 🏁 Script executed: #!/bin/bash
# Find all call sites
rg -nP --type=py -C2 '\bset_input_attributes\s*\('
echo
echo "Calls that are awaited:"
rg -nP --type=py -C2 'await\s+set_input_attributes\s*\('
echo
echo "Suspicious un-awaited calls (may be false positives if they call the _sync version):"
rg -nP --type=py -C2 '(?<!await\s)\bset_input_attributes\s*\(' | rg -v '_sync\s*\('Length of output: 17620 🏁 Script executed: #!/bin/bash
# Display the head of the async span_utils implementation
sed -n '1,200p' packages/opentelemetry-instrumentation-google-generativeai/opentelemetry/instrumentation/google_generativeai/span_utils.py
echo
echo "— Sync wrapper definitions in span_utils.py —"
rg -nP --type=py '_sync' -n packages/opentelemetry-instrumentation-google-generativeai/opentelemetry/instrumentation/google_generativeai/span_utils.py
echo
echo "— Calls to async set_input_attributes in __init__.py —"
rg -nP --type=py -C2 '\bset_input_attributes\s*\(' packages/opentelemetry-instrumentation-google-generativeai/opentelemetry/instrumentation/google_generativeai/__init__.py
echo
echo "— Calls to sync set_input_attributes_sync in __init__.py —"
rg -nP --type=py -C2 '\bset_input_attributes_sync\s*\(' packages/opentelemetry-instrumentation-google-generativeai/opentelemetry/instrumentation/google_generativeai/__init__.pyLength of output: 9700 🏁 Script executed: #!/bin/bash
# Show the first 200 lines of the google_generativeai instrumentation __init__.py for context
sed -n '1,200p' packages/opentelemetry-instrumentation-google-generativeai/opentelemetry/instrumentation/google_generativeai/__init__.pyLength of output: 5948 Fix async wrapper to use the async The async instrumentation in • Location: async wrapper @_with_tracer_wrapper
async def _awrap(...):
...
_handle_request(span, args, kwargs, llm_model, event_logger)
response = await wrapped(*args, **kwargs)
...• Proposed change: call the async setter directly (and preserve the - _handle_request(span, args, kwargs, llm_model, event_logger)
+ # use async setter in async contexts
+ try:
+ if should_emit_events() and event_logger:
+ emit_message_events(args, kwargs, event_logger)
+ else:
+ await set_input_attributes(span, args, kwargs, llm_model)
+ except Exception:
+ # swallow per @dont_throw contract
+ pass
response = await wrapped(*args, **kwargs)• Remove or deprecate This change ensures that in async flows we properly |
||
| return | ||
|
|
||
|
|
@@ -26,53 +107,219 @@ def set_input_attributes(span, args, kwargs, llm_model): | |
| if "contents" in kwargs: | ||
| contents = kwargs["contents"] | ||
| if isinstance(contents, str): | ||
| # Simple string content in OpenAI format | ||
| _set_span_attribute( | ||
| span, | ||
| f"{SpanAttributes.LLM_PROMPTS}.0.content", | ||
| contents, | ||
| json.dumps([{"type": "text", "text": contents}]), | ||
| ) | ||
| _set_span_attribute( | ||
| span, | ||
| f"{SpanAttributes.LLM_PROMPTS}.0.role", | ||
| "user", | ||
| ) | ||
| elif isinstance(contents, list): | ||
| # Process content list - could be mixed text and Part objects | ||
| for i, content in enumerate(contents): | ||
| processed_content = [] | ||
|
|
||
| if hasattr(content, "parts"): | ||
| for part in content.parts: | ||
| if hasattr(part, "text"): | ||
| _set_span_attribute( | ||
| span, | ||
| f"{SpanAttributes.LLM_PROMPTS}.{i}.content", | ||
| part.text, | ||
| ) | ||
| _set_span_attribute( | ||
| span, | ||
| f"{SpanAttributes.LLM_PROMPTS}.{i}.role", | ||
| getattr(content, "role", "user"), | ||
| # Content with parts (Google GenAI Content object) | ||
|
nirga marked this conversation as resolved.
Outdated
|
||
| for j, part in enumerate(content.parts): | ||
| if hasattr(part, "text") and part.text: | ||
| processed_content.append({"type": "text", "text": part.text}) | ||
| elif _is_image_part(part): | ||
| processed_image = await _process_image_part( | ||
| part, span.context.trace_id, span.context.span_id, j | ||
| ) | ||
| if processed_image is not None: | ||
| processed_content.append(processed_image) | ||
| else: | ||
| # Other part types | ||
| processed_content.append({"type": "text", "text": str(part)}) | ||
| elif isinstance(content, str): | ||
| # Direct string in the list | ||
| processed_content.append({"type": "text", "text": content}) | ||
| elif _is_image_part(content): | ||
| # Direct Part object that's an image | ||
| processed_image = await _process_image_part( | ||
| content, span.context.trace_id, span.context.span_id, 0 | ||
| ) | ||
| if processed_image is not None: | ||
| processed_content.append(processed_image) | ||
| else: | ||
| # Other content types | ||
| processed_content.append({"type": "text", "text": str(content)}) | ||
|
|
||
| if processed_content: | ||
| _set_span_attribute( | ||
| span, | ||
| f"{SpanAttributes.LLM_PROMPTS}.{i}.content", | ||
| json.dumps(processed_content), | ||
| ) | ||
| _set_span_attribute( | ||
| span, | ||
| f"{SpanAttributes.LLM_PROMPTS}.{i}.role", | ||
| getattr(content, "role", "user"), | ||
| ) | ||
| elif args and len(args) > 0: | ||
| prompt = "" | ||
| for arg in args: | ||
| # Handle args - process each argument | ||
| for i, arg in enumerate(args): | ||
| processed_content = [] | ||
|
|
||
| if isinstance(arg, str): | ||
| prompt = f"{prompt}{arg}\n" | ||
| processed_content.append({"type": "text", "text": arg}) | ||
| elif isinstance(arg, list): | ||
| for subarg in arg: | ||
| prompt = f"{prompt}{subarg}\n" | ||
| if prompt: | ||
| for j, subarg in enumerate(arg): | ||
|
nirga marked this conversation as resolved.
Outdated
|
||
| if isinstance(subarg, str): | ||
| processed_content.append({"type": "text", "text": subarg}) | ||
| elif _is_image_part(subarg): | ||
| processed_image = await _process_image_part( | ||
| subarg, span.context.trace_id, span.context.span_id, j | ||
| ) | ||
| if processed_image is not None: | ||
| processed_content.append(processed_image) | ||
| else: | ||
| processed_content.append({"type": "text", "text": str(subarg)}) | ||
| elif _is_image_part(arg): | ||
| processed_image = await _process_image_part( | ||
| arg, span.context.trace_id, span.context.span_id, 0 | ||
| ) | ||
| if processed_image is not None: | ||
| processed_content.append(processed_image) | ||
| else: | ||
| processed_content.append({"type": "text", "text": str(arg)}) | ||
|
|
||
| if processed_content: | ||
| _set_span_attribute( | ||
| span, | ||
| f"{SpanAttributes.LLM_PROMPTS}.{i}.content", | ||
| json.dumps(processed_content), | ||
| ) | ||
| _set_span_attribute( | ||
| span, | ||
| f"{SpanAttributes.LLM_PROMPTS}.{i}.role", | ||
| "user", | ||
| ) | ||
| elif "prompt" in kwargs: | ||
| _set_span_attribute( | ||
| span, f"{SpanAttributes.LLM_PROMPTS}.0.content", | ||
| json.dumps([{"type": "text", "text": kwargs["prompt"]}]) | ||
| ) | ||
| _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.0.role", "user") | ||
|
|
||
|
|
||
| # Keep sync version for backward compatibility | ||
| @dont_throw | ||
| def set_input_attributes_sync(span, args, kwargs, llm_model): | ||
| """Synchronous version with image processing support""" | ||
| if not span.is_recording(): | ||
| return | ||
|
|
||
| if not should_send_prompts(): | ||
| return | ||
|
|
||
| if "contents" in kwargs: | ||
| contents = kwargs["contents"] | ||
| if isinstance(contents, str): | ||
| # Simple string content in OpenAI format | ||
| _set_span_attribute( | ||
| span, | ||
| f"{SpanAttributes.LLM_PROMPTS}.0.content", | ||
| prompt, | ||
| json.dumps([{"type": "text", "text": contents}]), | ||
| ) | ||
| _set_span_attribute( | ||
| span, | ||
| f"{SpanAttributes.LLM_PROMPTS}.0.role", | ||
| "user", | ||
| ) | ||
| elif isinstance(contents, list): | ||
|
nirga marked this conversation as resolved.
|
||
| # Process content list - could be mixed text and Part objects | ||
| for i, content in enumerate(contents): | ||
| processed_content = [] | ||
|
|
||
| if hasattr(content, "parts"): | ||
| # Content with parts (Google GenAI Content object) | ||
| for j, part in enumerate(content.parts): | ||
| if hasattr(part, "text") and part.text: | ||
| processed_content.append({"type": "text", "text": part.text}) | ||
| elif _is_image_part(part): | ||
| processed_image = _process_image_part_sync( | ||
| part, span.context.trace_id, span.context.span_id, j | ||
| ) | ||
| if processed_image is not None: | ||
| processed_content.append(processed_image) | ||
| else: | ||
| # Other part types | ||
| processed_content.append({"type": "text", "text": str(part)}) | ||
| elif isinstance(content, str): | ||
| # Direct string in the list | ||
| processed_content.append({"type": "text", "text": content}) | ||
| elif _is_image_part(content): | ||
| # Direct Part object that's an image | ||
| processed_image = _process_image_part_sync( | ||
| content, span.context.trace_id, span.context.span_id, 0 | ||
| ) | ||
| if processed_image is not None: | ||
| processed_content.append(processed_image) | ||
| else: | ||
| # Other content types | ||
| processed_content.append({"type": "text", "text": str(content)}) | ||
|
|
||
| if processed_content: | ||
| _set_span_attribute( | ||
| span, | ||
| f"{SpanAttributes.LLM_PROMPTS}.{i}.content", | ||
| json.dumps(processed_content), | ||
| ) | ||
| _set_span_attribute( | ||
| span, | ||
| f"{SpanAttributes.LLM_PROMPTS}.{i}.role", | ||
| getattr(content, "role", "user"), | ||
| ) | ||
| elif args and len(args) > 0: | ||
| # Handle args - process each argument | ||
| for i, arg in enumerate(args): | ||
| processed_content = [] | ||
|
|
||
| if isinstance(arg, str): | ||
| processed_content.append({"type": "text", "text": arg}) | ||
| elif isinstance(arg, list): | ||
| for j, subarg in enumerate(arg): | ||
| if isinstance(subarg, str): | ||
| processed_content.append({"type": "text", "text": subarg}) | ||
| elif _is_image_part(subarg): | ||
| processed_image = _process_image_part_sync( | ||
| subarg, span.context.trace_id, span.context.span_id, j | ||
| ) | ||
| if processed_image is not None: | ||
| processed_content.append(processed_image) | ||
| else: | ||
| processed_content.append({"type": "text", "text": str(subarg)}) | ||
| elif _is_image_part(arg): | ||
| processed_image = _process_image_part_sync( | ||
| arg, span.context.trace_id, span.context.span_id, 0 | ||
| ) | ||
| if processed_image is not None: | ||
| processed_content.append(processed_image) | ||
| else: | ||
| processed_content.append({"type": "text", "text": str(arg)}) | ||
|
|
||
| if processed_content: | ||
| _set_span_attribute( | ||
| span, | ||
| f"{SpanAttributes.LLM_PROMPTS}.{i}.content", | ||
| json.dumps(processed_content), | ||
| ) | ||
| _set_span_attribute( | ||
| span, | ||
| f"{SpanAttributes.LLM_PROMPTS}.{i}.role", | ||
| "user", | ||
| ) | ||
| elif "prompt" in kwargs: | ||
| _set_span_attribute( | ||
| span, f"{SpanAttributes.LLM_PROMPTS}.0.content", kwargs["prompt"] | ||
| span, f"{SpanAttributes.LLM_PROMPTS}.0.content", | ||
| json.dumps([{"type": "text", "text": kwargs["prompt"]}]) | ||
| ) | ||
| _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.0.role", "user") | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The lambda returns the type 'str' instead of a string value. This default no-op doesn't match expected behavior. Consider returning a valid string (or an async no-op function) instead of the type.