From 14c28052b484dbe795650e379d0ac537a99fd639 Mon Sep 17 00:00:00 2001 From: heavy-d Date: Tue, 19 Aug 2025 02:44:27 +0200 Subject: [PATCH] Enhance image handling in AgentNode and AgentStreaming classes - Updated image processing logic to ensure compatibility with the Claude API by converting images to base64 format when necessary. - Added debug logging to track image usage, including cases where images are downloaded from a URI or used as-is. - Improved overall robustness of image handling in agent interactions. --- src/nodetool/nodes/nodetool/agents.py | 30 +++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/nodetool/nodes/nodetool/agents.py b/src/nodetool/nodes/nodetool/agents.py index b274bfc..8b01c4a 100644 --- a/src/nodetool/nodes/nodetool/agents.py +++ b/src/nodetool/nodes/nodetool/agents.py @@ -654,7 +654,20 @@ async def process(self, context: ProcessingContext): content.append(MessageTextContent(text=self.prompt)) if self.image.is_set(): - content.append(MessageImageContent(image=self.image)) + # Ensure image is converted to base64 format for Claude API compatibility + if self.image.data: + # Image already has data, just encode it + image_ref = self.image.encode_data_to_uri() + print(f"[DEBUG] AgentNode - Using image data, URI: {image_ref.uri[:100]}{'...' if len(image_ref.uri) > 100 else ''}") + elif self.image.uri: + # Image has URI but no data, need to download it + print(f"[DEBUG] AgentNode - Downloading image from URI: {self.image.uri}") + image_ref = await context.image_from_url(self.image.uri) + print(f"[DEBUG] AgentNode - Downloaded image, new URI: {image_ref.uri[:100]}{'...' if len(image_ref.uri) > 100 else ''}") + else: + image_ref = self.image + print(f"[DEBUG] AgentNode - Using image as-is: {image_ref.uri[:100]}{'...' if len(image_ref.uri) > 100 else ''}") + content.append(MessageImageContent(image=image_ref)) if self.audio.is_set(): content.append(MessageAudioContent(audio=self.audio)) @@ -1240,7 +1253,20 @@ async def gen_process(self, context: ProcessingContext): content.append(MessageTextContent(text=self.prompt)) if self.image.is_set(): - content.append(MessageImageContent(image=self.image)) + # Ensure image is converted to base64 format for Claude API compatibility + if self.image.data: + # Image already has data, just encode it + image_ref = self.image.encode_data_to_uri() + print(f"[DEBUG] AgentStreaming - Using image data, URI: {image_ref.uri[:100]}{'...' if len(image_ref.uri) > 100 else ''}") + elif self.image.uri: + # Image has URI but no data, need to download it + print(f"[DEBUG] AgentStreaming - Downloading image from URI: {self.image.uri}") + image_ref = await context.image_from_url(self.image.uri) + print(f"[DEBUG] AgentStreaming - Downloaded image, new URI: {image_ref.uri[:100]}{'...' if len(image_ref.uri) > 100 else ''}") + else: + image_ref = self.image + print(f"[DEBUG] AgentStreaming - Using image as-is: {image_ref.uri[:100]}{'...' if len(image_ref.uri) > 100 else ''}") + content.append(MessageImageContent(image=image_ref)) if self.audio.is_set(): content.append(MessageAudioContent(audio=self.audio))