# File: src/strands_tools/nova_canvas.py
"""
Image generation tool for Strands Agent using Nova Canvas on Amazon Bedrock.

This module generates high-quality images with Amazon Bedrock's Nova Canvas
model. It handles the whole flow: request construction, Bedrock invocation,
response decoding, and duplicate-safe local storage of results.

Supported task types:

1. TEXT_IMAGE:
   Text-to-image generation with customizable parameters (height, width,
   quality, cfg_scale, seed, negative_text) and pre-defined visual styles:
   "3D_ANIMATED_FAMILY_FILM", "DESIGN_SKETCH", "FLAT_VECTOR_ILLUSTRATION",
   "GRAPHIC_NOVEL_ILLUSTRATION", "MAXIMALISM", "MIDCENTURY_RETRO",
   "PHOTOREALISM", "SOFT_DIGITAL_PAINTING".

2. VIRTUAL_TRY_ON:
   An image-guided form of inpainting in which the contents of a reference
   image are superimposed into a source image based on the guidance of a
   mask. Example use cases: adding a logo or text to an image, generating an
   image of a person wearing a given garment, placing a couch in a living
   room.

3. BACKGROUND_REMOVAL:
   Automatically removes the background of any image, replacing it with
   transparent pixels — useful for later compositing in an editing app,
   presentation, or website.

Output management:
   • Automatic local saving under ./output with prompt-derived filenames
   • Duplicate filename detection and resolution (suffix _1, _2, ...)
   • Base64 decoding of the model response; raw PNG bytes returned inline

Usage with Strands Agent:
```python
from strands import Agent
from strands_tools import nova_canvas

agent = Agent(tools=[nova_canvas])

# Basic text-to-image generation
agent.tool.nova_canvas(
    task_type="TEXT_IMAGE",
    text="A steampunk robot playing chess",
)

# Advanced text-to-image with style and parameters
agent.tool.nova_canvas(
    task_type="TEXT_IMAGE",
    text="A futuristic city with flying cars",
    style="PHOTOREALISM",
    width=1024,
    height=768,
    negative_text="bad lighting, harsh lighting, abstract",
    cfg_scale=7.5,
    quality="premium",
)

# Virtual try-on with garment
agent.tool.nova_canvas(
    task_type="VIRTUAL_TRY_ON",
    image_path="person.jpg",
    reference_image_path="shirt.jpg",
    mask_type="GARMENT",
    garment_class="SHORT_SLEEVE_SHIRT",
    preserve_face="ON",
)
```

See the nova_canvas function docstring for more details on parameters and
options.
"""

import base64
import json
import os
import random
import re
from typing import Any

import boto3
from strands.types.tools import ToolResult, ToolUse

TOOL_SPEC = {
    "name": "nova_canvas",
    "description": "Use Amazon Nova Canvas for image generation, virtual try-on, and background removal tasks",
    "inputSchema": {
        "json": {
            "type": "object",
            "properties": {
                "task_type": {
                    "type": "string",
                    "description": "Required: the task type for Amazon Nova Canvas",
                    "enum": ["TEXT_IMAGE", "VIRTUAL_TRY_ON", "BACKGROUND_REMOVAL"],
                    "default": "TEXT_IMAGE",
                },
                # TEXT_IMAGE parameters
                "text": {"type": "string", "description": "Text prompt for image generation (required for TEXT_IMAGE)"},
                "negative_text": {"type": "string", "description": "Optional: negative text prompt (TEXT_IMAGE only)"},
                "style": {
                    "type": "string",
                    "description": "Optional: style for image generation (TEXT_IMAGE only)",
                    "enum": [
                        "3D_ANIMATED_FAMILY_FILM",
                        "DESIGN_SKETCH",
                        "FLAT_VECTOR_ILLUSTRATION",
                        "GRAPHIC_NOVEL_ILLUSTRATION",
                        "MAXIMALISM",
                        "MIDCENTURY_RETRO",
                        "PHOTOREALISM",
                        "SOFT_DIGITAL_PAINTING",
                    ],
                },
                "width": {"type": "integer", "description": "Optional: image width in pixels (TEXT_IMAGE only)"},
                "height": {"type": "integer", "description": "Optional: image height in pixels (TEXT_IMAGE only)"},
                # VIRTUAL_TRY_ON parameters
                "image_path": {
                    "type": "string",
                    "description": "Path to source image file to modify (required for VIRTUAL_TRY_ON "
                    "and BACKGROUND_REMOVAL)",
                },
                "reference_image_path": {
                    "type": "string",
                    "description": "Path to reference image file containing the object to superimpose "
                    "(required for VIRTUAL_TRY_ON)",
                },
                "mask_type": {
                    "type": "string",
                    "description": "Specifies whether the mask is provided as prompt, or garment mask "
                    "(required for VIRTUAL_TRY_ON)",
                    "enum": ["GARMENT", "PROMPT"],
                },
                "mask_shape": {
                    "type": "string",
                    "description": "Defines the shape of the mask bounding box, affecting how reference image "
                    "is transferred (optional for mask_type GARMENT and PROMPT)",
                    "enum": ["CONTOUR", "BOUNDING_BOX", "DEFAULT"],
                },
                "garment_class": {
                    "type": "string",
                    "description": "Defines the article of clothing being transferred. Required when mask_type "
                    "is GARMENT",
                    "enum": [
                        "UPPER_BODY",
                        "LOWER_BODY",
                        "FULL_BODY",
                        "FOOTWEAR",
                        "LONG_SLEEVE_SHIRT",
                        "SHORT_SLEEVE_SHIRT",
                        "NO_SLEEVE_SHIRT",
                        "OTHER_UPPER_BODY",
                        "LONG_PANTS",
                        "SHORT_PANTS",
                        "OTHER_LOWER_BODY",
                        "LONG_DRESS",
                        "SHORT_DRESS",
                        "FULL_BODY_OUTFIT",
                        "OTHER_FULL_BODY",
                        "SHOES",
                        "BOOTS",
                        "OTHER_FOOTWEAR",
                    ],
                },
                "long_sleeve_style": {
                    "type": "string",
                    "description": "Styling for long sleeve garments (optional for GARMENT mask_type and applies "
                    "only to upper body garments)",
                    "enum": ["SLEEVE_DOWN", "SLEEVE_UP"],
                },
                "tucking_style": {
                    "type": "string",
                    "description": "Tucking style option (optional for GARMENT mask_type and applies only to upper "
                    "body garments)",
                    "enum": ["UNTUCKED", "TUCKED"],
                },
                "outer_layer_style": {
                    "type": "string",
                    "description": "Styling for outer layer garments (optional for GARMENT mask_type and applies only "
                    "to outer layer, upper body garments)",
                    "enum": ["CLOSED", "OPEN"],
                },
                "mask_prompt": {
                    "type": "string",
                    "description": "Natural language text prompt describing regions to edit. Required when mask_type "
                    "is PROMPT",
                },
                "preserve_body_pose": {
                    "type": "string",
                    "description": "Optional: whether to preserve the body pose in the output image when a person is "
                    "detected",
                    "enum": ["ON", "OFF", "DEFAULT"],
                },
                "preserve_hands": {
                    "type": "string",
                    "description": "Optional: whether to preserve hands in the output image when a person is detected",
                    "enum": ["ON", "OFF", "DEFAULT"],
                },
                "preserve_face": {
                    "type": "string",
                    "description": "Optional: whether to preserve the face in the output image when a person is "
                    "detected",
                    "enum": ["OFF", "ON", "DEFAULT"],
                },
                "merge_style": {
                    "type": "string",
                    "description": "Optional: determines how source and reference images are stitched together",
                    "enum": ["BALANCED", "SEAMLESS", "DETAILED"],
                    "default": "BALANCED",
                },
                # NOTE: return_mask was consumed by the implementation but missing
                # from the schema; declared here so callers can discover it.
                "return_mask": {
                    "type": "boolean",
                    "description": "Optional: whether to also return the mask used for VIRTUAL_TRY_ON",
                },
                # BACKGROUND_REMOVAL parameters
                # (uses image_path parameter defined above)
                # Common parameters
                "quality": {
                    "type": "string",
                    "description": "Image quality",
                    "enum": ["standard", "premium"],
                    "default": "standard",
                },
                "cfg_scale": {
                    "type": "number",
                    "description": "How strictly to adhere to the prompt. Range: 1.1-10",
                    "minimum": 1.1,
                    "maximum": 10,
                    "default": 6.5,
                },
                "seed": {"type": "integer", "description": "Seed for reproducible results"},
                "model_id": {"type": "string", "description": "Model ID", "default": "amazon.nova-canvas-v1:0"},
                "region": {"type": "string", "description": "AWS region", "default": "us-east-1"},
            },
            "required": [],
        }
    },
}


def create_filename(prompt: str) -> str:
    """Derive a filesystem-safe filename stem from the first words of a prompt."""
    words = re.findall(r"\w+", prompt.lower())[:5]
    filename = "_".join(words)
    filename = re.sub(r"[^\w\-_\.]", "_", filename)
    return filename[:100]  # Limit filename length


def encode_image_file(file_path: str) -> str:
    """Read an image file and return its base64 encoded string."""
    with open(file_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def nova_canvas(tool: ToolUse, **kwargs: Any) -> ToolResult:
    """
    Use Amazon Nova Canvas for image generation, virtual try-on, and background removal.

    This function supports three main task types:

    1. TEXT_IMAGE - Generate images from text prompts with optional style parameters
    2. VIRTUAL_TRY_ON - Superimpose objects from a reference image onto a source image
    3. BACKGROUND_REMOVAL - Remove the background from an image

    Args:
        tool: Tool-use payload with "toolUseId" and "input" (see TOOL_SPEC for
            the accepted input keys per task type).
        **kwargs: Unused; accepted for tool-interface compatibility.

    Returns:
        ToolResult with status "success" (text summary plus the generated PNG
        bytes) or status "error" (text describing the failure).
    """
    # Resolve the id before the try block so the except handler can always
    # reference it, even if the payload itself is malformed.
    tool_use_id = tool.get("toolUseId", "unknown")
    try:
        tool_input = tool["input"]

        task_type = tool_input.get("task_type", "TEXT_IMAGE")
        model_id = tool_input.get("model_id", "amazon.nova-canvas-v1:0")
        region = tool_input.get("region", "us-east-1")

        client = boto3.client("bedrock-runtime", region_name=region)

        # Build request based on task type
        if task_type == "TEXT_IMAGE":
            request_body = {
                "taskType": "TEXT_IMAGE",
                "textToImageParams": {
                    "text": tool_input.get("text", "A beautiful landscape"),
                },
                "imageGenerationConfig": {
                    "quality": tool_input.get("quality", "standard"),
                    # Random seed unless the caller pins one for reproducibility.
                    "seed": tool_input.get("seed", random.randint(0, 2147483646)),
                },
            }

            # Add optional TEXT_IMAGE parameters
            if "negative_text" in tool_input:
                request_body["textToImageParams"]["negativeText"] = tool_input["negative_text"]
            if "style" in tool_input:
                request_body["textToImageParams"]["style"] = tool_input["style"]
            if "width" in tool_input:
                request_body["imageGenerationConfig"]["width"] = tool_input["width"]
            if "height" in tool_input:
                request_body["imageGenerationConfig"]["height"] = tool_input["height"]
            if "cfg_scale" in tool_input:
                request_body["imageGenerationConfig"]["cfgScale"] = tool_input["cfg_scale"]

        elif task_type == "VIRTUAL_TRY_ON":
            # Validate required parameters
            if "image_path" not in tool_input:
                raise ValueError("image_path is required for VIRTUAL_TRY_ON")
            if "reference_image_path" not in tool_input:
                raise ValueError("reference_image_path is required for VIRTUAL_TRY_ON")
            if "mask_type" not in tool_input:
                raise ValueError("mask_type is required for VIRTUAL_TRY_ON")

            # Read and encode images
            source_image_b64 = encode_image_file(tool_input["image_path"])
            reference_image_b64 = encode_image_file(tool_input["reference_image_path"])

            # Initialize request structure
            request_body = {
                "taskType": "VIRTUAL_TRY_ON",
                "virtualTryOnParams": {
                    "sourceImage": source_image_b64,
                    "referenceImage": reference_image_b64,
                    "maskType": tool_input["mask_type"],
                },
                "imageGenerationConfig": {"quality": tool_input.get("quality", "standard")},
            }

            # Handle mask type specific parameters
            mask_type = tool_input["mask_type"]

            if mask_type == "GARMENT":
                if "garment_class" not in tool_input:
                    raise ValueError("garment_class is required when mask_type is GARMENT")

                garment_mask = {"garmentClass": tool_input["garment_class"]}

                if "mask_shape" in tool_input:
                    garment_mask["maskShape"] = tool_input["mask_shape"]

                # Add garment styling if any styling options are provided
                styling_params = ["long_sleeve_style", "tucking_style", "outer_layer_style"]
                if any(param in tool_input for param in styling_params):
                    garment_mask["garmentStyling"] = {}

                    if "long_sleeve_style" in tool_input:
                        garment_mask["garmentStyling"]["longSleeveStyle"] = tool_input["long_sleeve_style"]
                    if "tucking_style" in tool_input:
                        garment_mask["garmentStyling"]["tuckingStyle"] = tool_input["tucking_style"]
                    if "outer_layer_style" in tool_input:
                        garment_mask["garmentStyling"]["outerLayerStyle"] = tool_input["outer_layer_style"]

                request_body["virtualTryOnParams"]["garmentBasedMask"] = garment_mask

            elif mask_type == "PROMPT":
                if "mask_prompt" not in tool_input:
                    raise ValueError("mask_prompt is required when mask_type is PROMPT")

                prompt_mask = {"maskPrompt": tool_input["mask_prompt"]}

                if "mask_shape" in tool_input:
                    prompt_mask["maskShape"] = tool_input["mask_shape"]

                request_body["virtualTryOnParams"]["promptBasedMask"] = prompt_mask

            # Add mask exclusions if any are provided
            exclusion_params = ["preserve_body_pose", "preserve_hands", "preserve_face"]
            if any(param in tool_input for param in exclusion_params):
                request_body["virtualTryOnParams"]["maskExclusions"] = {}

                if "preserve_body_pose" in tool_input:
                    request_body["virtualTryOnParams"]["maskExclusions"]["preserveBodyPose"] = tool_input[
                        "preserve_body_pose"
                    ]
                if "preserve_hands" in tool_input:
                    request_body["virtualTryOnParams"]["maskExclusions"]["preserveHands"] = tool_input["preserve_hands"]
                if "preserve_face" in tool_input:
                    request_body["virtualTryOnParams"]["maskExclusions"]["preserveFace"] = tool_input["preserve_face"]

            # Add merge style and return mask options
            if "merge_style" in tool_input:
                request_body["virtualTryOnParams"]["mergeStyle"] = tool_input["merge_style"]
            if "return_mask" in tool_input:
                request_body["virtualTryOnParams"]["returnMask"] = tool_input["return_mask"]

            # Add common generation config parameters
            if "cfg_scale" in tool_input:
                request_body["imageGenerationConfig"]["cfgScale"] = tool_input["cfg_scale"]
            if "seed" in tool_input:
                request_body["imageGenerationConfig"]["seed"] = tool_input["seed"]

        elif task_type == "BACKGROUND_REMOVAL":
            if "image_path" not in tool_input:
                raise ValueError("image_path is required for BACKGROUND_REMOVAL")

            # Read and encode image
            image_b64 = encode_image_file(tool_input["image_path"])

            request_body = {"taskType": "BACKGROUND_REMOVAL", "backgroundRemovalParams": {"image": image_b64}}
        else:
            raise ValueError(f"Unsupported task type: {task_type}")

        # Invoke the model
        response = client.invoke_model(modelId=model_id, body=json.dumps(request_body))

        # Process response
        model_response = json.loads(response["body"].read().decode("utf-8"))

        # Extract image data
        if "images" in model_response and len(model_response["images"]) > 0:
            base64_image_data = model_response["images"][0]

            # Create filename based on task type
            if task_type == "TEXT_IMAGE":
                # FIX: the prompt lives under the "text" key (there is no "prompt"
                # key in the input schema).
                filename = create_filename(tool_input.get("text", "generated_image"))
            elif task_type == "VIRTUAL_TRY_ON":
                # Extract filename from source image path
                source_filename = os.path.basename(tool_input["image_path"])
                base_name = os.path.splitext(source_filename)[0]
                filename = f"{base_name}_try_on"
            else:  # BACKGROUND_REMOVAL
                # Extract filename from image path
                source_filename = os.path.basename(tool_input["image_path"])
                base_name = os.path.splitext(source_filename)[0]
                filename = f"{base_name}_no_bg"

            # Save image
            output_dir = "output"
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)

            # FIX: actually use the derived filename (the path previously used a
            # corrupted literal and ignored `filename`); suffix _1, _2, ... on
            # collision.
            i = 1
            base_image_path = os.path.join(output_dir, f"{filename}.png")
            image_path = base_image_path
            while os.path.exists(image_path):
                image_path = os.path.join(output_dir, f"{filename}_{i}.png")
                i += 1

            with open(image_path, "wb") as file:
                file.write(base64.b64decode(base64_image_data))

            return {
                "toolUseId": tool_use_id,
                "status": "success",
                "content": [
                    {"text": f"{task_type} task completed successfully. Image saved to {image_path}"},
                    {
                        "image": {
                            "format": "png",
                            "source": {"bytes": base64.b64decode(base64_image_data)},
                        }
                    },
                ],
            }
        else:
            raise ValueError("No image data found in the response")

    except Exception as e:
        return {
            "toolUseId": tool_use_id,
            "status": "error",
            "content": [{"text": f"Error generating image: {str(e)}"}],
        }
+""" + +import base64 +import json +from unittest.mock import MagicMock, patch + +import pytest +from strands import Agent +from strands_tools import nova_canvas + + +@pytest.fixture +def agent(): + """Create an agent with the generate_image tool loaded.""" + return Agent(tools=[nova_canvas]) + + +def extract_result_text(result): + """Extract the result text from the agent response.""" + if isinstance(result, dict) and "content" in result and isinstance(result["content"], list): + return result["content"][0]["text"] + return str(result) + + +@pytest.fixture +def mock_boto3_client(): + """Mock boto3 client for testing.""" + with patch("boto3.client") as mock_client: + # Set up mock response + mock_body = MagicMock() + mock_body.read.return_value = json.dumps( + {"images": [base64.b64encode(b"mock_image_data").decode("utf-8")]} + ).encode("utf-8") + + mock_client_instance = MagicMock() + mock_client_instance.invoke_model.return_value = {"body": mock_body} + mock_client.return_value = mock_client_instance + + yield mock_client + + +@pytest.fixture +def mock_os_path_exists(): + """Mock os.path.exists for testing.""" + with patch("os.path.exists") as mock_exists: + # First return False for output directory check, then True for file check to test filename incrementing + mock_exists.side_effect = [False, True, True, False] + yield mock_exists + + +@pytest.fixture +def mock_os_makedirs(): + """Mock os.makedirs for testing.""" + with patch("os.makedirs") as mock_makedirs: + yield mock_makedirs + + +@pytest.fixture +def mock_file_open(): + """Mock file open for testing.""" + mock_file = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_file + + with patch("builtins.open", return_value=mock_context) as mock_open: + yield mock_open, mock_file + + +def test_generate_image_direct(mock_boto3_client, mock_os_path_exists, mock_os_makedirs, mock_file_open): + """Test direct invocation of the generate_image tool.""" + # Create a tool use dictionary 
similar to how the agent would call it + tool_use = { + "toolUseId": "test-tool-use-id", + "input": { + "task_type": "TEXT_IMAGE", + "text": "A cute robot", + "seed": 123, + "negative_text": "blurry, low resolution, pixelated, grainy, unrealistic", + "style": "DESIGN_SKETCH", + }, + } + + # Call the generate_image function directly + result = nova_canvas.nova_canvas(tool=tool_use) + + # Verify the function was called with correct parameters + mock_boto3_client.assert_called_once_with("bedrock-runtime", region_name="us-east-1") + mock_client_instance = mock_boto3_client.return_value + mock_client_instance.invoke_model.assert_called_once() + + # Check the parameters passed to invoke_model + args, kwargs = mock_client_instance.invoke_model.call_args + request_body = json.loads(kwargs["body"]) + + assert request_body["textToImageParams"]["text"] == "A cute robot" + assert request_body["textToImageParams"]["style"] == "DESIGN_SKETCH" + assert request_body["textToImageParams"]["negativeText"] == "blurry, low resolution, pixelated, grainy, unrealistic" + assert request_body["imageGenerationConfig"]["seed"] == 123 + + # Verify directory creation + mock_os_makedirs.assert_called_once() + + # Verify file operations + mock_open, mock_file = mock_file_open + mock_file.write.assert_called_once() + + # Check the result + assert result["toolUseId"] == "test-tool-use-id" + assert result["status"] == "success" + assert "TEXT_IMAGE task completed successfully." 
in result["content"][0]["text"] + assert result["content"][1]["image"]["format"] == "png" + assert isinstance(result["content"][1]["image"]["source"]["bytes"], bytes) + + +def test_generate_image_default_params(mock_boto3_client, mock_os_path_exists, mock_os_makedirs, mock_file_open): + """Test generate_image with default parameters.""" + tool_use = {"toolUseId": "test-tool-use-id", "input": {"prompt": "A cute robot"}} + + with patch("random.randint", return_value=42): + result = nova_canvas.nova_canvas(tool=tool_use) + + # Check the default parameters were used + mock_client_instance = mock_boto3_client.return_value + args, kwargs = mock_client_instance.invoke_model.call_args + request_body = json.loads(kwargs["body"]) + + assert request_body["imageGenerationConfig"]["seed"] == 42 # From our mocked random.randint + assert request_body["imageGenerationConfig"]["quality"] == "standard" + assert result["status"] == "success" + + +def test_generate_image_error_handling(mock_boto3_client): + """Test error handling in generate_image.""" + # Setup boto3 client to raise an exception + mock_client_instance = mock_boto3_client.return_value + mock_client_instance.invoke_model.side_effect = Exception("API error") + + tool_use = {"toolUseId": "test-tool-use-id", "input": {"prompt": "A cute robot"}} + + result = nova_canvas.nova_canvas(tool=tool_use) + + # Verify error handling + assert result["status"] == "error" + assert "Error generating image: API error" in result["content"][0]["text"] + + +def test_virtual_try_on(mock_boto3_client, mock_os_path_exists, mock_os_makedirs, mock_file_open): + """Test virtual try-on functionality.""" + # Mock file reading for images + with patch("builtins.open", mock_file_open[0]): + with patch("strands_tools.nova_canvas.encode_image_file") as mock_encode: + mock_encode.side_effect = ["source_image_b64", "reference_image_b64"] + + tool_use = { + "toolUseId": "test-tool-use-id", + "input": { + "task_type": "VIRTUAL_TRY_ON", + "image_path": 
"person.jpg", + "reference_image_path": "shirt.jpg", + "mask_type": "GARMENT", + "garment_class": "SHORT_SLEEVE_SHIRT", + "preserve_face": "ON", + }, + } + + result = nova_canvas.nova_canvas(tool=tool_use) + + # Verify the function was called with correct parameters + mock_client_instance = mock_boto3_client.return_value + args, kwargs = mock_client_instance.invoke_model.call_args + request_body = json.loads(kwargs["body"]) + + assert request_body["taskType"] == "VIRTUAL_TRY_ON" + assert request_body["virtualTryOnParams"]["sourceImage"] == "source_image_b64" + assert request_body["virtualTryOnParams"]["referenceImage"] == "reference_image_b64" + assert request_body["virtualTryOnParams"]["maskType"] == "GARMENT" + assert request_body["virtualTryOnParams"]["garmentBasedMask"]["garmentClass"] == "SHORT_SLEEVE_SHIRT" + assert request_body["virtualTryOnParams"]["maskExclusions"]["preserveFace"] == "ON" + + assert result["status"] == "success" + assert "VIRTUAL_TRY_ON task completed successfully" in result["content"][0]["text"] + + +def test_background_removal(mock_boto3_client, mock_os_path_exists, mock_os_makedirs, mock_file_open): + """Test background removal functionality.""" + # Mock file reading for image + with patch("builtins.open", mock_file_open[0]): + with patch("strands_tools.nova_canvas.encode_image_file") as mock_encode: + mock_encode.return_value = "image_b64_data" + + tool_use = { + "toolUseId": "test-tool-use-id", + "input": {"task_type": "BACKGROUND_REMOVAL", "image_path": "photo.jpg"}, + } + + result = nova_canvas.nova_canvas(tool=tool_use) + + # Verify the function was called with correct parameters + mock_client_instance = mock_boto3_client.return_value + args, kwargs = mock_client_instance.invoke_model.call_args + request_body = json.loads(kwargs["body"]) + + assert request_body["taskType"] == "BACKGROUND_REMOVAL" + assert request_body["backgroundRemovalParams"]["image"] == "image_b64_data" + + assert result["status"] == "success" + assert 
"BACKGROUND_REMOVAL task completed successfully" in result["content"][0]["text"] + + +def test_filename_creation(): + """Test the filename creation logic using regex patterns similar to create_filename.""" + + # Since create_filename is defined inside the function, we'll replicate its functionality + def create_filename_test(prompt: str) -> str: + import re + + words = re.findall(r"\w+", prompt.lower())[:5] + filename = "_".join(words) + filename = re.sub(r"[^\w\-_\.]", "_", filename) + return filename[:100] + + # Test normal prompt + filename = create_filename_test("A cute robot dancing in the rain") + assert filename == "a_cute_robot_dancing_in" + + # Test prompt with special characters + filename = create_filename_test("A cute robot! With @#$% special chars") + assert filename == "a_cute_robot_with_special" + + # Test long prompt + long_prompt = "This is a very long prompt " + "word " * 50 + filename = create_filename_test(long_prompt) + assert len(filename) <= 100 + + +def test_generate_image_via_agent(agent, mock_boto3_client, mock_os_path_exists, mock_os_makedirs, mock_file_open): + """Test image generation (default tool) via the agent interface.""" + # This simulates how the tool would be used through the Agent interface + result = agent.tool.nova_canvas(prompt="Test via agent") + + result_text = extract_result_text(result) + assert "TEXT_IMAGE task completed successfully." in result_text diff --git a/tests_integ/test_nova_canvas.py b/tests_integ/test_nova_canvas.py new file mode 100644 index 00000000..3cf625ca --- /dev/null +++ b/tests_integ/test_nova_canvas.py @@ -0,0 +1,314 @@ +import os + +import pytest +from strands import Agent +from strands_tools import image_reader, nova_canvas + + +@pytest.fixture +def agent(): + """Agent with image generation and reader tools.""" + return Agent(tools=[nova_canvas, image_reader]) + + +def test_generate_and_read_image(agent, tmp_path): + # 1. 
Generate a lovely dog picture + prompt = "A corgi riding a skateboard in Times Square" + image_gen_result = agent.tool.nova_canvas( + text=prompt, + task_type="TEXT_IMAGE", + model_id="amazon.nova-canvas-v1:0", + negative_prompt="blurry, low quality", + ) + assert image_gen_result["status"] == "success", str(image_gen_result) + content = image_gen_result["content"] + + # Extract and verify image bytes from result + found_image = None + for item in content: + if "image" in item and "source" in item["image"]: + found_image = item["image"]["source"]["bytes"] + assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type" + assert len(found_image) > 1000, "Returned image is too small to be valid" + break + assert found_image is not None, "No image bytes found in result" + + # Save image to temp directory + image_path = tmp_path / "generated.png" + with open(image_path, "wb") as f: + f.write(found_image) + + # 2. use image_reader tool to verify it's a real image + assert os.path.exists(image_path), f"Image file not found at {image_path}" + read_result = agent.tool.image_reader(image_path=str(image_path)) + assert read_result["status"] == "success", str(read_result) + image_content = read_result["content"][0]["image"] + assert image_content["format"] == "png" + assert isinstance(image_content["source"]["bytes"], bytes) + assert len(image_content["source"]["bytes"]) > 1000 + + # 3. test semantic usage to check if it recognizes dog/corgi + semantic_result = agent(f"What is the image at `{image_path}`") + assert "dog" in str(semantic_result).lower() or "corgi" in str(semantic_result).lower() + + +def test_remove_background(agent, tmp_path): + # 1. 
Generate an image
    prompt = "A corgi riding a skateboard in Times Square"
    # NOTE(review): module docs for nova_canvas describe a `negative_text`
    # parameter — confirm `negative_prompt` is the kwarg the tool accepts.
    image_gen_result = agent.tool.nova_canvas(
        text=prompt,
        task_type="TEXT_IMAGE",
        model_id="amazon.nova-canvas-v1:0",
        negative_prompt="blurry, low quality",
    )
    assert image_gen_result["status"] == "success", str(image_gen_result)
    content = image_gen_result["content"]

    # Extract and verify image bytes from result
    found_image = None
    for item in content:
        if "image" in item and "source" in item["image"]:
            found_image = item["image"]["source"]["bytes"]
            assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type"
            assert len(found_image) > 1000, "Returned image is too small to be valid"
            break
    assert found_image is not None, "No image bytes found in result"

    # Save image to temp directory
    image_path = tmp_path / "generated.png"
    with open(image_path, "wb") as f:
        f.write(found_image)

    # 2. Remove the background from the generated image
    image_gen_result = agent.tool.nova_canvas(
        task_type="BACKGROUND_REMOVAL",
        model_id="amazon.nova-canvas-v1:0",
        image_path=str(image_path),
    )
    assert image_gen_result["status"] == "success", str(image_gen_result)
    content = image_gen_result["content"]

    # Extract and verify the image with removed background bytes from result
    found_image = None
    for item in content:
        if "image" in item and "source" in item["image"]:
            found_image = item["image"]["source"]["bytes"]
            assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type"
            assert len(found_image) > 1000, "Returned image is too small to be valid"
            break
    assert found_image is not None, "No image bytes found in result"

    # Save image to temp directory
    image_path_no_bg = tmp_path / "generated_no_bg.png"
    with open(image_path_no_bg, "wb") as f:
        f.write(found_image)

    # 3. use image_reader tool to verify it's a real image
    assert os.path.exists(image_path_no_bg), f"Image file not found at {image_path_no_bg}"
    read_result = agent.tool.image_reader(image_path=str(image_path_no_bg))
    assert read_result["status"] == "success", str(read_result)
    image_content = read_result["content"][0]["image"]
    assert image_content["format"] == "png"
    assert isinstance(image_content["source"]["bytes"], bytes)
    assert len(image_content["source"]["bytes"]) > 1000

    # 4. Ask the agent to compare the two images and confirm the background
    # was removed (previous comment was a "dog/corgi" copy-paste leftover).
    # NOTE(review): the backslash continuation embeds the next line's leading
    # whitespace inside the prompt string — harmless for an LLM prompt, but
    # confirm it is intentional.
    semantic_result = agent(f"Has the background been removed from the image at `{image_path_no_bg} \
        - compare with image at {image_path}` respond with yes or no first")
    assert "yes" in str(semantic_result).lower()


def test_virtual_try_on_mask_garment(agent, tmp_path):
    """Generate a person and a hoodie, then superimpose the hoodie via a GARMENT mask."""
    # 1. Generate an image of a human standing
    # NOTE(review): "neighberhood" is a typo inside the prompt string; left
    # as-is because the string is runtime input to the model.
    prompt = "full body person with a warm, genuine smile standing facing directly at the camera. \
        in a sunny neighberhood with green nature."
    image_gen_result = agent.tool.nova_canvas(
        text=prompt,
        task_type="TEXT_IMAGE",
        model_id="amazon.nova-canvas-v1:0",
        negative_prompt="blurry, low quality",
    )
    assert image_gen_result["status"] == "success", str(image_gen_result)
    content = image_gen_result["content"]

    # Extract and verify image bytes from result
    found_image = None
    for item in content:
        if "image" in item and "source" in item["image"]:
            found_image = item["image"]["source"]["bytes"]
            assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type"
            assert len(found_image) > 1000, "Returned image is too small to be valid"
            break
    assert found_image is not None, "No image bytes found in result"

    # Save the human standing to temp directory
    human_image_path = tmp_path / "human_standing.png"
    with open(human_image_path, "wb") as f:
        f.write(found_image)

    # 2. 
Generate an image of a vibrant tech hoodie with AWS written on it
    prompt = "Generate a vibrant tech hoodie with AWS written on it"
    image_gen_result = agent.tool.nova_canvas(
        text=prompt,
        task_type="TEXT_IMAGE",
        model_id="amazon.nova-canvas-v1:0",
        negative_prompt="blurry, low quality",
    )
    assert image_gen_result["status"] == "success", str(image_gen_result)
    content = image_gen_result["content"]

    # Extract and verify image bytes from result
    found_image = None
    for item in content:
        if "image" in item and "source" in item["image"]:
            found_image = item["image"]["source"]["bytes"]
            assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type"
            assert len(found_image) > 1000, "Returned image is too small to be valid"
            break
    assert found_image is not None, "No image bytes found in result"

    # Save the hoodie image to temp directory
    hoodie_image_path = tmp_path / "ai_hoodie_aws.png"
    with open(hoodie_image_path, "wb") as f:
        f.write(found_image)

    # 3. Virtual try on the hoodie on human image generated
    # NOTE(review): `longSleeveStyle` is camelCase unlike the other snake_case
    # kwargs — presumably forwarded verbatim to the Bedrock request field;
    # confirm the tool accepts it as-is.
    image_gen_result = agent.tool.nova_canvas(
        task_type="VIRTUAL_TRY_ON",
        model_id="amazon.nova-canvas-v1:0",
        image_path=str(human_image_path),
        reference_image_path=str(hoodie_image_path),
        mask_type="GARMENT",
        garment_class="UPPER_BODY",
        longSleeveStyle="SLEEVE_DOWN",
    )

    assert image_gen_result["status"] == "success", str(image_gen_result)
    content = image_gen_result["content"]

    # Extract and verify image bytes from result
    found_image = None
    for item in content:
        if "image" in item and "source" in item["image"]:
            found_image = item["image"]["source"]["bytes"]
            assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type"
            assert len(found_image) > 1000, "Returned image is too small to be valid"
            break
    assert found_image is not None, "No image bytes found in result"

    # Save image to temp directory
    # NOTE(review): "garmet" typo in the filename is runtime data; left as-is.
    image_path = tmp_path / "hoodie_ai_garmet_try_on.png"
    with open(image_path, "wb") as f:
        f.write(found_image)

    # 4. use image_reader tool to verify it's a real image
    assert os.path.exists(image_path), f"Image file not found at {image_path}"
    read_result = agent.tool.image_reader(image_path=str(image_path))
    assert read_result["status"] == "success", str(read_result)
    image_content = read_result["content"][0]["image"]
    assert image_content["format"] == "png"
    assert isinstance(image_content["source"]["bytes"], bytes)
    assert len(image_content["source"]["bytes"]) > 1000

    # 5. Ask the agent to confirm the person is wearing the AWS hoodie
    # (previous comment said "dog/corgi" — a copy-paste leftover).
    # NOTE(review): "a a person" is a typo inside the runtime prompt string;
    # left as-is here.
    semantic_result = agent(f"Does the image at path `{image_path}` contain a a person wearing an AWS hoodie?\
        respond with yes or no first")
    assert "yes" in str(semantic_result).lower()


def test_virtual_try_on_prompt_mask(agent, tmp_path):
    """Replace a purple couch with a green couch using a PROMPT mask try-on."""
    # 1. 
Generate a room with a purple couch in it
    prompt = "an empty room with a white background and a purple couch in the middle"
    image_gen_result = agent.tool.nova_canvas(
        text=prompt,
        task_type="TEXT_IMAGE",
        model_id="amazon.nova-canvas-v1:0",
        negative_prompt="blurry, low quality",
    )
    assert image_gen_result["status"] == "success", str(image_gen_result)
    content = image_gen_result["content"]

    # Extract and verify image bytes from result
    found_image = None
    for item in content:
        if "image" in item and "source" in item["image"]:
            found_image = item["image"]["source"]["bytes"]
            assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type"
            assert len(found_image) > 1000, "Returned image is too small to be valid"
            break
    assert found_image is not None, "No image bytes found in result"

    # Save the empty living room with purple couch image to temp directory
    living_room_image_path = tmp_path / "empty_room_purple_couch.png"
    with open(living_room_image_path, "wb") as f:
        f.write(found_image)

    # 2. Generate an image of a green couch
    prompt = "Generate a green couch with white background"
    image_gen_result = agent.tool.nova_canvas(
        text=prompt,
        task_type="TEXT_IMAGE",
        model_id="amazon.nova-canvas-v1:0",
        negative_prompt="blurry, low quality",
    )
    assert image_gen_result["status"] == "success", str(image_gen_result)
    content = image_gen_result["content"]

    # Extract and verify image bytes from result
    found_image = None
    for item in content:
        if "image" in item and "source" in item["image"]:
            found_image = item["image"]["source"]["bytes"]
            assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type"
            assert len(found_image) > 1000, "Returned image is too small to be valid"
            break
    assert found_image is not None, "No image bytes found in result"

    # Save the couch image to temp directory
    couch_image_path = tmp_path / "green_couch.png"
    with open(couch_image_path, "wb") as f:
        f.write(found_image)

    # 3. Virtual try-on: superimpose the green couch in place of the purple
    # couch in the living room (the PROMPT mask selects the region to replace).
    image_gen_result = agent.tool.nova_canvas(
        task_type="VIRTUAL_TRY_ON",
        model_id="amazon.nova-canvas-v1:0",
        image_path=str(living_room_image_path),
        reference_image_path=str(couch_image_path),
        mask_type="PROMPT",
        mask_prompt="purple couch",
    )

    assert image_gen_result["status"] == "success", str(image_gen_result)
    content = image_gen_result["content"]

    # Extract and verify image bytes from result
    found_image = None
    for item in content:
        if "image" in item and "source" in item["image"]:
            found_image = item["image"]["source"]["bytes"]
            assert isinstance(found_image, bytes), "Returned image bytes are not 'bytes' type"
            assert len(found_image) > 1000, "Returned image is too small to be valid"
            break
    assert found_image is not None, "No image bytes found in result"

    # Save image to temp directory
    image_path = tmp_path / "living_room_couch_try_on.png"
    with open(image_path, "wb") as f:
        f.write(found_image)

    # 4. use image_reader tool to verify it's a real image
    assert os.path.exists(image_path), f"Image file not found at {image_path}"
    read_result = agent.tool.image_reader(image_path=str(image_path))
    assert read_result["status"] == "success", str(read_result)
    image_content = read_result["content"][0]["image"]
    assert image_content["format"] == "png"
    assert isinstance(image_content["source"]["bytes"], bytes)
    assert len(image_content["source"]["bytes"]) > 1000

    # 5. Ask the agent to confirm the green couch replaced the purple one
    # (previous comment said "dog/corgi" — a copy-paste leftover).
    # NOTE(review): leftover debug print below — visible only with pytest -s;
    # consider removing it.
    semantic_result = agent(f"Does the image at path `{image_path}` contain a green couch in an empty living room?\
        respond with yes or no first")
    print(f"\n Agent response: {semantic_result}")
    assert "yes" in str(semantic_result).lower() and "green" in str(semantic_result).lower()