strands-agents · zastrowm · Jun 26, 2025 · Jun 17, 2025 · Jun 18, 2025 · Jun 18, 2025
@@ -1,18 +1,21 @@
 """
-Image generation tool for Strands Agent using Stable Diffusion.
+Image generation tool for Strands Agent using Stable Diffusion and Nova Canvas models.
 
 This module provides functionality to generate high-quality images using Amazon Bedrock's
-Stable Diffusion models based on text prompts. It handles the entire image generation
+image generation models based on text prompts. It handles the entire image generation
 process including API integration, parameter management, response processing, and
 local storage of results.
 
 Key Features:
 
 1. Image Generation:
-   • Text-to-image conversion using Stable Diffusion
-   • Support for multiple model variants (primarily stable-diffusion-xl-v1)
-   • Customizable generation parameters (seed, steps, cfg_scale)
-   • Style preset selection for consistent aesthetics
+   • Text-to-image conversion using multiple model providers
+   • Support for the following models:
+        • stability.sd3-5-large-v1:0
+        • stability.stable-image-core-v1:1
+        • stability.stable-image-ultra-v1:1
+        • amazon.nova-canvas-v1:0
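+   • Customizable generation parameters (seed and negative_prompt for all models;
+     aspect_ratio, output_format for Stable Diffusion; width, height, quality,
+     cfg_scale, number_of_images for Nova Canvas)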
 
 2. Output Management:
    • Automatic local saving with intelligent filename generation
@@ -36,14 +39,22 @@
 # Basic usage with default parameters
 agent.tool.generate_image(prompt="A steampunk robot playing chess")
 
-# Advanced usage with custom parameters
+# Advanced usage with Stable Diffusion
 agent.tool.generate_image(
     prompt="A futuristic city with flying cars",
-    model_id="stability.stable-diffusion-xl-v1",
-    seed=42,
-    steps=50,
-    cfg_scale=12,
-    style_preset="cinematic"
+    model_id="stability.sd3-5-large-v1:0",
+    aspect_ratio="5:4",
+    output_format="jpeg",
+    negative_prompt="bad lighting, harsh lighting, abstract, surreal, twisted, multiple levels",
+)
+
+# Using Nova Canvas
+agent.tool.generate_image(
+    prompt="A photograph of a cup of coffee from the side",
+    model_id="amazon.nova-canvas-v1:0",
+    width=1024,
+    height=1024,
+    quality="premium",
 )
 ```
 
@@ -60,9 +71,17 @@
 import boto3
 from strands.types.tools import ToolResult, ToolUse
 
+# Stability AI model IDs accepted by this tool. generate_image() tests membership
+# in this list to choose the default Bedrock region for the request.
+STABLE_DIFFUSION_MODEL_ID = [
+    "stability.sd3-5-large-v1:0",
+    "stability.stable-image-core-v1:1",
+    "stability.stable-image-ultra-v1:1",
+]
+# Model ID of Amazon Nova Canvas, the other image model this tool supports.
+# NOTE(review): the visible Nova Canvas branch compares against the literal
+# string rather than this constant - confirm whether it is used elsewhere.
+NOVA_CANVAS_MODEL_ID = "amazon.nova-canvas-v1:0"
+
+
 TOOL_SPEC = {
     "name": "generate_image",
-    "description": "Generates an image using Stable Diffusion based on a given prompt",
+    "description": "Generates an image using Stable Diffusion or Nova Canvas based on a given prompt",
     "inputSchema": {
         "json": {
             "type": "object",
@@ -73,23 +92,48 @@
                 },
                 "model_id": {
                     "type": "string",
-                    "description": "Model id for image model, stability.stable-diffusion-xl-v1.",
+                    "description": "Model id for image model, stability.sd3-5-large-v1:0, \
+                    stability.stable-image-core-v1:1,  stability.stable-image-ultra-v1:1, or amazon.nova-canvas-v1:0",
                 },
                 "seed": {
                     "type": "integer",
                     "description": "Optional: Seed for random number generation (default: random)",
                 },
-                "steps": {
+                "aspect_ratio": {
+                    "type": "string",
+                    "description": "Optional: Controls the aspect ratio of the generated image for \
+                     Stable Diffusion models. Default 1:1. Enum: 16:9, 1:1, 21:9, 2:3, 3:2, 4:5, 5:4, 9:16, 9:21",
+                },
+                "output_format": {
+                    "type": "string",
+                    "description": "Optional: Specifies the format of the output image for Stable Diffusion models. \
+                        Supported formats: JPEG, PNG.",
+                },
+                "negative_prompt": {
+                    "type": "string",
+                    "description": "Optional: Keywords of what you do not wish to see in the output image. \
+                     Max: 10.000 characters.",
+                },
+                "width": {
                     "type": "integer",
-                    "description": "Optional: Number of steps for image generation (default: 30)",
+                    "description": "Optional: Width of the generated image for Nova Canvas model (default: 1024)",
+                },
+                "height": {
+                    "type": "integer",
+                    "description": "Optional: Height of the generated image for Nova Canvas model (default: 1024)",
+                },
+                "quality": {
+                    "type": "string",
+                    "description": "Optional: Quality setting for Nova Canvas model. Options: 'standard' or 'premium' \
+                    (default: 'standard')",
                 },
                 "cfg_scale": {
                     "type": "number",
-                    "description": "Optional: CFG scale for image generation (default: 10)",
+                    "description": "Optional: CFG scale for Nova Canvas model (default: 8.0)",
                 },
-                "style_preset": {
-                    "type": "string",
-                    "description": "Optional: Style preset for image generation (default: 'photographic')",
+                "number_of_images": {
+                    "type": "integer",
+                    "description": "Optional: Number of images to generate for Nova Canvas model (default: 1)",
                 },
             },
             "required": ["prompt"],
@@ -98,19 +142,28 @@
 }
 
 
+# Create a filename based on the prompt
+def create_filename(prompt: str) -> str:
+    """Generate a filename stem (without extension) from the prompt text.
+
+    The prompt is lower-cased and split into runs of word characters; the
+    first five runs are joined with underscores and the result is capped at
+    100 characters.
+
+    Args:
+        prompt: Text prompt the image was generated from.
+
+    Returns:
+        A non-empty string containing only word characters and underscores.
+        Falls back to "image" when the prompt contains no word characters.
+    """
+    words = re.findall(r"\w+", prompt.lower())[:5]
+    # An empty or punctuation-only prompt would otherwise yield "" and the
+    # caller would write a hidden, nameless file such as "output/.png".
+    if not words:
+        return "image"
+    # Every word already matches \w+, so the joined name needs no further
+    # character sanitising; only the length has to be bounded.
+    return "_".join(words)[:100]
+
+
 def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult:
     """
-    Generate images from text prompts using Stable Diffusion via Amazon Bedrock.
+    Generate images from text prompts using Stable Diffusion or Nova Canvas via Amazon Bedrock.
 
     This function transforms textual descriptions into high-quality images using
-    Stable Diffusion models available through Amazon Bedrock. It provides extensive
+    image generation models available through Amazon Bedrock. It provides extensive
     customization options and handles the complete process from API interaction to
     image storage and result formatting.
 
     How It Works:
     ------------
     1. Extracts and validates parameters from the tool input
-    2. Configures the request payload with appropriate parameters
+    2. Configures the request payload with appropriate parameters based on model type
     3. Invokes the Bedrock image generation model through AWS SDK
     4. Processes the response to extract the base64-encoded image
     5. Creates an appropriate filename based on the prompt content
@@ -120,11 +173,22 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult:
     Generation Parameters:
     --------------------
     - prompt: The textual description of the desired image
-    - model_id: Specific model to use (defaults to stable-diffusion-xl-v1)
+    - model_id: Specific model to use (defaults to stability.stable-image-core-v1:1)
+
+    For Stable Diffusion models:
+    - seed: Controls randomness for reproducible results
+    - aspect_ratio: Controls the aspect ratio of the generated image
+    - output_format: Specifies the format of the output image (e.g., png or jpeg)
+    - negative_prompt: Keywords of what you do not wish to see in the output image
+
+    For Nova Canvas model:
+    - width: Width of the generated image (default: 1024)
+    - height: Height of the generated image (default: 1024)
+    - quality: Quality setting ('standard' or 'premium')
+    - cfg_scale: CFG scale value (default: 8.0)
+    - number_of_images: Number of images to generate (default: 1)
     - seed: Controls randomness for reproducible results
-    - style_preset: Artistic style to apply (e.g., photographic, cinematic)
-    - cfg_scale: Controls how closely the image follows the prompt
-    - steps: Number of diffusion steps (higher = more refined but slower)
+    - negative_prompt: Keywords of what you do not wish to see in the output image
 
     Common Usage Scenarios:
     ---------------------
@@ -137,11 +201,8 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult:
     Args:
         tool: ToolUse object containing the parameters for image generation.
             - prompt: The text prompt describing the desired image.
-            - model_id: Optional model identifier (default: "stability.stable-diffusion-xl-v1").
-            - seed: Optional random seed (default: random integer).
-            - style_preset: Optional style preset name (default: "photographic").
-            - cfg_scale: Optional CFG scale value (default: 10).
-            - steps: Optional number of diffusion steps (default: 30).
+            - model_id: Optional model identifier.
+            - Additional parameters specific to the chosen model type.
         **kwargs: Additional keyword arguments (unused).
 
     Returns:
@@ -161,74 +222,131 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult:
         tool_use_id = tool["toolUseId"]
         tool_input = tool["input"]
 
-        # Extract input parameters
+        # Extract common input parameters
         prompt = tool_input.get("prompt", "A stylized picture of a cute old steampunk robot.")
-        model_id = tool_input.get("model_id", "stability.stable-diffusion-xl-v1")
+        model_id = tool_input.get("model_id", "stability.stable-image-core-v1:1")
+        region = (
+            tool_input.get("region", "us-east-1")
+            if model_id not in STABLE_DIFFUSION_MODEL_ID
+            else tool_input.get("region", "us-west-2")
+        )
         seed = tool_input.get("seed", random.randint(0, 4294967295))
-        style_preset = tool_input.get("style_preset", "photographic")
-        cfg_scale = tool_input.get("cfg_scale", 10)
-        steps = tool_input.get("steps", 30)
+        negative_prompt = tool_input.get("negative_prompt", "bad lighting, harsh lighting")
 
         # Create a Bedrock Runtime client
-        client = boto3.client("bedrock-runtime", region_name="us-west-2")
-
-        # Format the request payload
-        native_request = {
-            "text_prompts": [{"text": prompt}],
-            "style_preset": style_preset,
-            "seed": seed,
-            "cfg_scale": cfg_scale,
-            "steps": steps,
-        }
-        request = json.dumps(native_request)
-
-        # Invoke the model
-        response = client.invoke_model(modelId=model_id, body=request)
-
-        # Decode the response body
-        model_response = json.loads(response["body"].read())
-
-        # Extract the image data
-        base64_image_data = model_response["artifacts"][0]["base64"]
-
-        # Create a filename based on the prompt
-        def create_filename(prompt: str) -> str:
-            """Generate a filename from the prompt text."""
-            words = re.findall(r"\w+", prompt.lower())[:5]
-            filename = "_".join(words)
-            filename = re.sub(r"[^\w\-_\.]", "_", filename)
-            return filename[:100]  # Limit filename length
-
-        filename = create_filename(prompt)
-
-        # Save the generated image to a local folder
-        output_dir = "output"
-        if not os.path.exists(output_dir):
-            os.makedirs(output_dir)
-
-        i = 1
-        base_image_path = os.path.join(output_dir, f"{filename}.png")
-        image_path = base_image_path
-        while os.path.exists(image_path):
-            image_path = os.path.join(output_dir, f"{filename}_{i}.png")
-            i += 1
-
-        with open(image_path, "wb") as file:
-            file.write(base64.b64decode(base64_image_data))
-
-        return {
-            "toolUseId": tool_use_id,
-            "status": "success",
-            "content": [
-                {"text": f"The generated image has been saved locally to {image_path}. "},
-                {
-                    "image": {
-                        "format": "png",
-                        "source": {"bytes": base64.b64decode(base64_image_data)},
-                    }
+        client = boto3.client("bedrock-runtime", region_name=region)
+
+        # Initialize variables for later use
+        base64_image_data = None
+        output_format = "jpeg"  # Default format
+
+        # Format the request payload based on model type
+        if (
+            model_id == "stability.sd3-5-large-v1:0"
+            or model_id == "stability.stable-image-core-v1:1"
+            or model_id == "stability.stable-image-ultra-v1:1"
+        ):
+            # Stable Diffusion specific parameters
+            aspect_ratio = tool_input.get("aspect_ratio", "1:1")
+            output_format = tool_input.get("output_format", "jpeg")
+
+            native_request = {
+                "prompt": prompt,
+                "aspect_ratio": aspect_ratio,
+                "seed": seed,
+                "output_format": output_format,
+                "negative_prompt": negative_prompt,
+            }
+            request = json.dumps(native_request)
+
+            # Invoke the model
+            response = client.invoke_model(modelId=model_id, body=request)
+
+            # Decode the response body
+            model_response = json.loads(response["body"].read().decode("utf-8"))
+
+            # Extract the image data
+            base64_image_data = model_response["images"][0]
+
+        elif model_id == "amazon.nova-canvas-v1:0":
+            # Nova Canvas specific parameters
+            width = tool_input.get("width", 1024)
+            height = tool_input.get("height", 1024)
+            quality = tool_input.get("quality", "standard")
+            cfg_scale = tool_input.get("cfg_scale", 8.0)
+            number_of_images = tool_input.get("number_of_images", 1)
+
+            # Format the Nova Canvas request
+            nova_request = {
+                "taskType": "TEXT_IMAGE",
+                "textToImageParams": {"text": prompt, "negativeText": negative_prompt},
+                "imageGenerationConfig": {
+                    "width": width,
+                    "height": height,
+                    "quality": quality,
+                    "cfgScale": cfg_scale,
+                    "seed": seed,
+                    "numberOfImages": number_of_images,
                 },
-            ],
-        }
+            }
+            request = json.dumps(nova_request)
+
+            # Invoke the model
+            response = client.invoke_model(modelId=model_id, body=request)
+
+            # Decode the response body
+            model_response = json.loads(response["body"].read().decode("utf-8"))
+
+            # Extract the image data
+            base64_image_data = model_response["images"][0]
+
+        else:
+            return {
+                "toolUseId": tool_use_id,
+                "status": "error",
+                "content": [
+                    {
+                        "text": "Supported models for this tool are: \n \
+                              1.stability.sd3-5-large-v1:0 \n \
+                              2. stability.stable-image-core-v1:1 \n \
+                              3. stability.stable-image-ultra-v1:1 \n \
+                              4. amazon.nova-canvas-v1:0"
+                    }
+                ],
+            }
+
+        # If we have image data, process and save it
+        if base64_image_data:
+            filename = create_filename(prompt)
+
+            # Save the generated image to a local folder
+            output_dir = "output"
+            if not os.path.exists(output_dir):
+                os.makedirs(output_dir)
+
+            i = 1
+            base_image_path = os.path.join(output_dir, f"{filename}.png")
+            image_path = base_image_path
+            while os.path.exists(image_path):
+                image_path = os.path.join(output_dir, f"{filename}_{i}.png")
+                i += 1
+
+            with open(image_path, "wb") as file:
+                file.write(base64.b64decode(base64_image_data))
+
+            return {
+                "toolUseId": tool_use_id,
+                "status": "success",
+                "content": [
+                    {"text": f"The generated image has been saved locally to {image_path}. "},
+                    {
+                        "image": {
+                            "format": output_format,
+                            "source": {"bytes": base64.b64decode(base64_image_data)},
+                        }
+                    },
+                ],
+            }
 
     except Exception as e:
         return {