Skip to content
Merged
308 changes: 213 additions & 95 deletions src/strands_tools/generate_image.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
"""
Image generation tool for Strands Agent using Stable Diffusion.
Image generation tool for Strands Agent using Stable Diffusion and Nova Canvas models.

This module provides functionality to generate high-quality images using Amazon Bedrock's
Stable Diffusion models based on text prompts. It handles the entire image generation
image generation models based on text prompts. It handles the entire image generation
process including API integration, parameter management, response processing, and
local storage of results.

Key Features:

1. Image Generation:
• Text-to-image conversion using Stable Diffusion
• Support for multiple model variants (primarily stable-diffusion-xl-v1)
• Customizable generation parameters (seed, steps, cfg_scale)
• Style preset selection for consistent aesthetics
• Text-to-image conversion using multiple model providers
• Support for the following models:
• stability.sd3-5-large-v1:0
• stability.stable-image-core-v1:1
• stability.stable-image-ultra-v1:1
• amazon.nova-canvas-v1:0
• Customizable generation parameters (seed, aspect_ratio, output_format, negative_prompt)

2. Output Management:
• Automatic local saving with intelligent filename generation
Expand All @@ -36,14 +39,22 @@
# Basic usage with default parameters
agent.tool.generate_image(prompt="A steampunk robot playing chess")

# Advanced usage with custom parameters
# Advanced usage with Stable Diffusion
agent.tool.generate_image(
prompt="A futuristic city with flying cars",
model_id="stability.stable-diffusion-xl-v1",
seed=42,
steps=50,
cfg_scale=12,
style_preset="cinematic"
model_id="stability.sd3-5-large-v1:0",
aspect_ratio="5:4",
output_format="jpeg",
negative_prompt="bad lighting, harsh lighting, abstract, surreal, twisted, multiple levels",
)

# Using Nova Canvas
agent.tool.generate_image(
prompt="A photograph of a cup of coffee from the side",
model_id="amazon.nova-canvas-v1:0",
width=1024,
height=1024,
quality="premium",
)
```

Expand All @@ -60,9 +71,17 @@
import boto3
from strands.types.tools import ToolResult, ToolUse

# Stability AI model ids accepted by this tool. generate_image tests membership
# in this list to choose the default Bedrock region for the request.
# NOTE(review): the name is singular but the value is a list of ids.
STABLE_DIFFUSION_MODEL_ID = [
"stability.sd3-5-large-v1:0",
"stability.stable-image-core-v1:1",
"stability.stable-image-ultra-v1:1",
]
# Model id of the Amazon Nova Canvas image model accepted by this tool.
NOVA_CANVAS_MODEL_ID = "amazon.nova-canvas-v1:0"


TOOL_SPEC = {
"name": "generate_image",
"description": "Generates an image using Stable Diffusion based on a given prompt",
"description": "Generates an image using Stable Diffusion or Nova Canvas based on a given prompt",
"inputSchema": {
"json": {
"type": "object",
Expand All @@ -73,23 +92,48 @@
},
"model_id": {
"type": "string",
"description": "Model id for image model, stability.stable-diffusion-xl-v1.",
"description": "Model id for image model, stability.sd3-5-large-v1:0, \
stability.stable-image-core-v1:1, stability.stable-image-ultra-v1:1, or amazon.nova-canvas-v1:0",
},
"seed": {
"type": "integer",
"description": "Optional: Seed for random number generation (default: random)",
},
"steps": {
"aspect_ratio": {
"type": "string",
"description": "Optional: Controls the aspect ratio of the generated image for \
Stable Diffusion models. Default 1:1. Enum: 16:9, 1:1, 21:9, 2:3, 3:2, 4:5, 5:4, 9:16, 9:21",
},
"output_format": {
"type": "string",
"description": "Optional: Specifies the format of the output image for Stable Diffusion models. \
Supported formats: JPEG, PNG.",
},
"negative_prompt": {
"type": "string",
"description": "Optional: Keywords of what you do not wish to see in the output image. \
Max: 10.000 characters.",
},
"width": {
"type": "integer",
"description": "Optional: Number of steps for image generation (default: 30)",
"description": "Optional: Width of the generated image for Nova Canvas model (default: 1024)",
},
"height": {
"type": "integer",
"description": "Optional: Height of the generated image for Nova Canvas model (default: 1024)",
},
"quality": {
"type": "string",
"description": "Optional: Quality setting for Nova Canvas model. Options: 'standard' or 'premium' \
(default: 'standard')",
},
"cfg_scale": {
"type": "number",
"description": "Optional: CFG scale for image generation (default: 10)",
"description": "Optional: CFG scale for Nova Canvas model (default: 8.0)",
},
"style_preset": {
"type": "string",
"description": "Optional: Style preset for image generation (default: 'photographic')",
"number_of_images": {
"type": "integer",
"description": "Optional: Number of images to generate for Nova Canvas model (default: 1)",
},
},
"required": ["prompt"],
Expand All @@ -98,19 +142,28 @@
}


# Create a filename based on the prompt
def create_filename(prompt: str) -> str:
    """Build a filesystem-safe base filename (without extension) from a prompt.

    The prompt is lowercased, split into word tokens, and the first five
    tokens are joined with underscores. The result is capped at 100
    characters.

    Args:
        prompt: Free-form text describing the desired image.

    Returns:
        A name consisting only of word characters and underscores. When the
        prompt contains no word characters at all (empty or punctuation-only),
        the literal "image" is returned so callers never build an
        extension-only path such as ".png".
    """
    # Tokens matched by ``\w+`` contain only word characters, so the joined
    # name needs no further sanitising pass.
    words = re.findall(r"\w+", prompt.lower())[:5]
    filename = "_".join(words)[:100]  # Limit filename length
    return filename or "image"


def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult:
"""
Generate images from text prompts using Stable Diffusion via Amazon Bedrock.
Generate images from text prompts using Stable Diffusion or Nova Canvas via Amazon Bedrock.

This function transforms textual descriptions into high-quality images using
Stable Diffusion models available through Amazon Bedrock. It provides extensive
image generation models available through Amazon Bedrock. It provides extensive
customization options and handles the complete process from API interaction to
image storage and result formatting.

How It Works:
------------
1. Extracts and validates parameters from the tool input
2. Configures the request payload with appropriate parameters
2. Configures the request payload with appropriate parameters based on model type
3. Invokes the Bedrock image generation model through AWS SDK
4. Processes the response to extract the base64-encoded image
5. Creates an appropriate filename based on the prompt content
Expand All @@ -120,11 +173,22 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult:
Generation Parameters:
--------------------
- prompt: The textual description of the desired image
- model_id: Specific model to use (defaults to stable-diffusion-xl-v1)
- model_id: Specific model to use (defaults to stability.stable-image-core-v1:1)

For Stable Diffusion models:
- seed: Controls randomness for reproducible results
- aspect_ratio: Controls the aspect ratio of the generated image
- output_format: Specifies the format of the output image (e.g., png or jpeg)
- negative_prompt: Keywords of what you do not wish to see in the output image

For Nova Canvas model:
- width: Width of the generated image (default: 1024)
- height: Height of the generated image (default: 1024)
- quality: Quality setting ('standard' or 'premium')
- cfg_scale: CFG scale value (default: 8.0)
- number_of_images: Number of images to generate (default: 1)
- seed: Controls randomness for reproducible results
- style_preset: Artistic style to apply (e.g., photographic, cinematic)
- cfg_scale: Controls how closely the image follows the prompt
- steps: Number of diffusion steps (higher = more refined but slower)
- negative_prompt: Keywords of what you do not wish to see in the output image

Common Usage Scenarios:
---------------------
Expand All @@ -137,11 +201,8 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult:
Args:
tool: ToolUse object containing the parameters for image generation.
- prompt: The text prompt describing the desired image.
- model_id: Optional model identifier (default: "stability.stable-diffusion-xl-v1").
- seed: Optional random seed (default: random integer).
- style_preset: Optional style preset name (default: "photographic").
- cfg_scale: Optional CFG scale value (default: 10).
- steps: Optional number of diffusion steps (default: 30).
- model_id: Optional model identifier.
- Additional parameters specific to the chosen model type.
**kwargs: Additional keyword arguments (unused).

Returns:
Expand All @@ -161,74 +222,131 @@ def generate_image(tool: ToolUse, **kwargs: Any) -> ToolResult:
tool_use_id = tool["toolUseId"]
tool_input = tool["input"]

# Extract input parameters
# Extract common input parameters
prompt = tool_input.get("prompt", "A stylized picture of a cute old steampunk robot.")
model_id = tool_input.get("model_id", "stability.stable-diffusion-xl-v1")
model_id = tool_input.get("model_id", "stability.stable-image-core-v1:1")
region = (
tool_input.get("region", "us-east-1")
if model_id not in STABLE_DIFFUSION_MODEL_ID
else tool_input.get("region", "us-west-2")
)
seed = tool_input.get("seed", random.randint(0, 4294967295))
style_preset = tool_input.get("style_preset", "photographic")
cfg_scale = tool_input.get("cfg_scale", 10)
steps = tool_input.get("steps", 30)
negative_prompt = tool_input.get("negative_prompt", "bad lighting, harsh lighting")

# Create a Bedrock Runtime client
client = boto3.client("bedrock-runtime", region_name="us-west-2")

# Format the request payload
native_request = {
"text_prompts": [{"text": prompt}],
"style_preset": style_preset,
"seed": seed,
"cfg_scale": cfg_scale,
"steps": steps,
}
request = json.dumps(native_request)

# Invoke the model
response = client.invoke_model(modelId=model_id, body=request)

# Decode the response body
model_response = json.loads(response["body"].read())

# Extract the image data
base64_image_data = model_response["artifacts"][0]["base64"]

# Create a filename based on the prompt
def create_filename(prompt: str) -> str:
"""Generate a filename from the prompt text."""
words = re.findall(r"\w+", prompt.lower())[:5]
filename = "_".join(words)
filename = re.sub(r"[^\w\-_\.]", "_", filename)
return filename[:100] # Limit filename length

filename = create_filename(prompt)

# Save the generated image to a local folder
output_dir = "output"
if not os.path.exists(output_dir):
os.makedirs(output_dir)

i = 1
base_image_path = os.path.join(output_dir, f"{filename}.png")
image_path = base_image_path
while os.path.exists(image_path):
image_path = os.path.join(output_dir, f"{filename}_{i}.png")
i += 1

with open(image_path, "wb") as file:
file.write(base64.b64decode(base64_image_data))

return {
"toolUseId": tool_use_id,
"status": "success",
"content": [
{"text": f"The generated image has been saved locally to {image_path}. "},
{
"image": {
"format": "png",
"source": {"bytes": base64.b64decode(base64_image_data)},
}
client = boto3.client("bedrock-runtime", region_name=region)

# Initialize variables for later use
base64_image_data = None
output_format = "jpeg" # Default format

# Format the request payload based on model type
if (
model_id == "stability.sd3-5-large-v1:0"
or model_id == "stability.stable-image-core-v1:1"
or model_id == "stability.stable-image-ultra-v1:1"
):
# Stable Diffusion specific parameters
aspect_ratio = tool_input.get("aspect_ratio", "1:1")
output_format = tool_input.get("output_format", "jpeg")

native_request = {
"prompt": prompt,
"aspect_ratio": aspect_ratio,
"seed": seed,
"output_format": output_format,
"negative_prompt": negative_prompt,
}
request = json.dumps(native_request)

# Invoke the model
response = client.invoke_model(modelId=model_id, body=request)

# Decode the response body
model_response = json.loads(response["body"].read().decode("utf-8"))

# Extract the image data
base64_image_data = model_response["images"][0]

elif model_id == "amazon.nova-canvas-v1:0":
# Nova Canvas specific parameters
width = tool_input.get("width", 1024)
height = tool_input.get("height", 1024)
quality = tool_input.get("quality", "standard")
cfg_scale = tool_input.get("cfg_scale", 8.0)
number_of_images = tool_input.get("number_of_images", 1)

# Format the Nova Canvas request
nova_request = {
"taskType": "TEXT_IMAGE",
"textToImageParams": {"text": prompt, "negativeText": negative_prompt},
"imageGenerationConfig": {
"width": width,
"height": height,
"quality": quality,
"cfgScale": cfg_scale,
"seed": seed,
"numberOfImages": number_of_images,
},
],
}
}
request = json.dumps(nova_request)

# Invoke the model
response = client.invoke_model(modelId=model_id, body=request)

# Decode the response body
model_response = json.loads(response["body"].read().decode("utf-8"))

# Extract the image data
base64_image_data = model_response["images"][0]

else:
return {
"toolUseId": tool_use_id,
"status": "error",
"content": [
{
"text": "Supported models for this tool are: \n \
1.stability.sd3-5-large-v1:0 \n \
2. stability.stable-image-core-v1:1 \n \
3. stability.stable-image-ultra-v1:1 \n \
4. amazon.nova-canvas-v1:0"
}
],
}

# If we have image data, process and save it
if base64_image_data:
filename = create_filename(prompt)

# Save the generated image to a local folder
output_dir = "output"
if not os.path.exists(output_dir):
os.makedirs(output_dir)

i = 1
base_image_path = os.path.join(output_dir, f"{filename}.png")
image_path = base_image_path
while os.path.exists(image_path):
image_path = os.path.join(output_dir, f"{filename}_{i}.png")
i += 1

with open(image_path, "wb") as file:
file.write(base64.b64decode(base64_image_data))

return {
"toolUseId": tool_use_id,
"status": "success",
"content": [
{"text": f"The generated image has been saved locally to {image_path}. "},
{
"image": {
"format": output_format,
"source": {"bytes": base64.b64decode(base64_image_data)},
}
},
],
}

except Exception as e:
return {
Expand Down
Loading