Skip to content
8 changes: 7 additions & 1 deletion packages/types/src/experiment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@ import type { Keys, Equals, AssertEqual } from "./type-fu.js"
* ExperimentId
*/

export const experimentIds = ["powerSteering", "multiFileApplyDiff", "preventFocusDisruption"] as const
// Identifiers of the available experiments. `as const` keeps the array a
// readonly tuple of string literals, which is what lets `z.enum` below
// accept exactly these ids.
export const experimentIds = [
"powerSteering",
"multiFileApplyDiff",
"preventFocusDisruption",
"imageGeneration",
] as const

// Zod enum built from `experimentIds`; validates that a value is one of the ids listed above.
export const experimentIdsSchema = z.enum(experimentIds)

Expand All @@ -20,6 +25,7 @@ export const experimentsSchema = z.object({
powerSteering: z.boolean().optional(),
multiFileApplyDiff: z.boolean().optional(),
preventFocusDisruption: z.boolean().optional(),
imageGeneration: z.boolean().optional(),
})

export type Experiments = z.infer<typeof experimentsSchema>
Expand Down
8 changes: 8 additions & 0 deletions packages/types/src/provider-settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,14 @@ const baseProviderSettingsSchema = z.object({

// Model verbosity.
verbosity: verbosityLevelsSchema.optional(),

// Image generation settings (experimental)
imageGenerationSettings: z
.object({
openRouterApiKey: z.string().optional(),
selectedModel: z.string().optional(),
})
.optional(),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we can remove this from the base provider schema

})

// Several of the providers share common model config properties.
Expand Down
1 change: 1 addition & 0 deletions packages/types/src/tool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ export const toolNames = [
"fetch_instructions",
"codebase_search",
"update_todo_list",
"generate_image",
] as const

export const toolNamesSchema = z.enum(toolNames)
Expand Down
128 changes: 128 additions & 0 deletions src/api/providers/openrouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,33 @@ import { DEFAULT_HEADERS } from "./constants"
import { BaseProvider } from "./base-provider"
import type { SingleCompletionHandler } from "../index"

// Image generation types
/** A single image entry attached to a response message. */
interface ImageGenerationImage {
	type?: string
	image_url?: { url?: string }
}

/** One element of the response's `choices` array. */
interface ImageGenerationChoice {
	message?: {
		content?: string
		images?: ImageGenerationImage[]
	}
}

/** Error object that may be present in the response body. */
interface ImageGenerationApiError {
	message?: string
	type?: string
	code?: string
}

/**
 * Shape of the JSON body read back from the image generation request.
 * Every field is optional, so consumers must check each level before use.
 */
interface ImageGenerationResponse {
	choices?: ImageGenerationChoice[]
	error?: ImageGenerationApiError
}

/**
 * Outcome of `generateImage`. The method reports failures through this object
 * (`success: false` plus `error`) instead of throwing.
 */
export interface ImageGenerationResult {
/** `true` only when an image was found in the response and matched the expected data-URL format. */
success: boolean
/** On success, the complete `data:image/...;base64,...` URL as received (not just the base64 payload). */
imageData?: string
/** On success, the format captured from the data URL: `png`, `jpeg` or `jpg`. */
imageFormat?: string
/** On failure, a human-readable description of what went wrong. */
error?: string
}

// Add custom interface for OpenRouter params.
type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
transforms?: string[]
Expand Down Expand Up @@ -242,4 +269,105 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
const completion = response as OpenAI.Chat.ChatCompletion
return completion.choices[0]?.message?.content || ""
}

/**
 * Generate an image using OpenRouter's chat-completions endpoint.
 *
 * Sends a single user message with `modalities: ["image", "text"]` and reads the
 * first image of the first choice. Failures are not thrown: a missing API key, a
 * non-2xx response, an error payload, a missing or unsupported image, and any
 * exception raised while making the request are all reported as
 * `{ success: false, error }`.
 *
 * @param prompt The text prompt for image generation
 * @param model The model to use for generation
 * @param apiKey The OpenRouter API key (must be explicitly provided)
 * @returns On success, the full data URL in `imageData` and its format (`png`,
 * `jpeg` or `jpg`) in `imageFormat`; otherwise `success: false` with `error` set
 */
async generateImage(prompt: string, model: string, apiKey: string): Promise<ImageGenerationResult> {
	// Every failure path resolves to the same shape, so build it in one place.
	const fail = (error: string): ImageGenerationResult => ({ success: false, error })

	if (!apiKey) {
		return fail("OpenRouter API key is required for image generation")
	}

	try {
		// NOTE(review): the endpoint and attribution headers are hard-coded here even though
		// this file imports DEFAULT_HEADERS — confirm whether they should be shared.
		const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
			method: "POST",
			headers: {
				Authorization: `Bearer ${apiKey}`,
				"Content-Type": "application/json",
				"HTTP-Referer": "https://github.com/RooVetGit/Roo-Code",
				"X-Title": "Roo Code",
			},
			body: JSON.stringify({
				model,
				messages: [{ role: "user", content: prompt }],
				modalities: ["image", "text"],
			}),
		})

		if (!response.ok) {
			const errorText = await response.text()
			let errorMessage = `Failed to generate image: ${response.status} ${response.statusText}`
			try {
				// Prefer the API's own message when the error body is JSON that carries one.
				const apiMessage = (JSON.parse(errorText) as ImageGenerationResponse | null)?.error?.message
				if (apiMessage) {
					errorMessage = `Failed to generate image: ${apiMessage}`
				}
			} catch {
				// Body was not valid JSON; deliberately keep the status-line message.
			}
			return fail(errorMessage)
		}

		// A literal `null` body is valid JSON, so guard every access below with `?.`.
		const result = (await response.json()) as ImageGenerationResponse | null

		if (result?.error) {
			// The error object may arrive without a message; never surface "undefined" to the user.
			return fail(`Failed to generate image: ${result.error.message || "Unknown error"}`)
		}

		// Only the first image of the first choice is used.
		const images = result?.choices?.[0]?.message?.images
		if (!images || images.length === 0) {
			return fail("No image was generated in the response")
		}

		const imageData = images[0]?.image_url?.url
		if (!imageData) {
			return fail("Invalid image data in response")
		}

		// Validate the data URL and capture its format. The complete data URL (not just the
		// base64 payload) is what gets returned to the caller.
		const base64Match = imageData.match(/^data:image\/(png|jpeg|jpg);base64,(.+)$/)
		if (!base64Match) {
			return fail("Invalid image format received")
		}

		return {
			success: true,
			imageData,
			imageFormat: base64Match[1],
		}
	} catch (error) {
		// Network failures and malformed success bodies end up here.
		return fail(error instanceof Error ? error.message : "Unknown error occurred")
	}
}
}
6 changes: 6 additions & 0 deletions src/core/assistant-message/presentAssistantMessage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import { attemptCompletionTool } from "../tools/attemptCompletionTool"
import { newTaskTool } from "../tools/newTaskTool"

import { updateTodoListTool } from "../tools/updateTodoListTool"
import { generateImageTool } from "../tools/generateImageTool"

import { formatResponse } from "../prompts/responses"
import { validateToolUse } from "../tools/validateToolUse"
Expand Down Expand Up @@ -221,6 +222,8 @@ export async function presentAssistantMessage(cline: Task) {
const modeName = getModeBySlug(mode, customModes)?.name ?? mode
return `[${block.name} in ${modeName} mode: '${message}']`
}
case "generate_image":
return `[${block.name} for '${block.params.path}']`
}
}

Expand Down Expand Up @@ -546,6 +549,9 @@ export async function presentAssistantMessage(cline: Task) {
askFinishSubTaskApproval,
)
break
case "generate_image":
await generateImageTool(cline, block, askApproval, handleError, pushToolResult, removeClosingTag)
break
}

break
Expand Down
20 changes: 20 additions & 0 deletions src/core/prompts/tools/generate-image.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { ToolArgs } from "./types"

/**
 * Build the prompt section that documents the `generate_image` tool.
 *
 * @param args Tool arguments; only `args.cwd` is read, to tell the model which
 * directory the `path` parameter is relative to.
 * @returns The tool description: heading, parameter list, usage template and one example.
 */
export function getGenerateImageDescription(args: ToolArgs): string {
	const { cwd } = args

	const sections = [
		"## generate_image",
		"Description: Request to generate an image using AI models through OpenRouter API. This tool creates images from text prompts and saves them to the specified path.",
		"Parameters:",
		"- prompt: (required) The text prompt describing the image to generate",
		`- path: (required) The file path where the generated image should be saved (relative to the current workspace directory ${cwd}). The tool will automatically add the appropriate image extension if not provided.`,
		"Usage:",
		"<generate_image>",
		"<prompt>Your image description here</prompt>",
		"<path>path/to/save/image.png</path>",
		"</generate_image>",
		"",
		"Example: Requesting to generate a sunset image",
		"<generate_image>",
		"<prompt>A beautiful sunset over mountains with vibrant orange and purple colors</prompt>",
		"<path>images/sunset.png</path>",
		"</generate_image>",
	]

	return sections.join("\n")
}
8 changes: 8 additions & 0 deletions src/core/prompts/tools/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import { getSwitchModeDescription } from "./switch-mode"
import { getNewTaskDescription } from "./new-task"
import { getCodebaseSearchDescription } from "./codebase-search"
import { getUpdateTodoListDescription } from "./update-todo-list"
import { getGenerateImageDescription } from "./generate-image"
import { CodeIndexManager } from "../../../services/code-index/manager"

// Map of tool names to their description functions
Expand Down Expand Up @@ -56,6 +57,7 @@ const toolDescriptionMap: Record<string, (args: ToolArgs) => string | undefined>
apply_diff: (args) =>
args.diffStrategy ? args.diffStrategy.getToolDescription({ cwd: args.cwd, toolOptions: args.toolOptions }) : "",
update_todo_list: (args) => getUpdateTodoListDescription(args),
generate_image: (args) => getGenerateImageDescription(args),
}

export function getToolDescriptionsForMode(
Expand Down Expand Up @@ -129,6 +131,11 @@ export function getToolDescriptionsForMode(
tools.delete("update_todo_list")
}

// Conditionally exclude generate_image if experiment is not enabled
if (!experiments?.imageGeneration) {
tools.delete("generate_image")
}

// Map tool descriptions for allowed tools
const descriptions = Array.from(tools).map((toolName) => {
const descriptionFn = toolDescriptionMap[toolName]
Expand Down Expand Up @@ -164,4 +171,5 @@ export {
getInsertContentDescription,
getSearchAndReplaceDescription,
getCodebaseSearchDescription,
getGenerateImageDescription,
}
Loading
Loading