Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/types/npm/package.metadata.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@roo-code/types",
"version": "1.62.0",
"version": "1.63.0",
"description": "TypeScript type definitions for Roo Code.",
"publishConfig": {
"access": "public",
Expand Down
8 changes: 7 additions & 1 deletion packages/types/src/experiment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@ import type { Keys, Equals, AssertEqual } from "./type-fu.js"
* ExperimentId
*/

export const experimentIds = ["powerSteering", "multiFileApplyDiff", "preventFocusDisruption"] as const
// Identifiers of the experimental features. Declared `as const` so the array keeps its
// string-literal element types, which the enum schema below is built from.
export const experimentIds = [
"powerSteering",
"multiFileApplyDiff",
"preventFocusDisruption",
"imageGeneration",
] as const

// Zod enum that accepts exactly the ids listed in `experimentIds`.
export const experimentIdsSchema = z.enum(experimentIds)

Expand All @@ -20,6 +25,7 @@ export const experimentsSchema = z.object({
powerSteering: z.boolean().optional(),
multiFileApplyDiff: z.boolean().optional(),
preventFocusDisruption: z.boolean().optional(),
imageGeneration: z.boolean().optional(),
})

export type Experiments = z.infer<typeof experimentsSchema>
Expand Down
7 changes: 7 additions & 0 deletions packages/types/src/provider-settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,13 @@ const openRouterSchema = baseProviderSettingsSchema.extend({
openRouterBaseUrl: z.string().optional(),
openRouterSpecificProvider: z.string().optional(),
openRouterUseMiddleOutTransform: z.boolean().optional(),
// Image generation settings (experimental)
openRouterImageGenerationSettings: z
.object({
openRouterApiKey: z.string().optional(),
selectedModel: z.string().optional(),
})
.optional(),
})

const bedrockSchema = apiModelIdProviderModelSchema.extend({
Expand Down
1 change: 1 addition & 0 deletions packages/types/src/tool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ export const toolNames = [
"fetch_instructions",
"codebase_search",
"update_todo_list",
"generate_image",
] as const

export const toolNamesSchema = z.enum(toolNames)
Expand Down
18 changes: 9 additions & 9 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

128 changes: 128 additions & 0 deletions src/api/providers/openrouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,33 @@ import { DEFAULT_HEADERS } from "./constants"
import { BaseProvider } from "./base-provider"
import type { SingleCompletionHandler } from "../index"

// Image generation types
/**
 * Shape of the JSON body that `generateImage` reads back from the OpenRouter
 * chat-completions endpoint.
 *
 * Every member is optional: the body comes from the network and is not validated,
 * so consumers must narrow with optional chaining before use.
 */
interface ImageGenerationResponse {
	/** Completion choices; `generateImage` only inspects the first one. */
	choices?: Array<{
		message?: {
			/** Text part of the assistant message, if any. */
			content?: string
			/** Generated images attached to the assistant message. */
			images?: Array<{
				type?: string
				image_url?: {
					/** Expected by `generateImage` to be a `data:image/...;base64,...` URL. */
					url?: string
				}
			}>
		}
	}>
	/** Present when the API reports a failure inside the response body. */
	error?: {
		message?: string
		type?: string
		/**
		 * OpenRouter documents error codes as numeric (mirroring the HTTP status);
		 * `string` is kept so existing string-typed uses still type-check.
		 */
		code?: number | string
	}
}

/**
 * Outcome of an image generation request as returned by `generateImage`.
 *
 * Failures are reported through `success: false` plus `error`; on success the
 * image fields are populated instead.
 */
export interface ImageGenerationResult {
	/** `true` when an image was produced, `false` when `error` describes what went wrong. */
	success: boolean

	/** Human-readable failure description; set when `success` is `false`. */
	error?: string

	/** The generated image as a full `data:image/...;base64,...` URL; set on success. */
	imageData?: string

	/** Image subtype taken from the data URL (`png`, `jpeg` or `jpg`); set on success. */
	imageFormat?: string
}

// Add custom interface for OpenRouter params.
type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
transforms?: string[]
Expand Down Expand Up @@ -242,4 +269,105 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
const completion = response as OpenAI.Chat.ChatCompletion
return completion.choices[0]?.message?.content || ""
}

/**
 * Generate an image by calling OpenRouter's chat-completions endpoint with image output enabled.
 *
 * This method does not throw: a missing key, an HTTP error, an error reported in the
 * response body, a missing or malformed image payload and any exception raised while
 * talking to the API are all reported as `{ success: false, error }`.
 *
 * @param prompt The text prompt for image generation
 * @param model The model to use for generation
 * @param apiKey The OpenRouter API key (must be explicitly provided)
 * @returns On success, the image as a full `data:image/...;base64,...` URL in `imageData` and its
 * subtype (`png`, `jpeg` or `jpg`) in `imageFormat`; otherwise `success: false` with an error message
 */
async generateImage(prompt: string, model: string, apiKey: string): Promise<ImageGenerationResult> {
	if (!apiKey) {
		return {
			success: false,
			error: "OpenRouter API key is required for image generation",
		}
	}

	try {
		const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
			method: "POST",
			headers: {
				Authorization: `Bearer ${apiKey}`,
				"Content-Type": "application/json",
				// App-attribution headers identifying this client to OpenRouter.
				"HTTP-Referer": "https://github.com/RooVetGit/Roo-Code",
				"X-Title": "Roo Code",
			},
			body: JSON.stringify({
				model,
				messages: [
					{
						role: "user",
						content: prompt,
					},
				],
				// Ask for image output in addition to text.
				modalities: ["image", "text"],
			}),
		})

		if (!response.ok) {
			const errorText = await response.text()
			// Default to the HTTP status line; upgraded below if the body carries an API message.
			let errorMessage = `Failed to generate image: ${response.status} ${response.statusText}`
			try {
				const errorJson: ImageGenerationResponse | null = JSON.parse(errorText)
				const apiMessage = errorJson?.error?.message
				if (apiMessage) {
					errorMessage = `Failed to generate image: ${apiMessage}`
				}
			} catch {
				// The error body was not valid JSON. This is a deliberate best-effort:
				// keep the status-based message rather than failing on the parse error.
			}
			return {
				success: false,
				error: errorMessage,
			}
		}

		// A JSON `null` body is tolerated and ends up in the "no image" branch below.
		const result: ImageGenerationResponse | null = await response.json()

		if (result?.error) {
			// `message` is optional; never surface the literal text "undefined" to the user.
			const detail = result.error.message || "Unknown error"
			return {
				success: false,
				error: `Failed to generate image: ${detail}`,
			}
		}

		// Extract the generated image from the first choice of the response
		const images = result?.choices?.[0]?.message?.images
		if (!images || images.length === 0) {
			return {
				success: false,
				error: "No image was generated in the response",
			}
		}

		const imageData = images[0]?.image_url?.url
		if (!imageData) {
			return {
				success: false,
				error: "Invalid image data in response",
			}
		}

		// Only base64 data URLs with a png/jpeg/jpg subtype are accepted; group 1 is the subtype.
		const base64Match = imageData.match(/^data:image\/(png|jpeg|jpg);base64,(.+)$/)
		if (!base64Match) {
			return {
				success: false,
				error: "Invalid image format received",
			}
		}

		// The full data URL (not just the base64 payload) is handed back to the caller.
		return {
			success: true,
			imageData,
			imageFormat: base64Match[1],
		}
	} catch (error) {
		return {
			success: false,
			error: error instanceof Error ? error.message : "Unknown error occurred",
		}
	}
}
}
6 changes: 6 additions & 0 deletions src/core/assistant-message/presentAssistantMessage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import { attemptCompletionTool } from "../tools/attemptCompletionTool"
import { newTaskTool } from "../tools/newTaskTool"

import { updateTodoListTool } from "../tools/updateTodoListTool"
import { generateImageTool } from "../tools/generateImageTool"

import { formatResponse } from "../prompts/responses"
import { validateToolUse } from "../tools/validateToolUse"
Expand Down Expand Up @@ -221,6 +222,8 @@ export async function presentAssistantMessage(cline: Task) {
const modeName = getModeBySlug(mode, customModes)?.name ?? mode
return `[${block.name} in ${modeName} mode: '${message}']`
}
case "generate_image":
return `[${block.name} for '${block.params.path}']`
}
}

Expand Down Expand Up @@ -546,6 +549,9 @@ export async function presentAssistantMessage(cline: Task) {
askFinishSubTaskApproval,
)
break
case "generate_image":
await generateImageTool(cline, block, askApproval, handleError, pushToolResult, removeClosingTag)
break
}

break
Expand Down
20 changes: 20 additions & 0 deletions src/core/prompts/tools/generate-image.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { ToolArgs } from "./types"

/**
 * Builds the prompt text that describes the `generate_image` tool: its purpose,
 * its two required parameters, the XML usage skeleton and one worked example.
 *
 * @param args Tool arguments; only `cwd` is read, and it is interpolated into the
 * `path` parameter description as the workspace directory.
 * @returns The tool description as a newline-separated string with no trailing newline.
 */
export function getGenerateImageDescription(args: ToolArgs): string {
	const workspaceDir = args.cwd

	const header = [
		"## generate_image",
		"Description: Request to generate an image using AI models through OpenRouter API. This tool creates images from text prompts and saves them to the specified path.",
	]

	const parameters = [
		"Parameters:",
		"- prompt: (required) The text prompt describing the image to generate",
		`- path: (required) The file path where the generated image should be saved (relative to the current workspace directory ${workspaceDir}). The tool will automatically add the appropriate image extension if not provided.`,
	]

	const usage = [
		"Usage:",
		"<generate_image>",
		"<prompt>Your image description here</prompt>",
		"<path>path/to/save/image.png</path>",
		"</generate_image>",
	]

	const example = [
		"Example: Requesting to generate a sunset image",
		"<generate_image>",
		"<prompt>A beautiful sunset over mountains with vibrant orange and purple colors</prompt>",
		"<path>images/sunset.png</path>",
		"</generate_image>",
	]

	// A single empty entry reproduces the blank line that separates usage from the example.
	return [...header, ...parameters, ...usage, "", ...example].join("\n")
}
8 changes: 8 additions & 0 deletions src/core/prompts/tools/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import { getSwitchModeDescription } from "./switch-mode"
import { getNewTaskDescription } from "./new-task"
import { getCodebaseSearchDescription } from "./codebase-search"
import { getUpdateTodoListDescription } from "./update-todo-list"
import { getGenerateImageDescription } from "./generate-image"
import { CodeIndexManager } from "../../../services/code-index/manager"

// Map of tool names to their description functions
Expand Down Expand Up @@ -56,6 +57,7 @@ const toolDescriptionMap: Record<string, (args: ToolArgs) => string | undefined>
apply_diff: (args) =>
args.diffStrategy ? args.diffStrategy.getToolDescription({ cwd: args.cwd, toolOptions: args.toolOptions }) : "",
update_todo_list: (args) => getUpdateTodoListDescription(args),
generate_image: (args) => getGenerateImageDescription(args),
}

export function getToolDescriptionsForMode(
Expand Down Expand Up @@ -129,6 +131,11 @@ export function getToolDescriptionsForMode(
tools.delete("update_todo_list")
}

// Conditionally exclude generate_image if experiment is not enabled
if (!experiments?.imageGeneration) {
tools.delete("generate_image")
}

// Map tool descriptions for allowed tools
const descriptions = Array.from(tools).map((toolName) => {
const descriptionFn = toolDescriptionMap[toolName]
Expand Down Expand Up @@ -164,4 +171,5 @@ export {
getInsertContentDescription,
getSearchAndReplaceDescription,
getCodebaseSearchDescription,
getGenerateImageDescription,
}
Loading
Loading