Skip to content

Commit ea51c04

Browse files
daniel-lxs, mrubens, cte
authored and committed
feat: add image generation tool with OpenRouter integration (#7474)
Co-authored-by: Matt Rubens <[email protected]>
Co-authored-by: cte <[email protected]>
1 parent 3eb9d12 commit ea51c04

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

58 files changed

+197
-1274
lines changed

packages/types/npm/package.metadata.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@roo-code/types",
3-
"version": "1.74.0",
3+
"version": "1.63.0",
44
"description": "TypeScript type definitions for Roo Code.",
55
"publishConfig": {
66
"access": "public",

packages/types/src/experiment.ts

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ export const experimentIds = [
1111
"multiFileApplyDiff",
1212
"preventFocusDisruption",
1313
"imageGeneration",
14-
"runSlashCommand",
1514
] as const
1615

1716
export const experimentIdsSchema = z.enum(experimentIds)
@@ -27,7 +26,6 @@ export const experimentsSchema = z.object({
2726
multiFileApplyDiff: z.boolean().optional(),
2827
preventFocusDisruption: z.boolean().optional(),
2928
imageGeneration: z.boolean().optional(),
30-
runSlashCommand: z.boolean().optional(),
3129
})
3230

3331
export type Experiments = z.infer<typeof experimentsSchema>

packages/types/src/provider-settings.ts

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,13 @@ const openRouterSchema = baseProviderSettingsSchema.extend({
143143
openRouterBaseUrl: z.string().optional(),
144144
openRouterSpecificProvider: z.string().optional(),
145145
openRouterUseMiddleOutTransform: z.boolean().optional(),
146+
// Image generation settings (experimental)
147+
openRouterImageGenerationSettings: z
148+
.object({
149+
openRouterApiKey: z.string().optional(),
150+
selectedModel: z.string().optional(),
151+
})
152+
.optional(),
146153
})
147154

148155
const bedrockSchema = apiModelIdProviderModelSchema.extend({

packages/types/src/tool.ts

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,6 @@ export const toolNames = [
3434
"fetch_instructions",
3535
"codebase_search",
3636
"update_todo_list",
37-
"run_slash_command",
3837
"generate_image",
3938
] as const
4039

pnpm-lock.yaml

Lines changed: 9 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/api/providers/openrouter.ts

Lines changed: 4 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@ import { getModelEndpoints } from "./fetchers/modelEndpointCache"
2525
import { DEFAULT_HEADERS } from "./constants"
2626
import { BaseProvider } from "./base-provider"
2727
import type { SingleCompletionHandler } from "../index"
28-
import { handleOpenAIError } from "./utils/openai-error-handler"
2928

3029
// Image generation types
3130
interface ImageGenerationResponse {
@@ -86,7 +85,6 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
8685
private client: OpenAI
8786
protected models: ModelRecord = {}
8887
protected endpoints: ModelRecord = {}
89-
private readonly providerName = "OpenRouter"
9088

9189
constructor(options: ApiHandlerOptions) {
9290
super()
@@ -163,12 +161,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
163161
...(reasoning && { reasoning }),
164162
}
165163

166-
let stream
167-
try {
168-
stream = await this.client.chat.completions.create(completionParams)
169-
} catch (error) {
170-
throw handleOpenAIError(error, this.providerName)
171-
}
164+
const stream = await this.client.chat.completions.create(completionParams)
172165

173166
let lastUsage: CompletionUsage | undefined = undefined
174167

@@ -266,12 +259,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
266259
...(reasoning && { reasoning }),
267260
}
268261

269-
let response
270-
try {
271-
response = await this.client.chat.completions.create(completionParams)
272-
} catch (error) {
273-
throw handleOpenAIError(error, this.providerName)
274-
}
262+
const response = await this.client.chat.completions.create(completionParams)
275263

276264
if ("error" in response) {
277265
const error = response.error as { message?: string; code?: number }
@@ -287,15 +275,9 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
287275
* @param prompt The text prompt for image generation
288276
* @param model The model to use for generation
289277
* @param apiKey The OpenRouter API key (must be explicitly provided)
290-
* @param inputImage Optional base64 encoded input image data URL
291278
* @returns The generated image data and format, or an error
292279
*/
293-
async generateImage(
294-
prompt: string,
295-
model: string,
296-
apiKey: string,
297-
inputImage?: string,
298-
): Promise<ImageGenerationResult> {
280+
async generateImage(prompt: string, model: string, apiKey: string): Promise<ImageGenerationResult> {
299281
if (!apiKey) {
300282
return {
301283
success: false,
@@ -317,20 +299,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
317299
messages: [
318300
{
319301
role: "user",
320-
content: inputImage
321-
? [
322-
{
323-
type: "text",
324-
text: prompt,
325-
},
326-
{
327-
type: "image_url",
328-
image_url: {
329-
url: inputImage,
330-
},
331-
},
332-
]
333-
: prompt,
302+
content: prompt,
334303
},
335304
],
336305
modalities: ["image", "text"],

src/core/assistant-message/presentAssistantMessage.ts

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@ import { attemptCompletionTool } from "../tools/attemptCompletionTool"
2828
import { newTaskTool } from "../tools/newTaskTool"
2929

3030
import { updateTodoListTool } from "../tools/updateTodoListTool"
31-
import { runSlashCommandTool } from "../tools/runSlashCommandTool"
3231
import { generateImageTool } from "../tools/generateImageTool"
3332

3433
import { formatResponse } from "../prompts/responses"
@@ -224,8 +223,6 @@ export async function presentAssistantMessage(cline: Task) {
224223
const modeName = getModeBySlug(mode, customModes)?.name ?? mode
225224
return `[${block.name} in ${modeName} mode: '${message}']`
226225
}
227-
case "run_slash_command":
228-
return `[${block.name} for '${block.params.command}'${block.params.args ? ` with args: ${block.params.args}` : ""}]`
229226
case "generate_image":
230227
return `[${block.name} for '${block.params.path}']`
231228
}
@@ -558,9 +555,6 @@ export async function presentAssistantMessage(cline: Task) {
558555
askFinishSubTaskApproval,
559556
)
560557
break
561-
case "run_slash_command":
562-
await runSlashCommandTool(cline, block, askApproval, handleError, pushToolResult, removeClosingTag)
563-
break
564558
case "generate_image":
565559
await generateImageTool(cline, block, askApproval, handleError, pushToolResult, removeClosingTag)
566560
break

src/core/prompts/tools/generate-image.ts

Lines changed: 3 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -2,35 +2,19 @@ import { ToolArgs } from "./types"
22

33
export function getGenerateImageDescription(args: ToolArgs): string {
44
return `## generate_image
5-
Description: Request to generate or edit an image using AI models through OpenRouter API. This tool can create new images from text prompts or modify existing images based on your instructions. When an input image is provided, the AI will apply the requested edits, transformations, or enhancements to that image.
5+
Description: Request to generate an image using AI models through OpenRouter API. This tool creates images from text prompts and saves them to the specified path.
66
Parameters:
7-
- prompt: (required) The text prompt describing what to generate or how to edit the image
8-
- path: (required) The file path where the generated/edited image should be saved (relative to the current workspace directory ${args.cwd}). The tool will automatically add the appropriate image extension if not provided.
9-
- image: (optional) The file path to an input image to edit or transform (relative to the current workspace directory ${args.cwd}). Supported formats: PNG, JPG, JPEG, GIF, WEBP.
7+
- prompt: (required) The text prompt describing the image to generate
8+
- path: (required) The file path where the generated image should be saved (relative to the current workspace directory ${args.cwd}). The tool will automatically add the appropriate image extension if not provided.
109
Usage:
1110
<generate_image>
1211
<prompt>Your image description here</prompt>
1312
<path>path/to/save/image.png</path>
14-
<image>path/to/input/image.jpg</image>
1513
</generate_image>
1614
1715
Example: Requesting to generate a sunset image
1816
<generate_image>
1917
<prompt>A beautiful sunset over mountains with vibrant orange and purple colors</prompt>
2018
<path>images/sunset.png</path>
21-
</generate_image>
22-
23-
Example: Editing an existing image
24-
<generate_image>
25-
<prompt>Transform this image into a watercolor painting style</prompt>
26-
<path>images/watercolor-output.png</path>
27-
<image>images/original-photo.jpg</image>
28-
</generate_image>
29-
30-
Example: Upscaling and enhancing an image
31-
<generate_image>
32-
<prompt>Upscale this image to higher resolution, enhance details, improve clarity and sharpness while maintaining the original content and composition</prompt>
33-
<path>images/enhanced-photo.png</path>
34-
<image>images/low-res-photo.jpg</image>
3519
</generate_image>`
3620
}

src/core/prompts/tools/index.ts

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@ import { getSwitchModeDescription } from "./switch-mode"
2525
import { getNewTaskDescription } from "./new-task"
2626
import { getCodebaseSearchDescription } from "./codebase-search"
2727
import { getUpdateTodoListDescription } from "./update-todo-list"
28-
import { getRunSlashCommandDescription } from "./run-slash-command"
2928
import { getGenerateImageDescription } from "./generate-image"
3029
import { CodeIndexManager } from "../../../services/code-index/manager"
3130

@@ -58,7 +57,6 @@ const toolDescriptionMap: Record<string, (args: ToolArgs) => string | undefined>
5857
apply_diff: (args) =>
5958
args.diffStrategy ? args.diffStrategy.getToolDescription({ cwd: args.cwd, toolOptions: args.toolOptions }) : "",
6059
update_todo_list: (args) => getUpdateTodoListDescription(args),
61-
run_slash_command: () => getRunSlashCommandDescription(),
6260
generate_image: (args) => getGenerateImageDescription(args),
6361
}
6462

@@ -138,11 +136,6 @@ export function getToolDescriptionsForMode(
138136
tools.delete("generate_image")
139137
}
140138

141-
// Conditionally exclude run_slash_command if experiment is not enabled
142-
if (!experiments?.runSlashCommand) {
143-
tools.delete("run_slash_command")
144-
}
145-
146139
// Map tool descriptions for allowed tools
147140
const descriptions = Array.from(tools).map((toolName) => {
148141
const descriptionFn = toolDescriptionMap[toolName]
@@ -178,6 +171,5 @@ export {
178171
getInsertContentDescription,
179172
getSearchAndReplaceDescription,
180173
getCodebaseSearchDescription,
181-
getRunSlashCommandDescription,
182174
getGenerateImageDescription,
183175
}

0 commit comments

Comments
 (0)