178 changes: 166 additions & 12 deletions src/api/providers/__tests__/lite-llm.spec.ts
@@ -10,15 +10,9 @@ import { litellmDefaultModelId, litellmDefaultModelInfo } from "@roo-code/types"
vi.mock("vscode", () => ({}))

// Mock OpenAI
vi.mock("openai", () => {
const mockStream = {
[Symbol.asyncIterator]: vi.fn(),
}

const mockCreate = vi.fn().mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})
const mockCreate = vi.fn()

vi.mock("openai", () => {
return {
default: vi.fn().mockImplementation(() => ({
chat: {
@@ -35,14 +29,22 @@ vi.mock("../fetchers/modelCache", () => ({
getModels: vi.fn().mockImplementation(() => {
return Promise.resolve({
[litellmDefaultModelId]: litellmDefaultModelInfo,
"gpt-5": { ...litellmDefaultModelInfo, maxTokens: 8192 },
gpt5: { ...litellmDefaultModelInfo, maxTokens: 8192 },
"GPT-5": { ...litellmDefaultModelInfo, maxTokens: 8192 },
"gpt-5-turbo": { ...litellmDefaultModelInfo, maxTokens: 8192 },
"gpt5-preview": { ...litellmDefaultModelInfo, maxTokens: 8192 },
"gpt-4": { ...litellmDefaultModelInfo, maxTokens: 8192 },
"claude-3-opus": { ...litellmDefaultModelInfo, maxTokens: 8192 },
"llama-3": { ...litellmDefaultModelInfo, maxTokens: 8192 },
"gpt-4-turbo": { ...litellmDefaultModelInfo, maxTokens: 8192 },
})
}),
}))

describe("LiteLLMHandler", () => {
let handler: LiteLLMHandler
let mockOptions: ApiHandlerOptions
let mockOpenAIClient: any

beforeEach(() => {
vi.clearAllMocks()
@@ -52,7 +54,6 @@ describe("LiteLLMHandler", () => {
litellmModelId: litellmDefaultModelId,
}
handler = new LiteLLMHandler(mockOptions)
mockOpenAIClient = new OpenAI()
})

describe("prompt caching", () => {
@@ -85,7 +86,7 @@ describe("LiteLLMHandler", () => {
},
}

mockOpenAIClient.chat.completions.create.mockReturnValue({
mockCreate.mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})

@@ -96,7 +97,7 @@ describe("LiteLLMHandler", () => {
}

// Verify that create was called with cache control headers
const createCall = mockOpenAIClient.chat.completions.create.mock.calls[0][0]
const createCall = mockCreate.mock.calls[0][0]

// Check system message has cache control in the proper format
expect(createCall.messages[0]).toMatchObject({
Expand Down Expand Up @@ -155,4 +156,157 @@ describe("LiteLLMHandler", () => {
})
})
})

describe("GPT-5 model handling", () => {
it("should use max_completion_tokens instead of max_tokens for GPT-5 models", async () => {
const optionsWithGPT5: ApiHandlerOptions = {
...mockOptions,
litellmModelId: "gpt-5",
}
handler = new LiteLLMHandler(optionsWithGPT5)

const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]

// Mock the stream response
const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
choices: [{ delta: { content: "Hello!" } }],
usage: {
prompt_tokens: 10,
completion_tokens: 5,
},
}
},
}

mockCreate.mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})

const generator = handler.createMessage(systemPrompt, messages)
const results = []
for await (const chunk of generator) {
results.push(chunk)
}

// Verify that create was called with max_completion_tokens instead of max_tokens
const createCall = mockCreate.mock.calls[0][0]

// Should have max_completion_tokens, not max_tokens
expect(createCall.max_completion_tokens).toBeDefined()
expect(createCall.max_tokens).toBeUndefined()
})

it("should use max_completion_tokens for various GPT-5 model variations", async () => {
const gpt5Variations = ["gpt-5", "gpt5", "GPT-5", "gpt-5-turbo", "gpt5-preview"]
Review comment (Author):
Great test coverage! Consider adding edge cases like mixed case variations ("GpT-5", "gPt5") or models with additional suffixes ("gpt-5-32k", "gpt-5-vision") to ensure the detection works correctly for all possible GPT-5 model names.
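A minimal sketch of what that extra coverage might look like, following the same pattern as the loop below. The extra model IDs are hypothetical and would likely also need entries in the modelCache mock at the top of this file so that fetchModel() resolves a maxTokens value for them:

const extraVariations = ["GpT-5", "gPt5", "gpt-5-32k", "gpt-5-vision"] // hypothetical IDs

for (const modelId of extraVariations) {
	vi.clearAllMocks()
	handler = new LiteLLMHandler({ ...mockOptions, litellmModelId: modelId })

	// Reuse the same streaming mock shape as the other tests in this file
	mockCreate.mockReturnValue({
		withResponse: vi.fn().mockResolvedValue({
			data: { async *[Symbol.asyncIterator]() {} },
		}),
	})

	for await (const _chunk of handler.createMessage("You are a helpful assistant", [
		{ role: "user", content: "Test" },
	])) {
		// Consume the generator
	}

	expect(mockCreate.mock.calls[0][0].max_completion_tokens).toBeDefined()
	expect(mockCreate.mock.calls[0][0].max_tokens).toBeUndefined()
}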


for (const modelId of gpt5Variations) {
vi.clearAllMocks()

const optionsWithGPT5: ApiHandlerOptions = {
...mockOptions,
litellmModelId: modelId,
}
handler = new LiteLLMHandler(optionsWithGPT5)

const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test" }]

// Mock the stream response
const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
choices: [{ delta: { content: "Response" } }],
usage: {
prompt_tokens: 10,
completion_tokens: 5,
},
}
},
}

mockCreate.mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})

const generator = handler.createMessage(systemPrompt, messages)
for await (const chunk of generator) {
// Consume the generator
}

// Verify that create was called with max_completion_tokens for this model variation
const createCall = mockCreate.mock.calls[0][0]

expect(createCall.max_completion_tokens).toBeDefined()
expect(createCall.max_tokens).toBeUndefined()
}
})

it("should still use max_tokens for non-GPT-5 models", async () => {
const nonGPT5Models = ["gpt-4", "claude-3-opus", "llama-3", "gpt-4-turbo"]

for (const modelId of nonGPT5Models) {
vi.clearAllMocks()

const options: ApiHandlerOptions = {
...mockOptions,
litellmModelId: modelId,
}
handler = new LiteLLMHandler(options)

const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test" }]

// Mock the stream response
const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
choices: [{ delta: { content: "Response" } }],
usage: {
prompt_tokens: 10,
completion_tokens: 5,
},
}
},
}

mockCreate.mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})

const generator = handler.createMessage(systemPrompt, messages)
for await (const chunk of generator) {
// Consume the generator
}

// Verify that create was called with max_tokens for non-GPT-5 models
const createCall = mockCreate.mock.calls[0][0]

expect(createCall.max_tokens).toBeDefined()
expect(createCall.max_completion_tokens).toBeUndefined()
}
})

it("should use max_completion_tokens in completePrompt for GPT-5 models", async () => {
const optionsWithGPT5: ApiHandlerOptions = {
...mockOptions,
litellmModelId: "gpt-5",
}
handler = new LiteLLMHandler(optionsWithGPT5)

mockCreate.mockResolvedValue({
choices: [{ message: { content: "Test response" } }],
})

await handler.completePrompt("Test prompt")

// Verify that create was called with max_completion_tokens
const createCall = mockCreate.mock.calls[0][0]

expect(createCall.max_completion_tokens).toBeDefined()
expect(createCall.max_tokens).toBeUndefined()
})
})
})
23 changes: 21 additions & 2 deletions src/api/providers/lite-llm.ts
@@ -107,16 +107,26 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
// Required by some providers; others default to max tokens allowed
let maxTokens: number | undefined = info.maxTokens ?? undefined

// Check if this is a GPT-5 model that requires max_completion_tokens instead of max_tokens
const isGPT5Model = modelId.toLowerCase().includes("gpt-5") || modelId.toLowerCase().includes("gpt5")
Review comment:
Consider extracting the GPT-5 model detection logic into a shared helper function. This logic (using modelId.toLowerCase().includes('gpt-5') || modelId.toLowerCase().includes('gpt5')) appears in both createMessage and completePrompt, and centralizing it would improve maintainability.

This comment was generated because it violated a code review rule: irule_tTqpIuNs8DV0QFGj.

Review comment (Author):
The model detection logic could be more precise. Currently, modelId.toLowerCase().includes("gpt-5") would match unintended models like "not-gpt-5000". Consider using a more specific pattern:

Suggested change
const isGPT5Model = modelId.toLowerCase().includes("gpt-5") || modelId.toLowerCase().includes("gpt5")
// Check if this is a GPT-5 model that requires max_completion_tokens instead of max_tokens
const modelLower = modelId.toLowerCase()
const isGPT5Model = modelLower.startsWith("gpt-5") || modelLower.startsWith("gpt5") || modelLower === "gpt5"

Review comment (Copilot AI, Sep 23, 2025):
The GPT-5 model detection logic is duplicated between the createMessage and completePrompt methods. Consider extracting this into a private helper method to improve maintainability and ensure consistency.


const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model: modelId,
max_tokens: maxTokens,
messages: [systemMessage, ...enhancedMessages],
stream: true,
stream_options: {
include_usage: true,
},
}

// GPT-5 models require max_completion_tokens instead of the deprecated max_tokens parameter
if (isGPT5Model && maxTokens) {
// @ts-ignore - max_completion_tokens is not in the OpenAI types yet but is supported
Review comment (Author):
Is there a way to avoid using @ts-ignore here? Could we extend the OpenAI types or create a custom interface that includes max_completion_tokens to maintain type safety? For example:

interface GPT5RequestOptions extends Omit<OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming, 'max_tokens'> {
  max_completion_tokens?: number
  max_tokens?: never
}

requestOptions.max_completion_tokens = maxTokens
Review comment (Copilot AI, Sep 23, 2025):
Using @ts-ignore to suppress TypeScript errors for max_completion_tokens is not ideal. Consider using type assertion with a more specific interface or extending the OpenAI types to include this property for better type safety.
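A sketch of one way to do that, assuming max_completion_tokens is not yet in the SDK's published types: widen the request type locally and assign through an assertion, so the @ts-ignore is no longer needed (the type name is illustrative):

// Illustrative local widening of the streaming params type
type StreamingParamsWithMaxCompletionTokens = OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming & {
	max_completion_tokens?: number
}

// GPT-5 models require max_completion_tokens instead of the deprecated max_tokens parameter
if (isGPT5Model && maxTokens) {
	(requestOptions as StreamingParamsWithMaxCompletionTokens).max_completion_tokens = maxTokens
} else if (maxTokens) {
	requestOptions.max_tokens = maxTokens
}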

} else if (maxTokens) {
requestOptions.max_tokens = maxTokens
}

if (this.supportsTemperature(modelId)) {
requestOptions.temperature = this.options.modelTemperature ?? 0
}
@@ -179,6 +189,9 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
async completePrompt(prompt: string): Promise<string> {
const { id: modelId, info } = await this.fetchModel()

// Check if this is a GPT-5 model that requires max_completion_tokens instead of max_tokens
const isGPT5Model = modelId.toLowerCase().includes("gpt-5") || modelId.toLowerCase().includes("gpt5")
Review comment (Author):
This detection logic is duplicated from line 111. Would it be cleaner to extract this into a helper method to maintain DRY principles? Something like:

private isGPT5Model(modelId: string): boolean {
  const modelLower = modelId.toLowerCase()
  return modelLower.startsWith("gpt-5") || modelLower.startsWith("gpt5") || modelLower === "gpt5"
}
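If a helper like that were adopted, both call sites could then reduce to a one-liner, e.g.:

// In both createMessage() and completePrompt() (illustrative)
const isGPT5Model = this.isGPT5Model(modelId)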


try {
const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
model: modelId,
@@ -189,7 +202,13 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
requestOptions.temperature = this.options.modelTemperature ?? 0
}

requestOptions.max_tokens = info.maxTokens
// GPT-5 models require max_completion_tokens instead of the deprecated max_tokens parameter
if (isGPT5Model && info.maxTokens) {
// @ts-ignore - max_completion_tokens is not in the OpenAI types yet but is supported
requestOptions.max_completion_tokens = info.maxTokens
Review comment (Copilot AI, Sep 23, 2025):
This is a duplicate of the previous @ts-ignore comment. The same type safety concerns apply here - consider using a consistent approach to handle the missing type definition.

} else if (info.maxTokens) {
requestOptions.max_tokens = info.maxTokens
}

const response = await this.client.chat.completions.create(requestOptions)
return response.choices[0]?.message.content || ""