diff --git a/packages/types/src/providers/zai.ts b/packages/types/src/providers/zai.ts
index b4049b812283..2b156cfb51f8 100644
--- a/packages/types/src/providers/zai.ts
+++ b/packages/types/src/providers/zai.ts
@@ -2,11 +2,14 @@ import type { ModelInfo } from "../model.js"
 import { ZaiApiLine } from "../provider-settings.js"
 
 // Z AI
+// https://docs.z.ai/guides/llm/glm-4-32b-0414-128k
 // https://docs.z.ai/guides/llm/glm-4.5
+// https://docs.z.ai/guides/llm/glm-4.6
 // https://docs.z.ai/guides/overview/pricing
+// https://bigmodel.cn/pricing
 
 export type InternationalZAiModelId = keyof typeof internationalZAiModels
-export const internationalZAiDefaultModelId: InternationalZAiModelId = "glm-4.5"
+export const internationalZAiDefaultModelId: InternationalZAiModelId = "glm-4.6"
 export const internationalZAiModels = {
 	"glm-4.5": {
 		maxTokens: 98_304,
@@ -32,9 +35,55 @@ export const internationalZAiModels = {
 		description:
 			"GLM-4.5-Air is the lightweight version of GLM-4.5. It balances performance and cost-effectiveness, and can flexibly switch to hybrid thinking models.",
 	},
+	"glm-4.5-x": {
+		maxTokens: 98_304,
+		contextWindow: 131_072,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 2.2,
+		outputPrice: 8.9,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.45,
+		description:
+			"GLM-4.5-X is a high-performance variant optimized for strong reasoning with ultra-fast responses.",
+	},
+	"glm-4.5-airx": {
+		maxTokens: 98_304,
+		contextWindow: 131_072,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 1.1,
+		outputPrice: 4.5,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.22,
+		description: "GLM-4.5-AirX is a lightweight, ultra-fast variant delivering strong performance with lower cost.",
+	},
+	"glm-4.5-flash": {
+		maxTokens: 98_304,
+		contextWindow: 131_072,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 0,
+		outputPrice: 0,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description: "GLM-4.5-Flash is a free, high-speed model excellent for reasoning, coding, and agentic tasks.",
+	},
+	"glm-4.5v": {
+		maxTokens: 16_384,
+		contextWindow: 131_072,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 0.6,
+		outputPrice: 1.8,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.11,
+		description:
+			"GLM-4.5V is Z.AI's multimodal visual reasoning model (image/video/text/file input), optimized for GUI tasks, grounding, and document/video understanding.",
+	},
 	"glm-4.6": {
 		maxTokens: 98_304,
-		contextWindow: 204_800,
+		contextWindow: 200_000,
 		supportsImages: false,
 		supportsPromptCache: true,
 		inputPrice: 0.6,
@@ -44,10 +93,21 @@ export const internationalZAiModels = {
 		description:
 			"GLM-4.6 is Zhipu's newest model with an extended context window of up to 200k tokens, providing enhanced capabilities for processing longer documents and conversations.",
 	},
+	"glm-4-32b-0414-128k": {
+		maxTokens: 98_304,
+		contextWindow: 131_072,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0.1,
+		outputPrice: 0.1,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description: "GLM-4-32B is a 32 billion parameter model with 128k context length, optimized for efficiency.",
+	},
 } as const satisfies Record<string, ModelInfo>
 
 export type MainlandZAiModelId = keyof typeof mainlandZAiModels
-export const mainlandZAiDefaultModelId: MainlandZAiModelId = "glm-4.5"
+export const mainlandZAiDefaultModelId: MainlandZAiModelId = "glm-4.6"
 export const mainlandZAiModels = {
 	"glm-4.5": {
 		maxTokens: 98_304,
@@ -60,26 +120,6 @@ export const mainlandZAiModels = {
 		cacheReadsPrice: 0.057,
description: "GLM-4.5 is Zhipu's latest featured model. Its comprehensive capabilities in reasoning, coding, and agent reach the state-of-the-art (SOTA) level among open-source models, with a context length of up to 128k.", - tiers: [ - { - contextWindow: 32_000, - inputPrice: 0.21, - outputPrice: 1.0, - cacheReadsPrice: 0.043, - }, - { - contextWindow: 128_000, - inputPrice: 0.29, - outputPrice: 1.14, - cacheReadsPrice: 0.057, - }, - { - contextWindow: Infinity, - inputPrice: 0.29, - outputPrice: 1.14, - cacheReadsPrice: 0.057, - }, - ], }, "glm-4.5-air": { maxTokens: 98_304, @@ -92,26 +132,52 @@ export const mainlandZAiModels = { cacheReadsPrice: 0.02, description: "GLM-4.5-Air is the lightweight version of GLM-4.5. It balances performance and cost-effectiveness, and can flexibly switch to hybrid thinking models.", - tiers: [ - { - contextWindow: 32_000, - inputPrice: 0.07, - outputPrice: 0.4, - cacheReadsPrice: 0.014, - }, - { - contextWindow: 128_000, - inputPrice: 0.1, - outputPrice: 0.6, - cacheReadsPrice: 0.02, - }, - { - contextWindow: Infinity, - inputPrice: 0.1, - outputPrice: 0.6, - cacheReadsPrice: 0.02, - }, - ], + }, + "glm-4.5-x": { + maxTokens: 98_304, + contextWindow: 131_072, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0.29, + outputPrice: 1.14, + cacheWritesPrice: 0, + cacheReadsPrice: 0.057, + description: + "GLM-4.5-X is a high-performance variant optimized for strong reasoning with ultra-fast responses.", + }, + "glm-4.5-airx": { + maxTokens: 98_304, + contextWindow: 131_072, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0.1, + outputPrice: 0.6, + cacheWritesPrice: 0, + cacheReadsPrice: 0.02, + description: "GLM-4.5-AirX is a lightweight, ultra-fast variant delivering strong performance with lower cost.", + }, + "glm-4.5-flash": { + maxTokens: 98_304, + contextWindow: 131_072, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0, + outputPrice: 0, + cacheWritesPrice: 0, + cacheReadsPrice: 0, + description: "GLM-4.5-Flash is a free, high-speed model excellent for reasoning, coding, and agentic tasks.", + }, + "glm-4.5v": { + maxTokens: 16_384, + contextWindow: 131_072, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.29, + outputPrice: 0.93, + cacheWritesPrice: 0, + cacheReadsPrice: 0.057, + description: + "GLM-4.5V is Z.AI's multimodal visual reasoning model (image/video/text/file input), optimized for GUI tasks, grounding, and document/video understanding.", }, "glm-4.6": { maxTokens: 98_304, @@ -124,45 +190,19 @@ export const mainlandZAiModels = { cacheReadsPrice: 0.057, description: "GLM-4.6 is Zhipu's newest model with an extended context window of up to 200k tokens, providing enhanced capabilities for processing longer documents and conversations.", - tiers: [ - { - contextWindow: 32_000, - inputPrice: 0.21, - outputPrice: 1.0, - cacheReadsPrice: 0.043, - }, - { - contextWindow: 128_000, - inputPrice: 0.29, - outputPrice: 1.14, - cacheReadsPrice: 0.057, - }, - { - contextWindow: 200_000, - inputPrice: 0.29, - outputPrice: 1.14, - cacheReadsPrice: 0.057, - }, - { - contextWindow: Infinity, - inputPrice: 0.29, - outputPrice: 1.14, - cacheReadsPrice: 0.057, - }, - ], }, } as const satisfies Record -export const ZAI_DEFAULT_TEMPERATURE = 0 +export const ZAI_DEFAULT_TEMPERATURE = 0.6 export const zaiApiLineConfigs = { international_coding: { - name: "International Coding Plan", + name: "International", baseUrl: "https://api.z.ai/api/coding/paas/v4", isChina: false, }, china_coding: { - 
name: "China Coding Plan", + name: "China", baseUrl: "https://open.bigmodel.cn/api/coding/paas/v4", isChina: true, }, diff --git a/src/api/providers/__tests__/zai.spec.ts b/src/api/providers/__tests__/zai.spec.ts index bb892960889f..14e3e2465388 100644 --- a/src/api/providers/__tests__/zai.spec.ts +++ b/src/api/providers/__tests__/zai.spec.ts @@ -82,7 +82,22 @@ describe("ZAiHandler", () => { const model = handlerWithModel.getModel() expect(model.id).toBe(testModelId) expect(model.info).toEqual(internationalZAiModels[testModelId]) - expect(model.info.contextWindow).toBe(204_800) + expect(model.info.contextWindow).toBe(200_000) + }) + + it("should return GLM-4.5v international model with vision support", () => { + const testModelId: InternationalZAiModelId = "glm-4.5v" + const handlerWithModel = new ZAiHandler({ + apiModelId: testModelId, + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual(internationalZAiModels[testModelId]) + expect(model.info.supportsImages).toBe(true) + expect(model.info.maxTokens).toBe(16_384) + expect(model.info.contextWindow).toBe(131_072) }) }) @@ -134,6 +149,21 @@ describe("ZAiHandler", () => { expect(model.info).toEqual(mainlandZAiModels[testModelId]) expect(model.info.contextWindow).toBe(204_800) }) + + it("should return GLM-4.5v China model with vision support", () => { + const testModelId: MainlandZAiModelId = "glm-4.5v" + const handlerWithModel = new ZAiHandler({ + apiModelId: testModelId, + zaiApiKey: "test-zai-api-key", + zaiApiLine: "china_coding", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual(mainlandZAiModels[testModelId]) + expect(model.info.supportsImages).toBe(true) + expect(model.info.maxTokens).toBe(16_384) + expect(model.info.contextWindow).toBe(131_072) + }) }) describe("Default behavior", () => { diff --git a/src/api/providers/zai.ts b/src/api/providers/zai.ts index ce5aab9dd9f8..a72be571d4ff 100644 --- a/src/api/providers/zai.ts +++ b/src/api/providers/zai.ts @@ -5,6 +5,7 @@ import { mainlandZAiDefaultModelId, type InternationalZAiModelId, type MainlandZAiModelId, + type ModelInfo, ZAI_DEFAULT_TEMPERATURE, zaiApiLineConfigs, } from "@roo-code/types" @@ -13,11 +14,11 @@ import type { ApiHandlerOptions } from "../../shared/api" import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider" -export class ZAiHandler extends BaseOpenAiCompatibleProvider { +export class ZAiHandler extends BaseOpenAiCompatibleProvider { constructor(options: ApiHandlerOptions) { const isChina = zaiApiLineConfigs[options.zaiApiLine ?? "international_coding"].isChina - const models = isChina ? mainlandZAiModels : internationalZAiModels - const defaultModelId = isChina ? mainlandZAiDefaultModelId : internationalZAiDefaultModelId + const models = (isChina ? mainlandZAiModels : internationalZAiModels) as unknown as Record + const defaultModelId = (isChina ? mainlandZAiDefaultModelId : internationalZAiDefaultModelId) as string super({ ...options,