Skip to content
184 changes: 112 additions & 72 deletions packages/types/src/providers/zai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@ import type { ModelInfo } from "../model.js"
import { ZaiApiLine } from "../provider-settings.js"

// Z AI
// https://docs.z.ai/guides/llm/glm-4-32b-0414-128k
// https://docs.z.ai/guides/llm/glm-4.5
// https://docs.z.ai/guides/llm/glm-4.6
// https://docs.z.ai/guides/overview/pricing
// https://bigmodel.cn/pricing

export type InternationalZAiModelId = keyof typeof internationalZAiModels
export const internationalZAiDefaultModelId: InternationalZAiModelId = "glm-4.5"
export const internationalZAiDefaultModelId: InternationalZAiModelId = "glm-4.6"
export const internationalZAiModels = {
"glm-4.5": {
maxTokens: 98_304,
Expand All @@ -32,9 +35,55 @@ export const internationalZAiModels = {
description:
"GLM-4.5-Air is the lightweight version of GLM-4.5. It balances performance and cost-effectiveness, and can flexibly switch to hybrid thinking models.",
},
"glm-4.5-x": {
maxTokens: 98_304,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
inputPrice: 2.2,
outputPrice: 8.9,
cacheWritesPrice: 0,
cacheReadsPrice: 0.45,
description:
"GLM-4.5-X is a high-performance variant optimized for strong reasoning with ultra-fast responses.",
},
"glm-4.5-airx": {
maxTokens: 98_304,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
inputPrice: 1.1,
outputPrice: 4.5,
cacheWritesPrice: 0,
cacheReadsPrice: 0.22,
description: "GLM-4.5-AirX is a lightweight, ultra-fast variant delivering strong performance with lower cost.",
},
"glm-4.5-flash": {
maxTokens: 98_304,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
inputPrice: 0,
outputPrice: 0,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "GLM-4.5-Flash is a free, high-speed model excellent for reasoning, coding, and agentic tasks.",
},
"glm-4.5v": {
maxTokens: 16_384,
contextWindow: 131_072,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 0.6,
outputPrice: 1.8,
cacheWritesPrice: 0,
cacheReadsPrice: 0.11,
description:
"GLM-4.5V is Z.AI's multimodal visual reasoning model (image/video/text/file input), optimized for GUI tasks, grounding, and document/video understanding.",
},
"glm-4.6": {
maxTokens: 98_304,
contextWindow: 204_800,
contextWindow: 200_000,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The mainland China model configuration for glm-4.6 still has contextWindow: 204_800 (line 221), creating an inconsistency with the international configuration. The same model ID should have the same context window across both API lines.

supportsImages: false,
supportsPromptCache: true,
inputPrice: 0.6,
Expand All @@ -44,10 +93,21 @@ export const internationalZAiModels = {
description:
"GLM-4.6 is Zhipu's newest model with an extended context window of up to 200k tokens, providing enhanced capabilities for processing longer documents and conversations.",
},
"glm-4-32b-0414-128k": {
maxTokens: 98_304,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.1,
outputPrice: 0.1,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "GLM-4-32B is a 32 billion parameter model with 128k context length, optimized for efficiency.",
},
} as const satisfies Record<string, ModelInfo>

export type MainlandZAiModelId = keyof typeof mainlandZAiModels
export const mainlandZAiDefaultModelId: MainlandZAiModelId = "glm-4.5"
export const mainlandZAiDefaultModelId: MainlandZAiModelId = "glm-4.6"
export const mainlandZAiModels = {
"glm-4.5": {
maxTokens: 98_304,
Expand All @@ -60,26 +120,6 @@ export const mainlandZAiModels = {
cacheReadsPrice: 0.057,
description:
"GLM-4.5 is Zhipu's latest featured model. Its comprehensive capabilities in reasoning, coding, and agent reach the state-of-the-art (SOTA) level among open-source models, with a context length of up to 128k.",
tiers: [
{
contextWindow: 32_000,
inputPrice: 0.21,
outputPrice: 1.0,
cacheReadsPrice: 0.043,
},
{
contextWindow: 128_000,
inputPrice: 0.29,
outputPrice: 1.14,
cacheReadsPrice: 0.057,
},
{
contextWindow: Infinity,
inputPrice: 0.29,
outputPrice: 1.14,
cacheReadsPrice: 0.057,
},
],
},
"glm-4.5-air": {
maxTokens: 98_304,
Expand All @@ -92,26 +132,52 @@ export const mainlandZAiModels = {
cacheReadsPrice: 0.02,
description:
"GLM-4.5-Air is the lightweight version of GLM-4.5. It balances performance and cost-effectiveness, and can flexibly switch to hybrid thinking models.",
tiers: [
{
contextWindow: 32_000,
inputPrice: 0.07,
outputPrice: 0.4,
cacheReadsPrice: 0.014,
},
{
contextWindow: 128_000,
inputPrice: 0.1,
outputPrice: 0.6,
cacheReadsPrice: 0.02,
},
{
contextWindow: Infinity,
inputPrice: 0.1,
outputPrice: 0.6,
cacheReadsPrice: 0.02,
},
],
},
"glm-4.5-x": {
maxTokens: 98_304,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
inputPrice: 0.29,
outputPrice: 1.14,
cacheWritesPrice: 0,
cacheReadsPrice: 0.057,
description:
"GLM-4.5-X is a high-performance variant optimized for strong reasoning with ultra-fast responses.",
},
"glm-4.5-airx": {
maxTokens: 98_304,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
inputPrice: 0.1,
outputPrice: 0.6,
cacheWritesPrice: 0,
cacheReadsPrice: 0.02,
description: "GLM-4.5-AirX is a lightweight, ultra-fast variant delivering strong performance with lower cost.",
},
"glm-4.5-flash": {
maxTokens: 98_304,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
inputPrice: 0,
outputPrice: 0,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "GLM-4.5-Flash is a free, high-speed model excellent for reasoning, coding, and agentic tasks.",
},
"glm-4.5v": {
maxTokens: 16_384,
contextWindow: 131_072,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 0.29,
outputPrice: 0.93,
cacheWritesPrice: 0,
cacheReadsPrice: 0.057,
description:
"GLM-4.5V is Z.AI's multimodal visual reasoning model (image/video/text/file input), optimized for GUI tasks, grounding, and document/video understanding.",
},
"glm-4.6": {
maxTokens: 98_304,
Expand All @@ -124,45 +190,19 @@ export const mainlandZAiModels = {
cacheReadsPrice: 0.057,
description:
"GLM-4.6 is Zhipu's newest model with an extended context window of up to 200k tokens, providing enhanced capabilities for processing longer documents and conversations.",
tiers: [
{
contextWindow: 32_000,
inputPrice: 0.21,
outputPrice: 1.0,
cacheReadsPrice: 0.043,
},
{
contextWindow: 128_000,
inputPrice: 0.29,
outputPrice: 1.14,
cacheReadsPrice: 0.057,
},
{
contextWindow: 200_000,
inputPrice: 0.29,
outputPrice: 1.14,
cacheReadsPrice: 0.057,
},
{
contextWindow: Infinity,
inputPrice: 0.29,
outputPrice: 1.14,
cacheReadsPrice: 0.057,
},
],
},
} as const satisfies Record<string, ModelInfo>

export const ZAI_DEFAULT_TEMPERATURE = 0
export const ZAI_DEFAULT_TEMPERATURE = 0.6

export const zaiApiLineConfigs = {
international_coding: {
name: "International Coding Plan",
name: "International",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
isChina: false,
},
china_coding: {
name: "China Coding Plan",
name: "China",
baseUrl: "https://open.bigmodel.cn/api/coding/paas/v4",
isChina: true,
},
Expand Down
32 changes: 31 additions & 1 deletion src/api/providers/__tests__/zai.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,22 @@ describe("ZAiHandler", () => {
const model = handlerWithModel.getModel()
expect(model.id).toBe(testModelId)
expect(model.info).toEqual(internationalZAiModels[testModelId])
expect(model.info.contextWindow).toBe(204_800)
expect(model.info.contextWindow).toBe(200_000)
})

it("should return GLM-4.5v international model with vision support", () => {
	// Ask for the vision model on the international API line.
	const visionModelId: InternationalZAiModelId = "glm-4.5v"
	const { id, info } = new ZAiHandler({
		apiModelId: visionModelId,
		zaiApiKey: "test-zai-api-key",
		zaiApiLine: "international_coding",
	}).getModel()

	// The handler should echo the requested id and return info deep-equal to the table entry.
	expect(id).toBe(visionModelId)
	expect(info).toEqual(internationalZAiModels[visionModelId])

	// Spot-check the fields that distinguish the vision entry.
	expect(info.supportsImages).toBe(true)
	expect(info.maxTokens).toBe(16_384)
	expect(info.contextWindow).toBe(131_072)
})
})

Expand Down Expand Up @@ -134,6 +149,21 @@ describe("ZAiHandler", () => {
expect(model.info).toEqual(mainlandZAiModels[testModelId])
expect(model.info.contextWindow).toBe(204_800)
})

it("should return GLM-4.5v China model with vision support", () => {
	// Ask for the vision model on the mainland China API line.
	const visionModelId: MainlandZAiModelId = "glm-4.5v"
	const { id, info } = new ZAiHandler({
		apiModelId: visionModelId,
		zaiApiKey: "test-zai-api-key",
		zaiApiLine: "china_coding",
	}).getModel()

	// The handler should echo the requested id and return info deep-equal to the table entry.
	expect(id).toBe(visionModelId)
	expect(info).toEqual(mainlandZAiModels[visionModelId])

	// Spot-check the fields that distinguish the vision entry.
	expect(info.supportsImages).toBe(true)
	expect(info.maxTokens).toBe(16_384)
	expect(info.contextWindow).toBe(131_072)
})
})

describe("Default behavior", () => {
Expand Down
7 changes: 4 additions & 3 deletions src/api/providers/zai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
mainlandZAiDefaultModelId,
type InternationalZAiModelId,
type MainlandZAiModelId,
type ModelInfo,
ZAI_DEFAULT_TEMPERATURE,
zaiApiLineConfigs,
} from "@roo-code/types"
Expand All @@ -13,11 +14,11 @@ import type { ApiHandlerOptions } from "../../shared/api"

import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider"

export class ZAiHandler extends BaseOpenAiCompatibleProvider<InternationalZAiModelId | MainlandZAiModelId> {
export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
constructor(options: ApiHandlerOptions) {
const isChina = zaiApiLineConfigs[options.zaiApiLine ?? "international_coding"].isChina
const models = isChina ? mainlandZAiModels : internationalZAiModels
const defaultModelId = isChina ? mainlandZAiDefaultModelId : internationalZAiDefaultModelId
const models = (isChina ? mainlandZAiModels : internationalZAiModels) as unknown as Record<string, ModelInfo>
const defaultModelId = (isChina ? mainlandZAiDefaultModelId : internationalZAiDefaultModelId) as string

super({
...options,
Expand Down