
Commit 03f8a9d

guoqingbao authored and EricLBuehler committed

Support GLM4 model! (#1437)

* Support GLM4 model
* Mention GLM4 model in ReadMe
* glm4 type hint
* Typo fix
* Fix unsupported chat_template function
* Clippy fix
1 parent 6bededc commit 03f8a9d

File tree: 18 files changed (+1264, −31 lines)


Cargo.lock

Lines changed: 3 additions & 6 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 2 additions & 2 deletions
```diff
@@ -128,8 +128,8 @@ schemars = "0.8.22"
 serde_yaml = "0.9.34"
 serde_plain = "1.0.2"
 as-any = "0.3.2"
-llguidance = { version = "0.7.29", default-features = false, features = ["lark"] }
-toktrie_hf_tokenizers = "0.7.29"
+llguidance = { git = "https://github.com/guidance-ai/llguidance.git", version = "0.7.29", default-features = false, features = ["lark"], rev = "2ce5ab8" }
+toktrie_hf_tokenizers = { git = "https://github.com/guidance-ai/llguidance.git", version = "0.7.29", rev = "2ce5ab8" }
 objc = { version = "0.2.7" }
 serde-big-array = "0.5.1"
 interprocess = "2.2.3"
```

README.md

Lines changed: 5 additions & 1 deletion
```diff
@@ -463,6 +463,7 @@ If you do not specify the architecture, an attempt will be made to use the model
 - `phi3.5moe`
 - `qwen2`
 - `gemma2`
+- `glm4`
 - `starcoder2`
 - `deepseekv2`
 - `deepseekv3`
@@ -505,6 +506,7 @@ If you do not specify the architecture, an attempt will be made to use the model
 - phi3
 - starcoder2
 - qwen2
+- qwen3
 
 **With adapters:**
 - llama
@@ -535,6 +537,7 @@ Please submit more benchmarks via raising an issue!
 |Phi 3 Vision| | |✅|
 |Idefics 2| | |✅|
 |Gemma 2| | |✅|
+|GLM4| | |✅|
 |Starcoder 2| |✅|✅|
 |LLaVa Next| | |✅|
 |LLaVa| | |✅|
@@ -548,7 +551,7 @@ Please submit more benchmarks via raising an issue!
 |Gemma 3| | |✅|
 |Mistral 3| | |✅|
 |Llama 4| | |✅|
-|Qwen 3| | |✅|
+|Qwen 3|| |✅|
 |Dia 1.6b| | |✅|
 </details>
 
@@ -581,6 +584,7 @@ Please submit more benchmarks via raising an issue!
 |Phi 3 Vision| | | |
 |Idefics 2| | | |
 |Gemma 2|✅| | |
+|GLM4|✅| | |
 |Starcoder 2|✅| | |
 |LLaVa Next| | | |
 |LLaVa| | | |
```

docs/GLM4.md

Lines changed: 59 additions & 0 deletions
# GLM4 Model

**[See the GLM4 model Collection](https://huggingface.co/collections/THUDM/glm-4-0414-67f3cbcb34dd9d252707cb2e)**

GLM4 is a series of open, multilingual, and multimodal large language models. The text-to-text LLM backbones in GLM4 are supported by mistral.rs.

## HTTP API

```py
import openai

# Assumes a mistral.rs server running locally with the OpenAI-compatible API;
# adjust the base URL and API key to your deployment.
client = openai.OpenAI(base_url="http://localhost:1234/v1", api_key="EMPTY")

messages = []
prompt = input("Enter system prompt >>> ")
if len(prompt) > 0:
    messages.append({"role": "system", "content": prompt})


while True:
    prompt = input(">>> ")
    messages.append({"role": "user", "content": prompt})
    completion = client.chat.completions.create(
        model="glm4",
        messages=messages,
        max_tokens=256,
        frequency_penalty=1.0,
        top_p=0.1,
        temperature=0,
    )
    resp = completion.choices[0].message.content
    print(resp)
    messages.append({"role": "assistant", "content": resp})
```

## Python API

```py
from mistralrs import Runner, Which, ChatCompletionRequest, Architecture

runner = Runner(
    which=Which.Plain(
        model_id="THUDM/GLM-4-9B-0414",
        arch=Architecture.GLM4,
    ),
)

res = runner.send_chat_completion_request(
    ChatCompletionRequest(
        model="glm4",
        messages=[
            {"role": "user", "content": "Tell me a story about the Rust type system."}
        ],
        max_tokens=256,
        presence_penalty=1.0,
        top_p=0.1,
        temperature=0.1,
    )
)
print(res.choices[0].message.content)
print(res.usage)
```
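The HTTP example above uses the Python `openai` client, but any OpenAI-compatible client can talk to the server. As a cross-check, here is a minimal sketch of the same chat request in plain Rust, assuming the `reqwest` crate (with the "blocking" and "json" features) and `serde_json` as dependencies; the base URL and API key are assumptions about a local deployment, not values taken from the commit:

```rust
use serde_json::{json, Value};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = reqwest::blocking::Client::new();
    let body = json!({
        "model": "glm4",
        "messages": [
            {"role": "user", "content": "Tell me a story about the Rust type system."}
        ],
        "max_tokens": 256,
        "temperature": 0.1,
    });
    let resp: Value = client
        .post("http://localhost:1234/v1/chat/completions")
        .bearer_auth("EMPTY")
        .json(&body)
        .send()?
        .error_for_status()?
        .json()?;
    // The response follows the OpenAI chat-completions schema, so the reply
    // text lives at choices[0].message.content.
    println!("{}", resp["choices"][0]["message"]["content"]);
    Ok(())
}
```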

mistralrs-core/src/layers.rs

Lines changed: 36 additions & 0 deletions
```diff
@@ -2185,6 +2185,42 @@ impl Mlp {
         })
     }
 
+    pub fn new_merged(
+        vb: ShardedVarBuilder,
+        hidden_size: usize,
+        intermediate_size: usize,
+        chunks: usize,
+        quantization_config: &Option<QuantizedConfig>,
+        hidden_act: Activation,
+        comm: &Arc<mistralrs_quant::Comm>,
+    ) -> Result<Self> {
+        assert!(chunks == 2, "Only gate_up_proj merge is supported!");
+        let gate_up_projs = ColumnParallelLayer::new_merged(
+            hidden_size,
+            intermediate_size * 2,
+            2,
+            quantization_config,
+            false,
+            comm,
+            vb.pp("gate_up_proj"),
+        )?;
+
+        Ok(Self {
+            gate: gate_up_projs[0].to_owned(),
+            up: gate_up_projs[1].to_owned(),
+            down: RowParallelLayer::new(
+                intermediate_size,
+                hidden_size,
+                quantization_config,
+                false,
+                comm,
+                vb.pp("down_proj"),
+            )?,
+            act: hidden_act,
+            params: vec![hidden_size, intermediate_size],
+        })
+    }
+
     pub fn replicate(
         params: &[usize],
         vb: ShardedVarBuilder,
```
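This constructor exists because GLM4 checkpoints store the gate and up projections fused into a single `gate_up_proj` tensor of width `intermediate_size * 2`, which must be split into two halves at load time (in the real code, `ColumnParallelLayer::new_merged` also handles tensor-parallel sharding and quantization). Below is a minimal sketch of just the split, assuming the `candle_core` crate; the function and variable names are illustrative, not the repository's actual API:

```rust
use candle_core::{Device, Result, Tensor};

/// Split a fused gate_up weight of shape (2 * intermediate_size, hidden_size)
/// into its gate half and its up half along the output dimension.
fn split_gate_up(w_gate_up: &Tensor, intermediate_size: usize) -> Result<(Tensor, Tensor)> {
    let gate = w_gate_up.narrow(0, 0, intermediate_size)?;
    let up = w_gate_up.narrow(0, intermediate_size, intermediate_size)?;
    Ok((gate, up))
}

fn main() -> Result<()> {
    let (hidden_size, intermediate_size) = (8, 16);
    // Stand-in for the checkpoint's fused `gate_up_proj` weight.
    let w = Tensor::randn(0f32, 1f32, (2 * intermediate_size, hidden_size), &Device::Cpu)?;
    let (gate, up) = split_gate_up(&w, intermediate_size)?;
    assert_eq!(gate.dims(), &[intermediate_size, hidden_size]);
    assert_eq!(up.dims(), &[intermediate_size, hidden_size]);
    Ok(())
}
```

This matches the `chunks == 2` assertion in the diff: only the two-way gate/up merge is supported, and the loaded halves become the `gate` and `up` fields of `Mlp` while `down_proj` is loaded as a separate row-parallel layer.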
