9 changes: 3 additions & 6 deletions Cargo.lock

Generated lockfile; diff not rendered.

4 changes: 2 additions & 2 deletions Cargo.toml
@@ -125,8 +125,8 @@ schemars = "0.8.22"
 serde_yaml = "0.9.34"
 serde_plain = "1.0.2"
 as-any = "0.3.2"
-llguidance = { version = "0.7.29", default-features = false, features = ["lark"] }
-toktrie_hf_tokenizers = "0.7.29"
+llguidance = { git = "https://github.com/guidance-ai/llguidance.git", version = "0.7.29", default-features = false, features = ["lark"], rev = "2ce5ab8" }
+toktrie_hf_tokenizers = { git = "https://github.com/guidance-ai/llguidance.git", version = "0.7.29", rev = "2ce5ab8" }
 objc = { version = "0.2.7" }
 serde-big-array = "0.5.1"
 interprocess = "2.2.3"
6 changes: 5 additions & 1 deletion README.md
@@ -384,6 +384,7 @@ If you do not specify the architecture, an attempt will be made to use the model
 - `phi3.5moe`
 - `qwen2`
 - `gemma2`
+- `glm4`
 - `starcoder2`
 - `deepseekv2`
 - `deepseekv3`
@@ -426,6 +427,7 @@ If you do not specify the architecture, an attempt will be made to use the model
 - phi3
 - starcoder2
 - qwen2
+- qwen3
 
 **With adapters:**
 - llama
@@ -456,6 +458,7 @@ Please submit more benchmarks via raising an issue!
 |Phi 3 Vision| | |✅|
 |Idefics 2| | |✅|
 |Gemma 2| | |✅|
+|GLM4| | |✅|
 |Starcoder 2| |✅|✅|
 |LLaVa Next| | |✅|
 |LLaVa| | |✅|
@@ -469,7 +472,7 @@ Please submit more benchmarks via raising an issue!
 |Gemma 3| | |✅|
 |Mistral 3| | |✅|
 |Llama 4| | |✅|
-|Qwen 3| | |✅|
+|Qwen 3|| |✅|
 |Dia 1.6b| | |✅|
 </details>

@@ -502,6 +505,7 @@ Please submit more benchmarks via raising an issue!
 |Phi 3 Vision| | | |
 |Idefics 2| | | |
 |Gemma 2|✅| | |
+|GLM4|✅| | |
 |Starcoder 2|✅| | |
 |LLaVa Next| | | |
 |LLaVa| | | |
59 changes: 59 additions & 0 deletions docs/GLM4.md
@@ -0,0 +1,59 @@
# GLM4 Model

**[See the GLM4 model Collection](https://huggingface.co/collections/THUDM/glm-4-0414-67f3cbcb34dd9d252707cb2e)**

GLM4 is a series of open, multilingual, and multimodal large language models. The text-to-text LLM backbones in GLM4 are supported by mistral.rs.

## HTTP API
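The snippet below assumes a mistral.rs server is already running and serving the OpenAI-compatible HTTP API; the `base_url` of `http://localhost:1234/v1/` and the placeholder API key are assumptions to adapt to your deployment.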

```py
import openai

# Assumption: a mistral.rs server is running locally with the
# OpenAI-compatible HTTP API; adjust base_url (and the dummy key)
# to match your deployment.
client = openai.OpenAI(
    base_url="http://localhost:1234/v1/",
    api_key="foo",
)

messages = []
prompt = input("Enter system prompt >>> ")
if len(prompt) > 0:
    messages.append({"role": "system", "content": prompt})

while True:
    prompt = input(">>> ")
    messages.append({"role": "user", "content": prompt})
    completion = client.chat.completions.create(
        model="glm4",
        messages=messages,
        max_tokens=256,
        frequency_penalty=1.0,
        top_p=0.1,
        temperature=0,
    )
    resp = completion.choices[0].message.content
    print(resp)
    messages.append({"role": "assistant", "content": resp})
```

## Python API
```py
from mistralrs import Runner, Which, ChatCompletionRequest, Architecture

runner = Runner(
    which=Which.Plain(
        model_id="THUDM/GLM-4-9B-0414",
        arch=Architecture.GLM4,
    ),
)

res = runner.send_chat_completion_request(
    ChatCompletionRequest(
        model="glm4",
        messages=[
            {"role": "user", "content": "Tell me a story about the Rust type system."}
        ],
        max_tokens=256,
        presence_penalty=1.0,
        top_p=0.1,
        temperature=0.1,
    )
)
print(res.choices[0].message.content)
print(res.usage)
```
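If `arch` is omitted here, mistral.rs will attempt to infer the architecture from the model's config, per the README's model-selection notes.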
36 changes: 36 additions & 0 deletions mistralrs-core/src/layers.rs
Original file line number Diff line number Diff line change
@@ -2127,6 +2127,42 @@ impl Mlp {
        })
    }

    pub fn new_merged(
        vb: ShardedVarBuilder,
        hidden_size: usize,
        intermediate_size: usize,
        chunks: usize,
        quantization_config: &Option<QuantizedConfig>,
        hidden_act: Activation,
        comm: &Arc<mistralrs_quant::Comm>,
    ) -> Result<Self> {
        // Only the fused gate_up_proj layout (two chunks) is supported.
        assert!(chunks == 2, "Only gate_up_proj merge is supported!");
        // Load a single column-parallel weight of width 2 * intermediate_size
        // and split it into its gate and up halves.
        let gate_up_projs = ColumnParallelLayer::new_merged(
            hidden_size,
            intermediate_size * 2,
            2,
            quantization_config,
            false,
            comm,
            vb.pp("gate_up_proj"),
        )?;

        Ok(Self {
            gate: gate_up_projs[0].to_owned(),
            up: gate_up_projs[1].to_owned(),
            down: RowParallelLayer::new(
                intermediate_size,
                hidden_size,
                quantization_config,
                false,
                comm,
                vb.pp("down_proj"),
            )?,
            act: hidden_act,
            params: vec![hidden_size, intermediate_size],
        })
    }

    pub fn replicate(
        params: &[usize],
        vb: ShardedVarBuilder,
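As context for `new_merged`, here is a minimal, self-contained sketch of the fused-projection idea it implements: the checkpoint stores a single `[2 * intermediate_size, hidden_size]` matrix for `gate_up_proj`, one projection produces both halves, and the MLP then gates `up` with the activated `gate`. The std-only code, tiny shapes, and SiLU gating below are illustrative assumptions, not this crate's API.

```rust
// Illustrative sketch only: shows why a merged gate_up projection is
// split into gate and up halves, as Mlp::new_merged does via
// ColumnParallelLayer::new_merged. Shapes and data are made up.

fn silu(x: f32) -> f32 {
    x / (1.0 + (-x).exp())
}

/// y = W x for a row-major `[rows, cols]` matrix.
fn matvec(w: &[f32], rows: usize, cols: usize, x: &[f32]) -> Vec<f32> {
    (0..rows)
        .map(|r| (0..cols).map(|c| w[r * cols + c] * x[c]).sum())
        .collect()
}

fn main() {
    let hidden = 4;
    let intermediate = 3;

    // Fused weight: the gate rows come first, then the up rows (chunks == 2).
    let gate_up: Vec<f32> = (0..2 * intermediate * hidden)
        .map(|i| i as f32 * 0.01)
        .collect();
    let x = vec![1.0_f32; hidden];

    // One matmul yields both halves at once...
    let fused = matvec(&gate_up, 2 * intermediate, hidden, &x);
    // ...which are then consumed as separate gate and up activations.
    let (gate, up) = fused.split_at(intermediate);

    // SwiGLU-style gating: act(gate) * up; a down projection would follow.
    let h: Vec<f32> = gate.iter().zip(up).map(|(g, u)| silu(*g) * u).collect();
    println!("{h:?}");
}
```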