Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions docs/models/supported_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,8 @@ th {
| `MiniCPMForCausalLM` | MiniCPM | `openbmb/MiniCPM-2B-sft-bf16`, `openbmb/MiniCPM-2B-dpo-bf16`, `openbmb/MiniCPM-S-1B-sft`, etc. | ✅︎ | ✅︎ |
| `MiniCPM3ForCausalLM` | MiniCPM3 | `openbmb/MiniCPM3-4B`, etc. | ✅︎ | ✅︎ |
| `MiniMaxM2ForCausalLM` | MiniMax-M2 |`MiniMaxAI/MiniMax-M2`, etc. | | ✅︎ |
| `MistralForCausalLM` | Mistral, Mistral-Instruct | `mistralai/Mistral-7B-v0.1`, `mistralai/Mistral-7B-Instruct-v0.1`, etc. | ✅︎ | ✅︎ |
| `MistralForCausalLM` | Ministral-3, Mistral, Mistral-Instruct | `mistralai/Ministral-3-3B-Instruct-2512`, `mistralai/Mistral-7B-v0.1`, `mistralai/Mistral-7B-Instruct-v0.1`, etc. | ✅︎ | ✅︎ |
| `MistralLarge3ForCausalLM` | Mistral-Large-3-675B-Base-2512, Mistral-Large-3-675B-Instruct-2512 | `mistralai/Mistral-Large-3-675B-Base-2512`, `mistralai/Mistral-Large-3-675B-Instruct-2512`, etc. | ✅︎ | ✅︎ |
| `MixtralForCausalLM` | Mixtral-8x7B, Mixtral-8x7B-Instruct | `mistralai/Mixtral-8x7B-v0.1`, `mistralai/Mixtral-8x7B-Instruct-v0.1`, `mistral-community/Mixtral-8x22B-v0.1`, etc. | ✅︎ | ✅︎ |
| `MPTForCausalLM` | MPT, MPT-Instruct, MPT-Chat, MPT-StoryWriter | `mosaicml/mpt-7b`, `mosaicml/mpt-7b-storywriter`, `mosaicml/mpt-30b`, etc. | | ✅︎ |
| `NemotronForCausalLM` | Nemotron-3, Nemotron-4, Minitron | `nvidia/Minitron-8B-Base`, `mgoin/Nemotron-4-340B-Base-hf-FP8`, etc. | ✅︎ | ✅︎ |
Expand Down Expand Up @@ -711,7 +712,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
| `Phi3VForCausalLM` | Phi-3-Vision, Phi-3.5-Vision | T + I<sup>E+</sup> | `microsoft/Phi-3-vision-128k-instruct`, `microsoft/Phi-3.5-vision-instruct`, etc. | | ✅︎ |
| `Phi4MMForCausalLM` | Phi-4-multimodal | T + I<sup>+</sup> / T + A<sup>+</sup> / I<sup>+</sup> + A<sup>+</sup> | `microsoft/Phi-4-multimodal-instruct`, etc. | ✅︎ | ✅︎ |
| `Phi4MultimodalForCausalLM` | Phi-4-multimodal (HF Transformers) | T + I<sup>+</sup> / T + A<sup>+</sup> / I<sup>+</sup> + A<sup>+</sup> | `microsoft/Phi-4-multimodal-instruct` (with revision `refs/pr/70`), etc. | ✅︎ | ✅︎ |
| `PixtralForConditionalGeneration` | Mistral 3 (Mistral format), Pixtral (Mistral format) | T + I<sup>+</sup> | `mistralai/Mistral-Small-3.1-24B-Instruct-2503`, `mistralai/Pixtral-12B-2409`, etc. | | ✅︎ |
| `PixtralForConditionalGeneration` | Ministral 3 (Mistral format), Mistral 3 (Mistral format), Mistral Large 3 (Mistral format), Pixtral (Mistral format) | T + I<sup>+</sup> | `mistralai/Ministral-3-3B-Instruct-2512`, `mistralai/Mistral-Small-3.1-24B-Instruct-2503`, `mistralai/Mistral-Large-3-675B-Instruct-2512`, `mistralai/Pixtral-12B-2409`, etc. | | ✅︎ |
| `QwenVLForConditionalGeneration`<sup>^</sup> | Qwen-VL | T + I<sup>E+</sup> | `Qwen/Qwen-VL`, `Qwen/Qwen-VL-Chat`, etc. | ✅︎ | ✅︎ |
| `Qwen2AudioForConditionalGeneration` | Qwen2-Audio | T + A<sup>+</sup> | `Qwen/Qwen2-Audio-7B-Instruct` | | ✅︎ |
| `Qwen2VLForConditionalGeneration` | QVQ, Qwen2-VL | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/QVQ-72B-Preview`, `Qwen/Qwen2-VL-7B-Instruct`, `Qwen/Qwen2-VL-72B-Instruct`, etc. | ✅︎ | ✅︎ |
Expand Down
14 changes: 14 additions & 0 deletions tests/models/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,9 @@ def check_available_online(
trust_remote_code=True,
),
"MistralForCausalLM": _HfExamplesInfo("mistralai/Mistral-7B-Instruct-v0.1"),
"MistralLarge3ForCausalLM": _HfExamplesInfo(
"mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4", is_available_online=False
),
"MixtralForCausalLM": _HfExamplesInfo(
"mistralai/Mixtral-8x7B-Instruct-v0.1",
{"tiny": "TitanML/tiny-mixtral"},
Expand Down Expand Up @@ -770,7 +773,13 @@ def check_available_online(
),
"PixtralForConditionalGeneration": _HfExamplesInfo(
"mistralai/Pixtral-12B-2409",
extras={
"mistral-large-3": "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4",
"ministral-3": "mistralai/Ministral-3-3B-Instruct-2512",
},
tokenizer_mode="mistral",
# TODO: revert once Mistral-Large-3 and Ministral-3 are publicly available.
is_available_online=False,
Comment on lines +776 to +782
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lmk @mgoin if this is what you had in mind here.

Comment on lines +781 to +782
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should flip this before 0.12 goes out @khluu

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's okay to leave this as is since we're cutting a branch today

),
"QwenVLForConditionalGeneration": _HfExamplesInfo(
"Qwen/Qwen-VL",
Expand Down Expand Up @@ -870,6 +879,11 @@ def check_available_online(
use_original_num_layers=True,
max_model_len=10240,
),
"EagleMistralLarge3ForCausalLM": _HfExamplesInfo(
"mistralai/Mistral-Large-3-675B-Instruct-2512",
speculative_model="mistralai/Mistral-Large-3-675B-Instruct-2512-Eagle",
is_available_online=False,
),
"LlamaForCausalLMEagle3": _HfExamplesInfo(
"Qwen/Qwen3-8B",
trust_remote_code=True,
Expand Down
158 changes: 151 additions & 7 deletions tests/tokenizers_/test_mistral.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,118 @@
],
),
),
(
{
"messages": [
{
"role": "user",
"content": "What is the current local date and time?",
}
],
"tools": [
{
"type": "function",
"function": {
"description": "Fetch the current local date and time.",
"unsupported_field": False,
"name": "get_current_time",
"parameters": {},
},
},
{
"type": "function",
"function": {
"description": "Fetch the current local date and time.",
"unsupported_field2": False,
"name": "get_current_time",
"parameters": {},
},
},
],
},
(
[
{
"role": "user",
"content": "What is the current local date and time?",
}
],
[
{
"type": "function",
"function": {
"description": "Fetch the current local date and time.",
"name": "get_current_time",
"parameters": {},
},
},
{
"type": "function",
"function": {
"description": "Fetch the current local date and time.",
"name": "get_current_time",
"parameters": {},
},
},
],
),
),
(
{
"messages": [
{
"role": "user",
"content": "What is the current local date and time?",
}
],
"tools": [
{
"type": "function",
"unsupported_field": False,
"function": {
"description": "Fetch the current local date and time.",
"name": "get_current_time",
"parameters": {},
},
},
{
"type": "function",
"unsupported_field2": False,
"function": {
"description": "Fetch the current local date and time 2.",
"name": "get_current_time2",
"parameters": {"a": "1"},
},
},
],
},
(
[
{
"role": "user",
"content": "What is the current local date and time?",
}
],
[
{
"type": "function",
"function": {
"description": "Fetch the current local date and time.",
"name": "get_current_time",
"parameters": {},
},
},
{
"type": "function",
"function": {
"description": "Fetch the current local date and time 2.",
"name": "get_current_time2",
"parameters": {"a": "1"},
},
},
],
),
),
],
)
def test_prepare_apply_chat_template_tools_and_messages(
Expand Down Expand Up @@ -1108,13 +1220,6 @@ def test_decode(
)
== expected_tokens[mistral_tokenizer.is_tekken]
)
assert (
mistral_tokenizer.decode(
ids[mistral_tokenizer.is_tekken],
skip_special_tokens=skip_special_tokens,
)
== expected_tokens[mistral_tokenizer.is_tekken]
)

def test_decode_empty(
self,
Expand All @@ -1140,6 +1245,45 @@ def test_decode_int(
== "<s>"
)

@pytest.mark.parametrize(
    "skip_special_tokens,expected_tokens",
    (
        # Each expected_tokens pair is indexed by is_tekken:
        # index 0 = non-Tekken (SentencePiece-style, "▁" word markers),
        # index 1 = Tekken (plain spaces).
        (
            False,
            (
                ["<s>[INST]▁Hello▁world▁![/INST]▁Hello</s>"],
                ["<s>[INST]Hello world ![/INST]Hello</s>"],
            ),
        ),
        (True, (["Hello world ! Hello"], ["Hello world !Hello"])),
    ),
)
def test_batch_decode(
    self,
    mistral_tokenizer: MistralTokenizer,
    skip_special_tokens: bool,
    expected_tokens: tuple[str, str],
):
    """batch_decode returns one decoded string per id sequence and honors
    skip_special_tokens, for both tokenizer flavors."""
    # One candidate batch (a single id sequence) per tokenizer flavor;
    # is_tekken selects the one matching the fixture-provided tokenizer.
    ids = (
        [[1, 3, 23325, 2294, 1686, 4, 23325, 2]],
        [[1, 3, 22177, 4304, 2662, 4, 22177, 2]],
    )
    assert (
        mistral_tokenizer.batch_decode(
            ids[mistral_tokenizer.is_tekken],
            skip_special_tokens=skip_special_tokens,
        )
        == expected_tokens[mistral_tokenizer.is_tekken]
    )

def test_batch_decode_empty(
    self,
    mistral_tokenizer: MistralTokenizer,
):
    """An empty id sequence decodes to the empty string — batch_decode still
    returns one entry per batch item rather than dropping empties."""
    assert mistral_tokenizer.batch_decode(
        [[]],
    ) == [""]

def test_convert_tokens_to_string(self, mistral_tokenizer: MistralTokenizer):
tokens = (
[
Expand Down
4 changes: 4 additions & 0 deletions vllm/config/speculative.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def compute_hash(self) -> str:

@staticmethod
def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
initial_architecture = hf_config.architectures[0]
if hf_config.model_type in ("deepseek_v3", "deepseek_v32"):
hf_config.model_type = "deepseek_mtp"
if hf_config.model_type == "deepseek_mtp":
Expand Down Expand Up @@ -226,6 +227,9 @@ def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
{"n_predict": n_predict, "architectures": ["LongCatFlashMTPModel"]}
)

if initial_architecture == "MistralLarge3ForCausalLM":
hf_config.update({"architectures": ["EagleMistralLarge3ForCausalLM"]})

return hf_config

def __post_init__(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def __init__(self, tokenizer: TokenizerLike):
self.tool_call_regex = re.compile(r"\[{.*}\]", re.DOTALL)
if _is_fn_name_regex_support(self.model_tokenizer):
self.fn_name_regex = re.compile(
r"([a-zA-Z0-9_-]+)(\{[\s\S]*?\})(?=\s*$|,|\s)", re.DOTALL
r"([a-zA-Z0-9_-]+)(\{[\s\S]*?\})(?=\s*$|,|\s)?", re.DOTALL
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Had to add this to make sure tool calls worked as expected for some examples, will rerun some xp to see if i didn't break anything there as we didn't author this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good on the examples I launched so this looks benign

)
else:
self.fn_name_regex = None
Expand Down
Loading