Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements/common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ partial-json-parser # used for parsing partial JSON outputs
pyzmq >= 25.0.0
msgspec
gguf >= 0.17.0
mistral_common[image] >= 1.10.0
mistral_common[image] >= 1.11.0
opencv-python-headless >= 4.13.0 # required for video IO
pyyaml
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
Expand Down
2 changes: 1 addition & 1 deletion requirements/rocm-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ mbstrdecoder==1.1.4
# typepy
mdurl==0.1.2
# via markdown-it-py
mistral-common==1.10.0
mistral-common==1.11.0
# via
# -c requirements/common.txt
# -r requirements/rocm-test.in
Expand Down
2 changes: 1 addition & 1 deletion requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ mbstrdecoder==1.1.3
# typepy
mdurl==0.1.2
# via markdown-it-py
mistral-common==1.10.0
mistral-common==1.11.0
# via
# -c requirements/common.txt
# -r requirements/test.in
Expand Down
28 changes: 28 additions & 0 deletions tests/tokenizers_/test_mistral.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@

from typing import Any

import llguidance
import pytest
from mistral_common.exceptions import InvalidMessageStructureException
from mistral_common.guidance.grammar_factory import GrammarFactory
from mistral_common.tokens.tokenizers.base import SpecialTokenPolicy

from vllm.tokenizers.mistral import (
Expand Down Expand Up @@ -2407,3 +2409,29 @@ def test_convert_ids_to_tokens(
assert actual_tokens == expected_tokens

assert mistral_tokenizer.convert_ids_to_tokens([]) == []

def test_grammar_factory(self, mistral_tokenizer: MistralTokenizer) -> None:
    """The grammar factory is exposed (and cached) only for tekken tokenizers."""
    if not mistral_tokenizer.is_tekken:
        # The SPM / pre-v11 path (e.g. Mistral 7B) has no grammar factory,
        # so attribute access must fail.
        with pytest.raises(AttributeError):
            mistral_tokenizer.grammar_factory  # noqa: B018
        return

    first_access = mistral_tokenizer.grammar_factory
    second_access = mistral_tokenizer.grammar_factory
    assert isinstance(first_access, GrammarFactory)
    # Cached property: repeated access must return the identical object.
    assert second_access is first_access

def test_llg_tokenizer(self, mistral_tokenizer: MistralTokenizer) -> None:
    """The llguidance tokenizer is exposed (and cached) only for tekken tokenizers."""
    if not mistral_tokenizer.is_tekken:
        # Non-tekken tokenizers cannot build an llguidance tokenizer and
        # must raise on access.
        with pytest.raises(ValueError):
            mistral_tokenizer.llg_tokenizer  # noqa: B018
        return

    first_access = mistral_tokenizer.llg_tokenizer
    second_access = mistral_tokenizer.llg_tokenizer
    assert isinstance(first_access, llguidance.LLTokenizer)
    # Cached property: repeated access must return the identical object.
    assert second_access is first_access
Loading
Loading