diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index 893a09dd14..a879ee0d69 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -1548,7 +1548,7 @@ def unsloth_fast_generate( if "input_ids" in kwargs and kwargs["input_ids"] is not None and "max_new_tokens" in kwargs: if kwargs["input_ids"].shape[-1] + kwargs["max_new_tokens"] > self.config.max_position_embeddings: raise ValueError( - f'Unsloth: input length {kwargs["input_ids"].shape[-1]} + max_new_tokens {kwargs["max_new_tokens"]} exceeds the maximum sequence length of {model.config.max_position_embeddings}!\n'\ + f'Unsloth: input length {kwargs["input_ids"].shape[-1]} + max_new_tokens {kwargs["max_new_tokens"]} exceeds the maximum sequence length of {self.config.max_position_embeddings}!\n'\ 'You will need to do long context extension by increasing the `max_seq_length` in `FastLanguageModel.from_pretrained`.' ) pass