diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index e03f50baa9..34a61e250d 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -153,6 +153,8 @@ def filter(self, x): return not (self.text in x.getMessage())
 transformers_training_args_logger.addFilter(HideLoggingMessage("The speedups"))
 # torch.distributed process group is initialized, but parallel_mode != ParallelMode.DISTRIBUTED.
 transformers_training_args_logger.addFilter(HideLoggingMessage("torch.distributed"))
+# average_tokens_across_devices is set to True but it is invalid when world size is 1
+transformers_training_args_logger.addFilter(HideLoggingMessage("average_tokens_across_devices"))
 del transformers_training_args_logger
 
 # No label_names provided for model class
diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index b0f485d90e..8dca483783 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -722,6 +722,8 @@ def _for_inference(m):
         pass
 
         # Must disable returning hidden states in the case for GRPO
         os.environ["UNSLOTH_RETURN_HIDDEN_STATES"] = "0"
+        # Must enable returning logits
+        os.environ["UNSLOTH_RETURN_LOGITS"] = "1"
         return model
     pass
@@ -760,6 +762,8 @@ def _for_training(m):
             embeddings = model.get_output_embeddings()
             if hasattr(embeddings, "training"): embeddings.training = True
         pass
+        # Can re-enable not returning logits
+        os.environ["UNSLOTH_RETURN_LOGITS"] = "0"
         return model
     pass
 pass
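
For reviewers: the `HideLoggingMessage` pattern being extended in `_utils.py` is visible in the hunk header, where `filter` returns `False` for any record whose message contains the stored substring. Below is a minimal, runnable sketch of that pattern; the constructor and the logger name `transformers.training_args` are assumptions for illustration, since only the `filter` body appears in the diff context.

    import logging

    # Sketch of the suppression pattern extended above (assumed constructor;
    # only the filter() body is shown verbatim in the hunk context).
    class HideLoggingMessage(logging.Filter):
        def __init__(self, text):
            super().__init__()
            self.text = text  # substring identifying messages to hide

        def filter(self, x):
            # Keep the record only if the unwanted substring is absent.
            return not (self.text in x.getMessage())

    # Hides the spurious single-GPU warning ("average_tokens_across_devices
    # is set to True but it is invalid when world size is 1") without muting
    # the logger entirely. The logger name is an assumption for this sketch.
    transformers_training_args_logger = logging.getLogger("transformers.training_args")
    transformers_training_args_logger.addFilter(
        HideLoggingMessage("average_tokens_across_devices")
    )

The `vision.py` half of the change toggles `UNSLOTH_RETURN_LOGITS` at the inference/training boundary: `_for_inference` forces logits to be returned (generation needs them materialized), while `_for_training` restores the default, presumably so training can fall back to the path that avoids materializing full logits.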