diff --git a/unsloth_zoo/__init__.py b/unsloth_zoo/__init__.py index 86b376a06..3ce601b22 100644 --- a/unsloth_zoo/__init__.py +++ b/unsloth_zoo/__init__.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. -__version__ = "2025.5.4" +__version__ = "2025.5.5" from importlib.util import find_spec if find_spec("unsloth") is None: diff --git a/unsloth_zoo/vllm_utils.py b/unsloth_zoo/vllm_utils.py index 7f7683458..b156d75e1 100644 --- a/unsloth_zoo/vllm_utils.py +++ b/unsloth_zoo/vllm_utils.py @@ -414,11 +414,12 @@ def unpatch_bitsandbytes_compute_dtype(): pass -def patch_vllm(): +def patch_vllm(debug = True): # Temporary patch to disable multiprocessing for vLLM # Allows accessing model_executor os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0" - os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG" + if debug: + os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG" # os.environ["VLLM_TRACE_FUNCTION"] = "1" patch_vllm_set_inductor_config() patch_bitsandbytes_quant_state() @@ -1524,7 +1525,6 @@ def generate_batches(llm, inputs, n_batches = None, lora_request = None, *args, def delete_vllm(llm = None): # From https://github.com/vllm-project/vllm/issues/1908 - import ray from vllm.distributed.parallel_state import ( destroy_model_parallel, destroy_distributed_environment, @@ -1540,7 +1540,11 @@ def delete_vllm(llm = None): torch.distributed.destroy_process_group() gc.collect() torch.cuda.empty_cache() - ray.shutdown() + try: + import ray + ray.shutdown() + except: + pass return llm pass