diff --git a/unsloth_zoo/__init__.py b/unsloth_zoo/__init__.py
index 86b376a06..3ce601b22 100644
--- a/unsloth_zoo/__init__.py
+++ b/unsloth_zoo/__init__.py
@@ -14,7 +14,7 @@
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
-__version__ = "2025.5.4"
+__version__ = "2025.5.5"
 
 from importlib.util import find_spec
 if find_spec("unsloth") is None:
diff --git a/unsloth_zoo/vllm_utils.py b/unsloth_zoo/vllm_utils.py
index 7f7683458..b156d75e1 100644
--- a/unsloth_zoo/vllm_utils.py
+++ b/unsloth_zoo/vllm_utils.py
@@ -414,11 +414,12 @@ def unpatch_bitsandbytes_compute_dtype():
 pass
 
 
-def patch_vllm():
+def patch_vllm(debug = True):
     # Temporary patch to disable multiprocessing for vLLM
     # Allows accessing model_executor
     os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
-    os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
+    if debug:
+        os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
     # os.environ["VLLM_TRACE_FUNCTION"] = "1"
     patch_vllm_set_inductor_config()
     patch_bitsandbytes_quant_state()
@@ -1524,7 +1525,6 @@ def generate_batches(llm, inputs, n_batches = None, lora_request = None, *args,
 
 def delete_vllm(llm = None):
     # From https://github.com/vllm-project/vllm/issues/1908
-    import ray
     from vllm.distributed.parallel_state import (
         destroy_model_parallel,
         destroy_distributed_environment,
@@ -1540,7 +1540,11 @@ def delete_vllm(llm = None):
         torch.distributed.destroy_process_group()
     gc.collect()
     torch.cuda.empty_cache()
-    ray.shutdown()
+    try:  # Best-effort: a failing ray import or shutdown must not abort cleanup
+        import ray
+        ray.shutdown()
+    except Exception:  # not a bare except: let KeyboardInterrupt / SystemExit propagate
+        pass
     return llm
 pass