diff --git a/unsloth_zoo/vllm_utils.py b/unsloth_zoo/vllm_utils.py index 8eeeadac4..80ed5ec58 100644 --- a/unsloth_zoo/vllm_utils.py +++ b/unsloth_zoo/vllm_utils.py @@ -1675,7 +1675,7 @@ def load_vllm( total_gb = total_memory/1024/1024/1024 ten_percent = total_gb * 0.1 # 1.46GB for T4 if UNSLOTH_ENABLE_LOGGING: - logger.log(f"10% of your GPU VRAM = {ten_percent}") + logger.info(f"10% of your GPU VRAM = {ten_percent:.2f} GB") if ten_percent >= 4.0: standby_target_gpu_util = 0.925 elif ten_percent >= 2.5: standby_target_gpu_util = 0.9 elif ten_percent >= 2.0: standby_target_gpu_util = 0.875 @@ -1683,14 +1683,14 @@ def load_vllm( elif ten_percent >= 1.0: standby_target_gpu_util = 0.8 else: standby_target_gpu_util = 0.75 if UNSLOTH_ENABLE_LOGGING: - logger.log(f"standby_target_gpu_util = {standby_target_gpu_util}") + logger.info(f"standby_target_gpu_util = {standby_target_gpu_util:.3f}") # Reduce memory usage for newer vLLM versions since it OOMs if Version(vllm_version) >= Version("0.11.0"): if UNSLOTH_ENABLE_LOGGING: - logger.log(f"Decreasing VRAM further since vLLM version >= 0.11.0 uses more") + logger.info(f"Decreasing VRAM further since vLLM version >= 0.11.0 uses more") standby_target_gpu_util *= 0.95 if UNSLOTH_ENABLE_LOGGING: - logger.log(f"Further reduced standby_target_gpu_util = {standby_target_gpu_util}") + logger.info(f"Further reduced standby_target_gpu_util = {standby_target_gpu_util:.4f}") if unsloth_vllm_standby and not standby_util_override: if gpu_memory_utilization < standby_target_gpu_util: