Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions unsloth_zoo/vllm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1675,22 +1675,22 @@ def load_vllm(
total_gb = total_memory/1024/1024/1024
ten_percent = total_gb * 0.1 # 1.46GB for T4
if UNSLOTH_ENABLE_LOGGING:
- logger.log(f"10% of your GPU VRAM = {ten_percent}")
+ logger.info(f"10% of your GPU VRAM = {ten_percent:.2f} GB")
if ten_percent >= 4.0: standby_target_gpu_util = 0.925
elif ten_percent >= 2.5: standby_target_gpu_util = 0.9
elif ten_percent >= 2.0: standby_target_gpu_util = 0.875
elif ten_percent >= 1.4: standby_target_gpu_util = 0.85
elif ten_percent >= 1.0: standby_target_gpu_util = 0.8
else: standby_target_gpu_util = 0.75
if UNSLOTH_ENABLE_LOGGING:
- logger.log(f"standby_target_gpu_util = {standby_target_gpu_util}")
+ logger.info(f"standby_target_gpu_util = {standby_target_gpu_util:.3f}")
# Reduce memory usage for newer vLLM versions since it OOMs
if Version(vllm_version) >= Version("0.11.0"):
if UNSLOTH_ENABLE_LOGGING:
- logger.log(f"Decreasing VRAM further since vLLM version >= 0.11.0 uses more")
+ logger.info(f"Decreasing VRAM further since vLLM version >= 0.11.0 uses more")
standby_target_gpu_util *= 0.95
if UNSLOTH_ENABLE_LOGGING:
- logger.log(f"Further reduced standby_target_gpu_util = {standby_target_gpu_util}")
+ logger.info(f"Further reduced standby_target_gpu_util = {standby_target_gpu_util:.4f}")

if unsloth_vllm_standby and not standby_util_override:
if gpu_memory_utilization < standby_target_gpu_util:
Expand Down