diff --git a/comfy/model_management.py b/comfy/model_management.py
index d8913082adaa..a21df54b3f38 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -503,10 +503,7 @@ def model_load(self, lowvram_model_memory=0, force_patch_weights=False):
         use_more_vram = lowvram_model_memory
         if use_more_vram == 0:
             use_more_vram = 1e32
-        if use_more_vram > 0:
-            self.model_use_more_vram(use_more_vram, force_patch_weights=force_patch_weights)
-        else:
-            self.model.partially_unload(self.model.offload_device, -use_more_vram, force_patch_weights=force_patch_weights)
+        self.model_use_more_vram(use_more_vram, force_patch_weights=force_patch_weights)
 
         real_model = self.model.model
 
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 68b0a9192753..cf1b0d4412bc 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -928,6 +928,9 @@ def partially_load(self, device_to, extra_memory=0, force_patch_weights=False):
             extra_memory += (used - self.model.model_loaded_weight_memory)
 
         self.patch_model(load_weights=False)
+        if extra_memory < 0 and not unpatch_weights:
+            self.partially_unload(self.offload_device, -extra_memory, force_patch_weights=force_patch_weights)
+            return 0
         full_load = False
         if self.model.model_lowvram == False and self.model.model_loaded_weight_memory > 0:
             self.apply_hooks(self.forced_hooks, force_apply=True)
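
The net effect of the diff is that the caller in `model_load` no longer branches on the sign of the memory budget; a negative budget is now handled inside `partially_load` itself, which delegates to `partially_unload` and returns 0. Below is a minimal standalone sketch of that consolidated control flow. The class and its bookkeeping (`ToyPatcher`, `loaded`) are hypothetical stand-ins for illustration, not the ComfyUI `ModelPatcher` API.

```python
# Toy sketch (assumed names, not ComfyUI code) of the control flow after the
# change: partially_load() owns the sign check on the memory budget, so the
# caller can always issue a single "load with this budget" call.

class ToyPatcher:
    """Hypothetical stand-in for ModelPatcher; tracks bytes resident on the GPU."""

    def __init__(self, loaded=0):
        self.loaded = loaded  # bytes of weights currently on the compute device

    def partially_unload(self, amount):
        # Offload up to `amount` bytes back to the offload device.
        freed = min(self.loaded, amount)
        self.loaded -= freed
        return freed

    def partially_load(self, extra_memory):
        # Post-change behavior: a negative budget is handled here by
        # delegating to partially_unload and returning 0, so callers no
        # longer branch on the sign themselves.
        if extra_memory < 0:
            self.partially_unload(-extra_memory)
            return 0
        self.loaded += extra_memory  # pretend we load exactly the budget
        return extra_memory


if __name__ == "__main__":
    m = ToyPatcher(loaded=4096)
    m.partially_load(-1024)   # negative budget -> 1024 bytes offloaded
    print(m.loaded)           # 3072
    m.partially_load(2048)    # positive budget -> 2048 bytes loaded
    print(m.loaded)           # 5120
```

In the real patch, the `not unpatch_weights` guard additionally skips the early-unload path when weight patches were just unpatched, since `extra_memory` has already been credited with the freed weight memory in that branch.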