From 98b1e715e302643f1bcbd76d6a6963888deb8821 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 25 May 2026 11:46:11 +0000
Subject: [PATCH 1/5] vision_utils: clamp resize new_w/new_h to >=1

The integer-rounding formula in _resize_images_inplace can produce
new_w=0 or new_h=0 for inputs with extreme aspect ratios (e.g. 1024x1
with image_size=256: new_h = (1*256 + 512) // 1024 = 0). PIL.resize
then raises "height and width must be > 0".

This is reachable from Studio's vision_image_size knob when a dataset
contains a degenerate image and the user picks a small cap. The fix
mirrors the max(1, ...) guard already present in Studio's MLX resize
helper (studio/backend/core/training/worker.py::_mlx_vlm_max_resized_size).
---
 unsloth_zoo/vision_utils.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/unsloth_zoo/vision_utils.py b/unsloth_zoo/vision_utils.py
index 1f36a595c..0d1f7aa05 100644
--- a/unsloth_zoo/vision_utils.py
+++ b/unsloth_zoo/vision_utils.py
@@ -978,9 +978,11 @@ def quantize_to_factor(x):
                 image[i] = img.resize(image_size, LANCZOS)
             elif self.size_func(img) > image_size and hasattr(img, "resize"):
                 w, h = img.size
-                # integer math rounding
-                new_w = (w * image_size + self.size_func(img) // 2) // self.size_func(img)
-                new_h = (h * image_size + self.size_func(img) // 2) // self.size_func(img)
+                # integer math rounding; clamp to >=1 so degenerate aspect
+                # ratios (e.g. 1024x1, 4000x4) where the downscale would
+                # round one side to 0 do not crash PIL.resize.
+                new_w = max(1, (w * image_size + self.size_func(img) // 2) // self.size_func(img))
+                new_h = max(1, (h * image_size + self.size_func(img) // 2) // self.size_func(img))
                 if self.snap_to_patch_size:
                     factor = self.patch_size * 2
                     new_w, new_h = quantize_to_factor(new_w), quantize_to_factor(new_h)

From 14ced560fe2716abe33405468b49f131c78f1746 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 25 May 2026 13:16:46 +0000
Subject: [PATCH 2/5] Tighten degenerate-aspect clamp comment

---
 unsloth_zoo/vision_utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/unsloth_zoo/vision_utils.py b/unsloth_zoo/vision_utils.py
index 0d1f7aa05..6903b0acd 100644
--- a/unsloth_zoo/vision_utils.py
+++ b/unsloth_zoo/vision_utils.py
@@ -978,9 +978,8 @@ def quantize_to_factor(x):
                 image[i] = img.resize(image_size, LANCZOS)
             elif self.size_func(img) > image_size and hasattr(img, "resize"):
                 w, h = img.size
-                # integer math rounding; clamp to >=1 so degenerate aspect
-                # ratios (e.g. 1024x1, 4000x4) where the downscale would
-                # round one side to 0 do not crash PIL.resize.
+                # integer math rounding; max(1, _) avoids zero-side crash
+                # on degenerate aspect ratios (e.g. 1024x1 with image_size=256).
                 new_w = max(1, (w * image_size + self.size_func(img) // 2) // self.size_func(img))
                 new_h = max(1, (h * image_size + self.size_func(img) // 2) // self.size_func(img))
                 if self.snap_to_patch_size:

From 5ab1205cffe183ec205e6ff840b038d789ef8bad Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 27 May 2026 13:38:30 +0000
Subject: [PATCH 3/5] Refactor _resize_images_inplace: cache size_func, hoist
 invariants

Pure refactor on top of the existing clamp; output byte-identical for every input that has worked since the collator landed.

- Hoist the is_tuple check, the snap_to_patch_size lookup, and the
  factor computation (patch_size * 2) out of the per-image loop.
- Cache size_func(img) so it is not called 3x per image.

No behavioral change. Same code paths, same numeric output, same exception surface.
---
 unsloth_zoo/vision_utils.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/unsloth_zoo/vision_utils.py b/unsloth_zoo/vision_utils.py
index 6903b0acd..e54aa9310 100644
--- a/unsloth_zoo/vision_utils.py
+++ b/unsloth_zoo/vision_utils.py
@@ -972,18 +972,25 @@ def quantize_to_factor(x):
             return image or []
         # Resize images
         image_size = self.image_size
+        # Loop invariants hoisted once per call.
+        is_tuple = type(image_size) is tuple
+        snap = self.snap_to_patch_size
+        if snap:
+            factor = self.patch_size * 2
 
         for i, img in enumerate(image):
-            if type(image_size) is tuple:
+            if is_tuple:
                 image[i] = img.resize(image_size, LANCZOS)
-            elif self.size_func(img) > image_size and hasattr(img, "resize"):
+                continue
+            # Cache size_func(img) so it is not called 3x per image.
+            side = self.size_func(img)
+            if side > image_size and hasattr(img, "resize"):
                 w, h = img.size
                 # integer math rounding; max(1, _) avoids zero-side crash
                 # on degenerate aspect ratios (e.g. 1024x1 with image_size=256).
-                new_w = max(1, (w * image_size + self.size_func(img) // 2) // self.size_func(img))
-                new_h = max(1, (h * image_size + self.size_func(img) // 2) // self.size_func(img))
-                if self.snap_to_patch_size:
-                    factor = self.patch_size * 2
+                new_w = max(1, (w * image_size + side // 2) // side)
+                new_h = max(1, (h * image_size + side // 2) // side)
+                if snap:
                     new_w, new_h = quantize_to_factor(new_w), quantize_to_factor(new_h)
 
                 image[i] = img.resize((new_w, new_h), LANCZOS)

From 39737603cca437712ab5a30e79e8ef58ae838683 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 27 May 2026 15:00:02 +0000
Subject: [PATCH 4/5] Warn on post-resize aspect_ratio > MAX_RATIO

After the clamp, surface a one-shot UserWarning when the resized image
would have aspect_ratio > 200 (MAX_RATIO). Qwen2-VL / Qwen2.5-VL
preprocessors reject such inputs in their own smart_resize; without
this warning users only see the downstream crash and have no signal
that the issue is a degenerate-aspect training image. Non-degenerate
inputs are unaffected (warning is gated on the same MAX_RATIO check
zoo's own smart_resize already enforces).
---
 unsloth_zoo/vision_utils.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/unsloth_zoo/vision_utils.py b/unsloth_zoo/vision_utils.py
index e54aa9310..04ef1bd56 100644
--- a/unsloth_zoo/vision_utils.py
+++ b/unsloth_zoo/vision_utils.py
@@ -92,6 +92,10 @@
 MAX_PIXELS = 16384 * 28 * 28
 MAX_RATIO = 200
 
+# One-shot guard so the degenerate-aspect warning fires once per process,
+# not once per image / per batch.
+_WARNED_DEGENERATE_ASPECT = False
+
 VIDEO_MIN_PIXELS = 128 * 28 * 28
 VIDEO_MAX_PIXELS = 768 * 28 * 28
 VIDEO_TOTAL_PIXELS = int(float(os.environ.get('VIDEO_MAX_PIXELS', 128000 * 28 * 28 * 0.9)))
@@ -993,6 +997,25 @@ def quantize_to_factor(x):
                 if snap:
                     new_w, new_h = quantize_to_factor(new_w), quantize_to_factor(new_h)
 
+                # Heads-up: Qwen2-VL / Qwen2.5-VL preprocessors reject inputs
+                # with aspect_ratio > MAX_RATIO via smart_resize. Surface a
+                # single, actionable warning so users learn to filter their
+                # dataset before the downstream crash.
+                global _WARNED_DEGENERATE_ASPECT
+                if (not _WARNED_DEGENERATE_ASPECT
+                        and max(new_w, new_h) > MAX_RATIO * min(new_w, new_h)):
+                    _WARNED_DEGENERATE_ASPECT = True
+                    warnings.warn(
+                        f"Unsloth: image {w}x{h} resized to "
+                        f"({new_w}, {new_h}) has aspect ratio "
+                        f"{max(new_w, new_h) // min(new_w, new_h)}, exceeding "
+                        f"MAX_RATIO={MAX_RATIO}. Qwen2-VL / Qwen2.5-VL will "
+                        "reject this in smart_resize; filter degenerate-aspect "
+                        "images from your dataset before training those models.",
+                        UserWarning,
+                        stacklevel = 2,
+                    )
+
                 image[i] = img.resize((new_w, new_h), LANCZOS)
 
         return image

From 4a026c1b128d8b62213f9ddba810d49e6136ed68 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 27 May 2026 15:45:37 +0000
Subject: [PATCH 5/5] Tighten comments in vision_utils._resize_images_inplace

---
 unsloth_zoo/vision_utils.py | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/unsloth_zoo/vision_utils.py b/unsloth_zoo/vision_utils.py
index 04ef1bd56..7d6b6cc9b 100644
--- a/unsloth_zoo/vision_utils.py
+++ b/unsloth_zoo/vision_utils.py
@@ -92,8 +92,7 @@
 MAX_PIXELS = 16384 * 28 * 28
 MAX_RATIO = 200
 
-# One-shot guard so the degenerate-aspect warning fires once per process,
-# not once per image / per batch.
+# Fire degenerate-aspect warning once per process.
 _WARNED_DEGENERATE_ASPECT = False
 
 VIDEO_MIN_PIXELS = 128 * 28 * 28
@@ -976,7 +975,7 @@ def quantize_to_factor(x):
             return image or []
         # Resize images
         image_size = self.image_size
-        # Loop invariants hoisted once per call.
+        # Hoist loop invariants.
         is_tuple = type(image_size) is tuple
         snap = self.snap_to_patch_size
         if snap:
@@ -986,32 +985,26 @@ def quantize_to_factor(x):
             if is_tuple:
                 image[i] = img.resize(image_size, LANCZOS)
                 continue
-            # Cache size_func(img) so it is not called 3x per image.
+            # Cache size_func(img) once.
             side = self.size_func(img)
             if side > image_size and hasattr(img, "resize"):
                 w, h = img.size
-                # integer math rounding; max(1, _) avoids zero-side crash
-                # on degenerate aspect ratios (e.g. 1024x1 with image_size=256).
+                # max(1, _) avoids zero-side crash on degenerate aspect ratios.
                 new_w = max(1, (w * image_size + side // 2) // side)
                 new_h = max(1, (h * image_size + side // 2) // side)
                 if snap:
                     new_w, new_h = quantize_to_factor(new_w), quantize_to_factor(new_h)
 
-                # Heads-up: Qwen2-VL / Qwen2.5-VL preprocessors reject inputs
-                # with aspect_ratio > MAX_RATIO via smart_resize. Surface a
-                # single, actionable warning so users learn to filter their
-                # dataset before the downstream crash.
+                # Qwen2-VL smart_resize rejects aspect > MAX_RATIO; warn once.
                 global _WARNED_DEGENERATE_ASPECT
                 if (not _WARNED_DEGENERATE_ASPECT
                         and max(new_w, new_h) > MAX_RATIO * min(new_w, new_h)):
                     _WARNED_DEGENERATE_ASPECT = True
                     warnings.warn(
-                        f"Unsloth: image {w}x{h} resized to "
-                        f"({new_w}, {new_h}) has aspect ratio "
-                        f"{max(new_w, new_h) // min(new_w, new_h)}, exceeding "
-                        f"MAX_RATIO={MAX_RATIO}. Qwen2-VL / Qwen2.5-VL will "
-                        "reject this in smart_resize; filter degenerate-aspect "
-                        "images from your dataset before training those models.",
+                        f"Unsloth: {w}x{h} -> ({new_w}, {new_h}) aspect "
+                        f"{max(new_w, new_h) // min(new_w, new_h)} > "
+                        f"MAX_RATIO={MAX_RATIO}. Qwen2-VL/2.5-VL will reject; "
+                        "filter degenerate-aspect images from your dataset.",
                         UserWarning,
                         stacklevel = 2,
                     )