From cc29fc4ff16f6d37a732d7784fe8dd7829789a66 Mon Sep 17 00:00:00 2001
From: danielhanchen <danielhanchen@gmail.com>
Date: Wed, 15 Apr 2026 14:07:41 +0000
Subject: [PATCH 1/4] Fix Windows file lock on resized shard rewrite with
 atomic os.replace

The vocab-resize rewrite branch in _merge_and_overwrite_lora re-opened
filename_original via safe_open before calling save_file on the same
path. On Windows the Rust safetensors MapViewOfFile release can lag
after the context manager exits, so the follow-up write hit WinError
1224 ("cannot be performed on a file with a user-mapped section open")
during Unsloth Studio GGUF exports.

Write to a sibling temp file and swap it in with os.replace, which maps
to MoveFileExW(MOVEFILE_REPLACE_EXISTING) on Windows and rename(2) on
POSIX. The original shard is never absent if the replacement fails, so
a transient antivirus (AV) or indexer lock cannot cause data loss. A
single gc.collect before the first attempt drops the lingering mmap,
which is the load-bearing step for the 1224 case. A short retry loop
with errno/winerror-based lock detection handles AV rescan jitter.

Supersedes #589 with an atomic-replace implementation that closes a
delete-then-move data-loss window present in that PR. Credit to
@PantelisAndrianakis for identifying and reporting the issue.
---
 unsloth_zoo/saving_utils.py | 73 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/unsloth_zoo/saving_utils.py b/unsloth_zoo/saving_utils.py
index 4800306ff..aa82a3731 100644
--- a/unsloth_zoo/saving_utils.py
+++ b/unsloth_zoo/saving_utils.py
@@ -792,7 +792,78 @@ def _merge_and_overwrite_lora(
                         tensors[key] = resized[key]
                     else:
                         tensors[key] = f.get_tensor(key)
-            save_file(tensors, filename_original)
+
+            # Fix for Windows file locking (WinError 1224: "cannot be performed
+            # on a file with a user-mapped section open"). Root cause: the
+            # safe_open block above memory-maps filename_original via the Rust
+            # safetensors backend, and on Windows the MapViewOfFile section
+            # release can lag after __exit__, so a following save_file that
+            # re-opens the same path for writing can hit WinError 1224. Force a
+            # collect once to drop the lingering mmap, then write to a sibling
+            # temp file and atomically swap it in with os.replace. os.replace
+            # maps to MoveFileExW(MOVEFILE_REPLACE_EXISTING) on Windows and
+            # rename(2) on POSIX, so the original shard is never absent if the
+            # replacement fails.
+            from safetensors import SafetensorError
+
+            gc.collect()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
+            max_retries = 5
+            base_delay  = 0.2  # seconds
+            temp_dir    = os.path.dirname(os.path.abspath(filename_original))
+            fd, tmp_path = tempfile.mkstemp(dir=temp_dir, suffix=".safetensors.tmp")
+            os.close(fd)
+
+            try:
+                for attempt in range(max_retries):
+                    try:
+                        save_file(tensors, tmp_path)
+                        os.replace(tmp_path, filename_original)
+                        tmp_path = None
+                        break
+                    except (OSError, SafetensorError) as e:
+                        winerror  = getattr(e, "winerror", None)
+                        errno_    = getattr(e, "errno", None)
+                        error_msg = str(e).lower()
+                        is_lock_error = (
+                            winerror in {5, 32, 1224}
+                            or errno_ in {5, 13, 32, 1224}
+                            or "user-mapped" in error_msg
+                            or "being used by another process" in error_msg
+                            or "access is denied" in error_msg
+                            or "cannot be performed" in error_msg
+                        )
+                        if is_lock_error and attempt < max_retries - 1:
+                            delay = base_delay * (2 ** attempt)
+                            if UNSLOTH_ENABLE_LOGGING:
+                                logger.warning(
+                                    f"[Retry {attempt + 1}/{max_retries}] Windows file lock "
+                                    f"detected for {filename_original}: {e}. "
+                                    f"Waiting {delay:.1f}s before retry..."
+                                )
+                            gc.collect()
+                            time.sleep(delay)
+                            continue
+                        if is_lock_error:
+                            raise RuntimeError(
+                                f"Failed to rewrite {filename_original} after {max_retries} "
+                                f"attempts due to Windows file lock. Original shard is intact "
+                                f"(atomic replace never committed). "
+                                f"Solutions: 1) Restart Unsloth Studio 2) Disable antivirus "
+                                f"3) Close File Explorer windows"
+                            ) from e
+                        raise RuntimeError(
+                            f"Model merge failed while rewriting {filename_original}: {e}"
+                        ) from e
+            finally:
+                if tmp_path is not None and os.path.exists(tmp_path):
+                    try:
+                        os.remove(tmp_path)
+                    except OSError:
+                        pass
+
             del tensors
 
         if torch.cuda.is_available():

From 24f5ddec4a46f442f6ed7e3beeaadc83391683d0 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 16 Apr 2026 13:26:10 +0000
Subject: [PATCH 2/4] Fix review findings: gate POSIX false positives, hoist
 save_file, preserve permissions

- Remove errno-based lock detection that produces false positives on
  POSIX (EIO=5, EACCES=13, EPIPE=32); keep only the winerror and
  Windows-specific string checks
- Move save_file() before retry loop so multi-GB shards are not re-serialized
  on each os.replace retry
- Preserve original file permissions via os.chmod before os.replace (mkstemp
  creates 0o600, but originals are typically 0o644)
- Gate gc.collect() + torch.cuda.empty_cache() behind os.name == 'nt' since
  the mmap lag is Windows-only
- Use module-level safetensors.SafetensorError instead of in-function import
---
 unsloth_zoo/saving_utils.py | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/unsloth_zoo/saving_utils.py b/unsloth_zoo/saving_utils.py
index aa82a3731..9765fe28c 100644
--- a/unsloth_zoo/saving_utils.py
+++ b/unsloth_zoo/saving_utils.py
@@ -804,11 +804,10 @@ def _merge_and_overwrite_lora(
             # maps to MoveFileExW(MOVEFILE_REPLACE_EXISTING) on Windows and
             # rename(2) on POSIX, so the original shard is never absent if the
             # replacement fails.
-            from safetensors import SafetensorError
-
-            gc.collect()
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
+            if os.name == 'nt':
+                gc.collect()
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
 
             max_retries = 5
             base_delay  = 0.2  # seconds
@@ -817,23 +816,25 @@ def _merge_and_overwrite_lora(
             os.close(fd)
 
             try:
+                save_file(tensors, tmp_path)
+                # Preserve original file permissions (mkstemp creates 0o600)
+                try:
+                    original_mode = os.stat(filename_original).st_mode
+                    os.chmod(tmp_path, original_mode)
+                except OSError:
+                    pass
                 for attempt in range(max_retries):
                     try:
-                        save_file(tensors, tmp_path)
                         os.replace(tmp_path, filename_original)
                         tmp_path = None
                         break
-                    except (OSError, SafetensorError) as e:
+                    except (OSError, safetensors.SafetensorError) as e:
                         winerror  = getattr(e, "winerror", None)
-                        errno_    = getattr(e, "errno", None)
                         error_msg = str(e).lower()
                         is_lock_error = (
                             winerror in {5, 32, 1224}
-                            or errno_ in {5, 13, 32, 1224}
                             or "user-mapped" in error_msg
                             or "being used by another process" in error_msg
-                            or "access is denied" in error_msg
-                            or "cannot be performed" in error_msg
                         )
                         if is_lock_error and attempt < max_retries - 1:
                             delay = base_delay * (2 ** attempt)
@@ -843,7 +844,8 @@ def _merge_and_overwrite_lora(
                                     f"detected for {filename_original}: {e}. "
                                     f"Waiting {delay:.1f}s before retry..."
                                 )
-                            gc.collect()
+                            if os.name == 'nt':
+                                gc.collect()
                             time.sleep(delay)
                             continue
                         if is_lock_error:

From 659116e12a4c02c71be47e1eb919b74c91055e53 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 16 Apr 2026 15:37:08 +0000
Subject: [PATCH 3/4] Fix review findings: POSIX-only direct save, tighten lock
 classifier, cleanup

1. Gate temp-file swap to Windows only (os.name == 'nt'). On POSIX,
   save_file writes directly to the original path, which preserves
   symlinks and hardlinks and avoids the extra disk copy.

2. Tighten the winerror=5 lock classifier. Bare ACCESS_DENIED no longer
   triggers a retry; it is retried only when the error message also
   contains "user-mapped", "being used by another process", or
   "sharing violation".

3. Remove dead SafetensorError from except clause. os.replace only raises
   OSError; SafetensorError is unreachable.

4. Release mmap-backed tensor refs (del tensors + gc.collect) before the
   os.replace retry loop, not after. This drops the lingering mmap on
   the source shard before attempting the replace.

5. Preserve exception cause chain in outer except. RuntimeError is now
   re-raised unchanged instead of being wrapped a second time; other
   exceptions are wrapped with 'from e' so __cause__ is set explicitly
   on the resulting RuntimeError.
---
 unsloth_zoo/saving_utils.py | 146 ++++++++++++++++++++----------------
 1 file changed, 82 insertions(+), 64 deletions(-)

diff --git a/unsloth_zoo/saving_utils.py b/unsloth_zoo/saving_utils.py
index 9765fe28c..2c6b9e665 100644
--- a/unsloth_zoo/saving_utils.py
+++ b/unsloth_zoo/saving_utils.py
@@ -793,87 +793,105 @@ def _merge_and_overwrite_lora(
                     else:
                         tensors[key] = f.get_tensor(key)
 
-            # Fix for Windows file locking (WinError 1224: "cannot be performed
-            # on a file with a user-mapped section open"). Root cause: the
-            # safe_open block above memory-maps filename_original via the Rust
-            # safetensors backend, and on Windows the MapViewOfFile section
-            # release can lag after __exit__, so a following save_file that
-            # re-opens the same path for writing can hit WinError 1224. Force a
-            # collect once to drop the lingering mmap, then write to a sibling
-            # temp file and atomically swap it in with os.replace. os.replace
-            # maps to MoveFileExW(MOVEFILE_REPLACE_EXISTING) on Windows and
-            # rename(2) on POSIX, so the original shard is never absent if the
-            # replacement fails.
-            if os.name == 'nt':
+            # Fix for Windows file locking (WinError 1224). On POSIX,
+            # save_file directly -- no temp-file dance needed (preserves
+            # symlinks, hardlinks, and avoids extra disk usage).
+            if os.name != "nt":
+                save_file(tensors, filename_original)
+                del tensors
+            else:
+                # Windows: atomic temp-file + os.replace with retry.
+                # The safe_open block above memory-maps filename_original
+                # via the Rust safetensors backend, and on Windows the
+                # MapViewOfFile section release can lag after __exit__,
+                # so a following save_file that re-opens the same path
+                # for writing can hit WinError 1224.
                 gc.collect()
                 if torch.cuda.is_available():
                     torch.cuda.empty_cache()
 
-            max_retries = 5
-            base_delay  = 0.2  # seconds
-            temp_dir    = os.path.dirname(os.path.abspath(filename_original))
-            fd, tmp_path = tempfile.mkstemp(dir=temp_dir, suffix=".safetensors.tmp")
-            os.close(fd)
-
-            try:
-                save_file(tensors, tmp_path)
-                # Preserve original file permissions (mkstemp creates 0o600)
+                max_retries = 5
+                base_delay  = 0.2  # seconds
+                temp_dir    = os.path.dirname(os.path.abspath(filename_original))
                 try:
                     original_mode = os.stat(filename_original).st_mode
-                    os.chmod(tmp_path, original_mode)
                 except OSError:
-                    pass
-                for attempt in range(max_retries):
-                    try:
-                        os.replace(tmp_path, filename_original)
-                        tmp_path = None
-                        break
-                    except (OSError, safetensors.SafetensorError) as e:
-                        winerror  = getattr(e, "winerror", None)
-                        error_msg = str(e).lower()
-                        is_lock_error = (
-                            winerror in {5, 32, 1224}
-                            or "user-mapped" in error_msg
-                            or "being used by another process" in error_msg
-                        )
-                        if is_lock_error and attempt < max_retries - 1:
-                            delay = base_delay * (2 ** attempt)
-                            if UNSLOTH_ENABLE_LOGGING:
-                                logger.warning(
-                                    f"[Retry {attempt + 1}/{max_retries}] Windows file lock "
-                                    f"detected for {filename_original}: {e}. "
-                                    f"Waiting {delay:.1f}s before retry..."
+                    original_mode = None
+
+                fd, tmp_path = tempfile.mkstemp(dir=temp_dir, suffix=".safetensors.tmp")
+                os.close(fd)
+
+                try:
+                    save_file(tensors, tmp_path)
+                    if original_mode is not None:
+                        try:
+                            os.chmod(tmp_path, original_mode)
+                        except OSError:
+                            pass
+
+                    # Release mmap-backed tensor refs before replacing source shard
+                    del tensors
+                    gc.collect()
+                    if torch.cuda.is_available():
+                        torch.cuda.empty_cache()
+
+                    for attempt in range(max_retries):
+                        try:
+                            os.replace(tmp_path, filename_original)
+                            tmp_path = None
+                            break
+                        except OSError as e:
+                            winerror  = getattr(e, "winerror", None)
+                            error_msg = str(e).lower()
+                            is_lock_error = (
+                                winerror in {32, 1224}
+                                or (
+                                    winerror == 5 and (
+                                        "user-mapped" in error_msg
+                                        or "being used by another process" in error_msg
+                                        or "sharing violation" in error_msg
+                                    )
                                 )
-                            if os.name == 'nt':
+                                or "user-mapped" in error_msg
+                                or "being used by another process" in error_msg
+                            )
+                            if is_lock_error and attempt < max_retries - 1:
+                                delay = base_delay * (2 ** attempt)
+                                if UNSLOTH_ENABLE_LOGGING:
+                                    logger.warning(
+                                        f"[Retry {attempt + 1}/{max_retries}] Windows file lock "
+                                        f"detected for {filename_original}: {e}. "
+                                        f"Waiting {delay:.1f}s before retry..."
+                                    )
                                 gc.collect()
-                            time.sleep(delay)
-                            continue
-                        if is_lock_error:
+                                time.sleep(delay)
+                                continue
+                            if is_lock_error:
+                                raise RuntimeError(
+                                    f"Failed to rewrite {filename_original} after {max_retries} "
+                                    f"attempts due to Windows file lock. Original shard is intact "
+                                    f"(atomic replace never committed). "
+                                    f"Solutions: 1) Restart Unsloth Studio 2) Disable antivirus "
+                                    f"3) Close File Explorer windows"
+                                ) from e
                             raise RuntimeError(
-                                f"Failed to rewrite {filename_original} after {max_retries} "
-                                f"attempts due to Windows file lock. Original shard is intact "
-                                f"(atomic replace never committed). "
-                                f"Solutions: 1) Restart Unsloth Studio 2) Disable antivirus "
-                                f"3) Close File Explorer windows"
+                                f"Model merge failed while rewriting {filename_original}: {e}"
                             ) from e
-                        raise RuntimeError(
-                            f"Model merge failed while rewriting {filename_original}: {e}"
-                        ) from e
-            finally:
-                if tmp_path is not None and os.path.exists(tmp_path):
-                    try:
-                        os.remove(tmp_path)
-                    except OSError:
-                        pass
-
-            del tensors
+                finally:
+                    if tmp_path is not None and os.path.exists(tmp_path):
+                        try:
+                            os.remove(tmp_path)
+                        except OSError:
+                            pass
 
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
         return count, safetensor_keys_seen
 
+    except RuntimeError:
+        raise
     except Exception as e:
-        raise RuntimeError(f"Model merge failed with error: {e}")
+        raise RuntimeError(f"Model merge failed with error: {e}") from e
 
     finally:
         # Cleanup memory mapping

From ee0777f636a4df2c8364670696b5db255d76cda3 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 16 Apr 2026 15:41:00 +0000
Subject: [PATCH 4/4] Shorten inline comments

---
 unsloth_zoo/saving_utils.py | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/unsloth_zoo/saving_utils.py b/unsloth_zoo/saving_utils.py
index 2c6b9e665..e441f6e5b 100644
--- a/unsloth_zoo/saving_utils.py
+++ b/unsloth_zoo/saving_utils.py
@@ -793,19 +793,12 @@ def _merge_and_overwrite_lora(
                     else:
                         tensors[key] = f.get_tensor(key)
 
-            # Fix for Windows file locking (WinError 1224). On POSIX,
-            # save_file directly -- no temp-file dance needed (preserves
-            # symlinks, hardlinks, and avoids extra disk usage).
+            # POSIX: direct save. Windows: temp-file + os.replace to
+            # avoid WinError 1224 (mmap section release can lag).
             if os.name != "nt":
                 save_file(tensors, filename_original)
                 del tensors
             else:
-                # Windows: atomic temp-file + os.replace with retry.
-                # The safe_open block above memory-maps filename_original
-                # via the Rust safetensors backend, and on Windows the
-                # MapViewOfFile section release can lag after __exit__,
-                # so a following save_file that re-opens the same path
-                # for writing can hit WinError 1224.
                 gc.collect()
                 if torch.cuda.is_available():
                     torch.cuda.empty_cache()
@@ -829,7 +822,7 @@ def _merge_and_overwrite_lora(
                         except OSError:
                             pass
 
-                    # Release mmap-backed tensor refs before replacing source shard
+                    # Drop mmap refs before os.replace
                     del tensors
                     gc.collect()
                     if torch.cuda.is_available():