From f205ac82a5c1f0c581814babfca5b88840e91235 Mon Sep 17 00:00:00 2001 From: Pantelis Andrianakis <34748735+PantelisAndrianakis@users.noreply.github.com> Date: Fri, 10 Apr 2026 15:42:16 +0300 Subject: [PATCH 1/5] Fix for Windows file locking (os error 1224) Implement robust Windows file locking handling in model export The export process was failing with os error 1224 ("file with a user-mapped section open") when trying to save merged LoRA weights on Windows. This occurred because safetensors was memory-mapping files that couldn't be replaced due to locks held by the kernel or other processes. Changes: - Add retry logic with exponential backoff (10 attempts; per-retry delay grows from 0.2s to 3.2s, ~9.2s total wait) - Force garbage collection and CUDA cache cleanup before each write attempt - Attempt to delete the original locked file before writing - Write directly to target location instead of temp file (simpler atomic ops) - Improved error messages with clear remediation steps This allows GGUF exports to succeed on Windows systems with aggressive file locking (antivirus, indexing, etc.). Users can now export Gemma4 models directly in Unsloth Studio without workarounds. 
Tested with: Gemma4 model, Q4_K_M quantization, Windows 11 Pro --- unsloth_zoo/saving_utils.py | 49 ++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/unsloth_zoo/saving_utils.py b/unsloth_zoo/saving_utils.py index 4800306ff..01c8f0c56 100644 --- a/unsloth_zoo/saving_utils.py +++ b/unsloth_zoo/saving_utils.py @@ -792,7 +792,54 @@ def _merge_and_overwrite_lora( tensors[key] = resized[key] else: tensors[key] = f.get_tensor(key) - save_file(tensors, filename_original) + + # Fix for Windows file locking (os error 1224) + # Use retry logic with aggressive locking cleanup + import time + import tempfile + import shutil + import os as os_module + + max_retries = 10 + base_delay = 0.2 # seconds + + for attempt in range(max_retries): + try: + # Force garbage collection and CUDA cache cleanup + import gc + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + # Aggressive: Close and remove original file if it exists + if os_module.path.exists(filename_original): + try: + os_module.remove(filename_original) + if UNSLOTH_ENABLE_LOGGING: + logger.debug(f"Removed locked file: {filename_original}") + except (OSError, IOError): + # File still locked, will retry + pass + + # Write directly to target location + save_file(tensors, filename_original) + break # Success + + except (OSError, IOError) as e: + if attempt < max_retries - 1: + # Exponential backoff + delay = base_delay * (2 ** (attempt // 2)) + if UNSLOTH_ENABLE_LOGGING: + logger.warning( + f"[Retry {attempt + 1}/{max_retries}] File lock: {e}. " + f"Waiting {delay:.1f}s before retry..." + ) + time.sleep(delay) + else: + raise RuntimeError( + f"Failed to save file after {max_retries} attempts: {e}. 
" + ) + del tensors if torch.cuda.is_available(): From 4fa3009710ba07864dfcccdd2411fa73dacfce92 Mon Sep 17 00:00:00 2001 From: Pantelis Andrianakis <34748735+PantelisAndrianakis@users.noreply.github.com> Date: Fri, 10 Apr 2026 15:48:57 +0300 Subject: [PATCH 2/5] Removed unsloth_zoo/saving_utils.py redundant imports. Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- unsloth_zoo/saving_utils.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/unsloth_zoo/saving_utils.py b/unsloth_zoo/saving_utils.py index 01c8f0c56..d1edbe673 100644 --- a/unsloth_zoo/saving_utils.py +++ b/unsloth_zoo/saving_utils.py @@ -795,11 +795,6 @@ def _merge_and_overwrite_lora( # Fix for Windows file locking (os error 1224) # Use retry logic with aggressive locking cleanup - import time - import tempfile - import shutil - import os as os_module - max_retries = 10 base_delay = 0.2 # seconds From 41997c33b3458c10600239ba9ededef3c8cd123f Mon Sep 17 00:00:00 2001 From: Pantelis Andrianakis <34748735+PantelisAndrianakis@users.noreply.github.com> Date: Fri, 10 Apr 2026 15:50:08 +0300 Subject: [PATCH 3/5] Removed redundant unsloth_zoo/saving_utils.py local import of gc Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- unsloth_zoo/saving_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unsloth_zoo/saving_utils.py b/unsloth_zoo/saving_utils.py index d1edbe673..e30dd33ca 100644 --- a/unsloth_zoo/saving_utils.py +++ b/unsloth_zoo/saving_utils.py @@ -801,15 +801,15 @@ def _merge_and_overwrite_lora( for attempt in range(max_retries): try: # Force garbage collection and CUDA cache cleanup - import gc + # Force garbage collection and CUDA cache cleanup gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() # Aggressive: Close and remove original file if it exists - if os_module.path.exists(filename_original): + if os.path.exists(filename_original): 
try: - os_module.remove(filename_original) + os.remove(filename_original) if UNSLOTH_ENABLE_LOGGING: logger.debug(f"Removed locked file: {filename_original}") except (OSError, IOError): From 06249dd53d6599649131acf3cd4fd4acd994d714 Mon Sep 17 00:00:00 2001 From: Pantelis Andrianakis <34748735+PantelisAndrianakis@users.noreply.github.com> Date: Fri, 10 Apr 2026 15:57:20 +0300 Subject: [PATCH 4/5] Use retry logic with safe atomic operations Applied the safe atomic write fix to the new version. The key differences: - Write to temp file first (original stays intact) - Delete original only after successful write (no data loss) - Atomic move replaces with temp file - Cleanup on failure --- unsloth_zoo/saving_utils.py | 45 ++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/unsloth_zoo/saving_utils.py b/unsloth_zoo/saving_utils.py index e30dd33ca..5b56a0b58 100644 --- a/unsloth_zoo/saving_utils.py +++ b/unsloth_zoo/saving_utils.py @@ -794,31 +794,49 @@ def _merge_and_overwrite_lora( tensors[key] = f.get_tensor(key) # Fix for Windows file locking (os error 1224) - # Use retry logic with aggressive locking cleanup + # Use retry logic with safe atomic operations + import tempfile + import shutil + max_retries = 10 base_delay = 0.2 # seconds + temp_dir = os.path.dirname(filename_original) for attempt in range(max_retries): try: - # Force garbage collection and CUDA cache cleanup # Force garbage collection and CUDA cache cleanup gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() - # Aggressive: Close and remove original file if it exists - if os.path.exists(filename_original): - try: + # Create temp file in same directory for atomic replace + with tempfile.NamedTemporaryFile( + delete=False, + dir=temp_dir, + suffix=".safetensors.tmp" + ) as tmp_file: + tmp_path = tmp_file.name + + try: + # Write to temp file (safe - original untouched) + save_file(tensors, tmp_path) + + # Only delete original after successful 
write + if os.path.exists(filename_original): os.remove(filename_original) - if UNSLOTH_ENABLE_LOGGING: - logger.debug(f"Removed locked file: {filename_original}") - except (OSError, IOError): - # File still locked, will retry - pass - # Write directly to target location - save_file(tensors, filename_original) - break # Success + # Move temp to original location (atomic) + shutil.move(tmp_path, filename_original) + break # Success + + except Exception as write_error: + # Clean up temp file on write failure + try: + if os.path.exists(tmp_path): + os.remove(tmp_path) + except: + pass + raise write_error except (OSError, IOError) as e: if attempt < max_retries - 1: @@ -833,6 +851,7 @@ def _merge_and_overwrite_lora( else: raise RuntimeError( f"Failed to save file after {max_retries} attempts: {e}. " + "Keep original shard on write failure - no data loss." ) del tensors From 74d9ada054c29814acba2fa4b067c7e0359d2b74 Mon Sep 17 00:00:00 2001 From: Pantelis Andrianakis <34748735+PantelisAndrianakis@users.noreply.github.com> Date: Fri, 10 Apr 2026 16:02:25 +0300 Subject: [PATCH 5/5] Import safetensors exception to catch wrapped Windows errors --- unsloth_zoo/saving_utils.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/unsloth_zoo/saving_utils.py b/unsloth_zoo/saving_utils.py index 5b56a0b58..791dd9316 100644 --- a/unsloth_zoo/saving_utils.py +++ b/unsloth_zoo/saving_utils.py @@ -798,6 +798,12 @@ def _merge_and_overwrite_lora( import tempfile import shutil + # Import safetensors exception to catch wrapped Windows errors + try: + from safetensors.torch import SafetensorError + except ImportError: + SafetensorError = Exception # Fallback if not available + max_retries = 10 base_delay = 0.2 # seconds temp_dir = os.path.dirname(filename_original) @@ -838,21 +844,29 @@ def _merge_and_overwrite_lora( pass raise write_error - except (OSError, IOError) as e: - if attempt < max_retries - 1: - # Exponential backoff + except (OSError, 
IOError, SafetensorError) as e: + # Catch both OS errors and safetensors-wrapped Windows errors + error_msg = str(e).lower() + is_lock_error = "1224" in error_msg or "user-mapped" in error_msg or "cannot be performed" in error_msg + + if is_lock_error and attempt < max_retries - 1: + # Exponential backoff for lock errors delay = base_delay * (2 ** (attempt // 2)) if UNSLOTH_ENABLE_LOGGING: logger.warning( - f"[Retry {attempt + 1}/{max_retries}] File lock: {e}. " + f"[Retry {attempt + 1}/{max_retries}] Windows file lock detected: {e}. " f"Waiting {delay:.1f}s before retry..." ) time.sleep(delay) - else: + elif is_lock_error and attempt == max_retries - 1: raise RuntimeError( - f"Failed to save file after {max_retries} attempts: {e}. " - "Keep original shard on write failure - no data loss." + f"Failed to save file after {max_retries} attempts due to Windows file lock. " + "Original shard preserved - no data loss. " + "Solutions: 1) Restart Unsloth Studio 2) Disable antivirus 3) Close File Explorer windows" ) + else: + # Non-lock errors - fail immediately + raise RuntimeError(f"Model merge failed with error: {e}") del tensors