From 33ef1e1c0429c99f9d5647acea2e98c5d56ce430 Mon Sep 17 00:00:00 2001
From: Mukkesh Ganesh <mukmckenzie@gmail.com>
Date: Sat, 15 Mar 2025 23:58:07 -0700
Subject: [PATCH 1/3] Fix unsloth #2008: support bnb_config in convert_vllm_to_huggingface

---
 unsloth_zoo/vllm_utils.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/unsloth_zoo/vllm_utils.py b/unsloth_zoo/vllm_utils.py
index d0d76a840..4c0813b8e 100644
--- a/unsloth_zoo/vllm_utils.py
+++ b/unsloth_zoo/vllm_utils.py
@@ -542,22 +542,31 @@ def create_empty_causal_lm(config, dtype = torch.float16):
 
 
 @torch.inference_mode
-def convert_vllm_to_huggingface(quant_state_dict, config, dtype = torch.float16):
+def convert_vllm_to_huggingface(quant_state_dict, config, dtype = torch.float16, bnb_config=None):
     # All Unsloth Zoo code licensed under LGPLv3
     # Unmerges vLLM modules to create HF compatible model
     config.update({"torch_dtype" : dtype}) # Do not use config file's dtype!
     new_model = create_empty_causal_lm(config, dtype)
     quantization_config = getattr(config, "quantization_config", {})
     kwargs = dict()
-    if quantization_config != {}:
+    if quantization_config != {} or bnb_config:
         # Get quantization_config flags
-        compute_dtype = _get_dtype(quantization_config["bnb_4bit_compute_dtype"])
-        compute_dtype = dtype # Do not use config file's dtype!
-        kwargs["compress_statistics"] = quantization_config["bnb_4bit_use_double_quant"]
-        kwargs["quant_type"] = quantization_config["bnb_4bit_quant_type"]
-        kwargs["quant_storage"] = _get_dtype(quantization_config["bnb_4bit_quant_storage"])
-    pass
+        if quantization_config:
+            compute_dtype = _get_dtype(quantization_config["bnb_4bit_compute_dtype"])
+            compute_dtype = dtype  # Do not use config file's dtype!
+            kwargs["compress_statistics"] = quantization_config["bnb_4bit_use_double_quant"]
+            kwargs["quant_type"] = quantization_config["bnb_4bit_quant_type"]
+            kwargs["quant_storage"] = _get_dtype(quantization_config["bnb_4bit_quant_storage"])
+
+        # Get bnb_config flags
+        elif bnb_config:
+            compute_dtype = _get_dtype(bnb_config.bnb_4bit_compute_dtype)
+            compute_dtype = dtype  # Do not use config file's dtype!
+            kwargs["compress_statistics"] = bnb_config.bnb_4bit_use_double_quant
+            kwargs["quant_type"] = bnb_config.bnb_4bit_quant_type
+            kwargs["quant_storage"] = _get_dtype(bnb_config.bnb_4bit_quant_storage)
 
+    pass
     from bitsandbytes.nn.modules import Linear4bit, Params4bit
     from torch.nn.modules import Linear
 

From 0f145cc95042f6b9779b99e6145f3f9d5ba7008b Mon Sep 17 00:00:00 2001
From: Mukkesh Ganesh <mukmckenzie@gmail.com>
Date: Sun, 16 Mar 2025 09:14:25 -0700
Subject: [PATCH 2/3] Always set compute_dtype from dtype, including non-quantized models

---
 unsloth_zoo/vllm_utils.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/unsloth_zoo/vllm_utils.py b/unsloth_zoo/vllm_utils.py
index 4c0813b8e..6859baa5f 100644
--- a/unsloth_zoo/vllm_utils.py
+++ b/unsloth_zoo/vllm_utils.py
@@ -549,19 +549,17 @@ def convert_vllm_to_huggingface(quant_state_dict, config, dtype = torch.float16,
     new_model = create_empty_causal_lm(config, dtype)
     quantization_config = getattr(config, "quantization_config", {})
     kwargs = dict()
+    compute_dtype = dtype  # Do not use config file's dtype!
+
     if quantization_config != {} or bnb_config:
         # Get quantization_config flags
         if quantization_config:
-            compute_dtype = _get_dtype(quantization_config["bnb_4bit_compute_dtype"])
-            compute_dtype = dtype  # Do not use config file's dtype!
             kwargs["compress_statistics"] = quantization_config["bnb_4bit_use_double_quant"]
             kwargs["quant_type"] = quantization_config["bnb_4bit_quant_type"]
             kwargs["quant_storage"] = _get_dtype(quantization_config["bnb_4bit_quant_storage"])
 
         # Get bnb_config flags
         elif bnb_config:
-            compute_dtype = _get_dtype(bnb_config.bnb_4bit_compute_dtype)
-            compute_dtype = dtype  # Do not use config file's dtype!
             kwargs["compress_statistics"] = bnb_config.bnb_4bit_use_double_quant
             kwargs["quant_type"] = bnb_config.bnb_4bit_quant_type
             kwargs["quant_storage"] = _get_dtype(bnb_config.bnb_4bit_quant_storage)

From 62973b458f117cd53603333971c27e7c9c84e42d Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 16 Mar 2025 15:13:20 -0700
Subject: [PATCH 3/3] vllm_utils: use explicit `is not None` / `!= {}` config checks

---
 unsloth_zoo/vllm_utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/unsloth_zoo/vllm_utils.py b/unsloth_zoo/vllm_utils.py
index 6859baa5f..8631a6c3a 100644
--- a/unsloth_zoo/vllm_utils.py
+++ b/unsloth_zoo/vllm_utils.py
@@ -542,7 +542,7 @@ def create_empty_causal_lm(config, dtype = torch.float16):
 
 
 @torch.inference_mode
-def convert_vllm_to_huggingface(quant_state_dict, config, dtype = torch.float16, bnb_config=None):
+def convert_vllm_to_huggingface(quant_state_dict, config, dtype = torch.float16, bnb_config = None):
     # All Unsloth Zoo code licensed under LGPLv3
     # Unmerges vLLM modules to create HF compatible model
     config.update({"torch_dtype" : dtype}) # Do not use config file's dtype!
@@ -551,15 +551,15 @@ def convert_vllm_to_huggingface(quant_state_dict, config, dtype = torch.float16,
     kwargs = dict()
     compute_dtype = dtype  # Do not use config file's dtype!
 
-    if quantization_config != {} or bnb_config:
+    if quantization_config != {} or bnb_config is not None:
         # Get quantization_config flags
-        if quantization_config:
+        if quantization_config != {}:
             kwargs["compress_statistics"] = quantization_config["bnb_4bit_use_double_quant"]
             kwargs["quant_type"] = quantization_config["bnb_4bit_quant_type"]
             kwargs["quant_storage"] = _get_dtype(quantization_config["bnb_4bit_quant_storage"])
 
         # Get bnb_config flags
-        elif bnb_config:
+        elif bnb_config is not None:
             kwargs["compress_statistics"] = bnb_config.bnb_4bit_use_double_quant
             kwargs["quant_type"] = bnb_config.bnb_4bit_quant_type
             kwargs["quant_storage"] = _get_dtype(bnb_config.bnb_4bit_quant_storage)