From 33ef1e1c0429c99f9d5647acea2e98c5d56ce430 Mon Sep 17 00:00:00 2001 From: Mukkesh Ganesh Date: Sat, 15 Mar 2025 23:58:07 -0700 Subject: [PATCH 1/3] bug fix #2008 unsloth --- unsloth_zoo/vllm_utils.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/unsloth_zoo/vllm_utils.py b/unsloth_zoo/vllm_utils.py index d0d76a840..4c0813b8e 100644 --- a/unsloth_zoo/vllm_utils.py +++ b/unsloth_zoo/vllm_utils.py @@ -542,22 +542,31 @@ def create_empty_causal_lm(config, dtype = torch.float16): @torch.inference_mode -def convert_vllm_to_huggingface(quant_state_dict, config, dtype = torch.float16): +def convert_vllm_to_huggingface(quant_state_dict, config, dtype = torch.float16, bnb_config=None): # All Unsloth Zoo code licensed under LGPLv3 # Unmerges vLLM modules to create HF compatible model config.update({"torch_dtype" : dtype}) # Do not use config file's dtype! new_model = create_empty_causal_lm(config, dtype) quantization_config = getattr(config, "quantization_config", {}) kwargs = dict() - if quantization_config != {}: + if quantization_config != {} or bnb_config: # Get quantization_config flags - compute_dtype = _get_dtype(quantization_config["bnb_4bit_compute_dtype"]) - compute_dtype = dtype # Do not use config file's dtype! - kwargs["compress_statistics"] = quantization_config["bnb_4bit_use_double_quant"] - kwargs["quant_type"] = quantization_config["bnb_4bit_quant_type"] - kwargs["quant_storage"] = _get_dtype(quantization_config["bnb_4bit_quant_storage"]) - pass + if quantization_config: + compute_dtype = _get_dtype(quantization_config["bnb_4bit_compute_dtype"]) + compute_dtype = dtype # Do not use config file's dtype! + kwargs["compress_statistics"] = quantization_config["bnb_4bit_use_double_quant"] + kwargs["quant_type"] = quantization_config["bnb_4bit_quant_type"] + kwargs["quant_storage"] = _get_dtype(quantization_config["bnb_4bit_quant_storage"]) + + # Get bnb_config flags + elif bnb_config: + compute_dtype = _get_dtype(bnb_config.bnb_4bit_compute_dtype) + compute_dtype = dtype # Do not use config file's dtype! + kwargs["compress_statistics"] = bnb_config.bnb_4bit_use_double_quant + kwargs["quant_type"] = bnb_config.bnb_4bit_quant_type + kwargs["quant_storage"] = _get_dtype(bnb_config.bnb_4bit_quant_storage) + pass from bitsandbytes.nn.modules import Linear4bit, Params4bit from torch.nn.modules import Linear From 0f145cc95042f6b9779b99e6145f3f9d5ba7008b Mon Sep 17 00:00:00 2001 From: Mukkesh Ganesh Date: Sun, 16 Mar 2025 09:14:25 -0700 Subject: [PATCH 2/3] non-quant dtype fix --- unsloth_zoo/vllm_utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/unsloth_zoo/vllm_utils.py b/unsloth_zoo/vllm_utils.py index 4c0813b8e..6859baa5f 100644 --- a/unsloth_zoo/vllm_utils.py +++ b/unsloth_zoo/vllm_utils.py @@ -549,19 +549,17 @@ def convert_vllm_to_huggingface(quant_state_dict, config, dtype = torch.float16, new_model = create_empty_causal_lm(config, dtype) quantization_config = getattr(config, "quantization_config", {}) kwargs = dict() + compute_dtype = dtype # Do not use config file's dtype! + if quantization_config != {} or bnb_config: # Get quantization_config flags if quantization_config: - compute_dtype = _get_dtype(quantization_config["bnb_4bit_compute_dtype"]) - compute_dtype = dtype # Do not use config file's dtype! kwargs["compress_statistics"] = quantization_config["bnb_4bit_use_double_quant"] kwargs["quant_type"] = quantization_config["bnb_4bit_quant_type"] kwargs["quant_storage"] = _get_dtype(quantization_config["bnb_4bit_quant_storage"]) # Get bnb_config flags elif bnb_config: - compute_dtype = _get_dtype(bnb_config.bnb_4bit_compute_dtype) - compute_dtype = dtype # Do not use config file's dtype! kwargs["compress_statistics"] = bnb_config.bnb_4bit_use_double_quant kwargs["quant_type"] = bnb_config.bnb_4bit_quant_type kwargs["quant_storage"] = _get_dtype(bnb_config.bnb_4bit_quant_storage) From 62973b458f117cd53603333971c27e7c9c84e42d Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sun, 16 Mar 2025 15:13:20 -0700 Subject: [PATCH 3/3] Update vllm_utils.py --- unsloth_zoo/vllm_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unsloth_zoo/vllm_utils.py b/unsloth_zoo/vllm_utils.py index 6859baa5f..8631a6c3a 100644 --- a/unsloth_zoo/vllm_utils.py +++ b/unsloth_zoo/vllm_utils.py @@ -542,7 +542,7 @@ def create_empty_causal_lm(config, dtype = torch.float16): @torch.inference_mode -def convert_vllm_to_huggingface(quant_state_dict, config, dtype = torch.float16, bnb_config=None): +def convert_vllm_to_huggingface(quant_state_dict, config, dtype = torch.float16, bnb_config = None): # All Unsloth Zoo code licensed under LGPLv3 # Unmerges vLLM modules to create HF compatible model config.update({"torch_dtype" : dtype}) # Do not use config file's dtype! @@ -551,15 +551,15 @@ def convert_vllm_to_huggingface(quant_state_dict, config, dtype = torch.float16, kwargs = dict() compute_dtype = dtype # Do not use config file's dtype! - if quantization_config != {} or bnb_config: + if quantization_config != {} or bnb_config is not None: # Get quantization_config flags - if quantization_config: + if quantization_config != {}: kwargs["compress_statistics"] = quantization_config["bnb_4bit_use_double_quant"] kwargs["quant_type"] = quantization_config["bnb_4bit_quant_type"] kwargs["quant_storage"] = _get_dtype(quantization_config["bnb_4bit_quant_storage"]) # Get bnb_config flags - elif bnb_config: + elif bnb_config is not None: kwargs["compress_statistics"] = bnb_config.bnb_4bit_use_double_quant kwargs["quant_type"] = bnb_config.bnb_4bit_quant_type kwargs["quant_storage"] = _get_dtype(bnb_config.bnb_4bit_quant_storage)