diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 5bc9529e60..11423f9066 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2024.12.1" +__version__ = "2024.12.2" __all__ = [ "prepare_model_for_kbit_training", diff --git a/unsloth/models/mapper.py b/unsloth/models/mapper.py index b2f73aa6c2..0ba03ce01c 100644 --- a/unsloth/models/mapper.py +++ b/unsloth/models/mapper.py @@ -452,33 +452,45 @@ "unsloth/Llama-3.1-Nemotron-70B-Instruct", "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", ), - "unsloth/Qwen2-VL-2B-Instruct-bnb-4bit" : ( + "unsloth/Qwen2-VL-2B-Instruct-unsloth-bnb-4bit" : ( "unsloth/Qwen2-VL-2B-Instruct", "Qwen/Qwen2-VL-2B-Instruct", + "unsloth/Qwen2-VL-2B-Instruct-bnb-4bit", ), - "unsloth/Qwen2-VL-7B-Instruct-bnb-4bit" : ( + "unsloth/Qwen2-VL-7B-Instruct-unsloth-bnb-4bit" : ( "unsloth/Qwen2-VL-7B-Instruct", "Qwen/Qwen2-VL-7B-Instruct", + "unsloth/Qwen2-VL-7B-Instruct-bnb-4bit", ), - "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit" : ( + "unsloth/Qwen2-VL-72B-Instruct-unsloth-bnb-4bit" : ( + "unsloth/Qwen2-VL-72B-Instruct", + "Qwen/Qwen2-VL-72B-Instruct", + "unsloth/Qwen2-VL-72B-Instruct-bnb-4bit", + ), + "unsloth/Llama-3.2-11B-Vision-Instruct-unsloth-bnb-4bit" : ( "unsloth/Llama-3.2-11B-Vision-Instruct", "meta-llama/Llama-3.2-11B-Vision-Instruct", + "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit", ), - "unsloth/Llama-3.2-90B-Vision-Instruct-bnb-4bit" : ( + "unsloth/Llama-3.2-90B-Vision-Instruct-unsloth-bnb-4bit" : ( "unsloth/Llama-3.2-90B-Vision-Instruct", "meta-llama/Llama-3.2-90B-Vision-Instruct", + "unsloth/Llama-3.2-90B-Vision-Instruct-bnb-4bit", ), - "unsloth/Llama-3.2-11B-Vision-bnb-4bit" : ( + "unsloth/Llama-3.2-11B-Vision-unsloth-bnb-4bit" : ( "unsloth/Llama-3.2-11B-Vision", "meta-llama/Llama-3.2-11B-Vision", + "unsloth/Llama-3.2-11B-Vision-bnb-4bit", ), - "unsloth/Llama-3.2-90B-Vision-bnb-4bit" : ( + "unsloth/Llama-3.2-90B-Vision-unsloth-bnb-4bit" : ( "unsloth/Llama-3.2-90B-Vision", "meta-llama/Llama-3.2-90B-Vision", + "unsloth/Llama-3.2-90B-Vision-bnb-4bit", ), - "unsloth/Pixtral-12B-2409-bnb-4bit" : ( + "unsloth/Pixtral-12B-2409-unsloth-bnb-4bit" : ( "unsloth/Pixtral-12B-2409", "mistralai/Pixtral-12B-2409", + "unsloth/Pixtral-12B-2409-bnb-4bit", ), "unsloth/Pixtral-12B-2409-Base-bnb-4bit" : ( "unsloth/Pixtral-12B-Base-2409", @@ -500,6 +512,10 @@ "unsloth/Llama-3.1-Tulu-3-70B", "allenai/Llama-3.1-Tulu-3-70B", ), + "unsloth/QwQ-32B-Preview-bnb-4bit" : ( + "unsloth/QwQ-32B-Preview", + "Qwen/QwQ-32B-Preview", + ), } INT_TO_FLOAT_MAPPER = {} @@ -519,6 +535,14 @@ MAP_TO_UNSLOTH_16bit[values[1]] = values[0] MAP_TO_UNSLOTH_16bit[values[1].lower()] = values[0] pass + elif len(values) == 3: + # Dynamic Unsloth quantization + if values[0].startswith("unsloth"): + MAP_TO_UNSLOTH_16bit[values[1]] = values[0] + MAP_TO_UNSLOTH_16bit[values[1].lower()] = values[0] + MAP_TO_UNSLOTH_16bit[values[2]] = values[0] + MAP_TO_UNSLOTH_16bit[values[2].lower()] = values[0] + pass pass # Get lowercased diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index aa4cc09022..0f682c6b40 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -30,7 +30,7 @@ from unsloth_zoo.peft_utils import ( get_peft_regex, merge_and_overwrite_lora, - # SKIP_QUANTIZATION_MODULES, + SKIP_QUANTIZATION_MODULES, ) from triton import __version__ as triton_version @@ -133,7 +133,7 @@ def from_pretrained( bnb_4bit_use_double_quant = True, bnb_4bit_quant_type = "nf4", bnb_4bit_compute_dtype = dtype, - # llm_int8_skip_modules = SKIP_QUANTIZATION_MODULES, + llm_int8_skip_modules = SKIP_QUANTIZATION_MODULES, ) pass