diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index ca33509f64..f3f1420f8c 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -2718,10 +2718,21 @@ def patch_peft_model(
     if lora_dropout == 0 and bias == "none":
         for idx, layer in enumerate(model.model.model.layers):
+            # Determine MLP module name (falcon_h1 has feed_forward, llama style has mlp)
+            if hasattr(layer, "mlp"):
+                mlp_module_name = "mlp"
+            elif hasattr(layer, "feed_forward"):
+                mlp_module_name = "feed_forward"
+            else:
+                logger.warning_once(f"Unsloth: No MLP module found in layer {idx} so skipping peft mlp patching")
+                continue
+
+            mlp_module = getattr(layer, mlp_module_name)
+
             # MLP patching
-            gate_proj = layer.mlp.gate_proj
-            up_proj   = layer.mlp.  up_proj
-            down_proj = layer.mlp.down_proj
+            gate_proj = mlp_module.gate_proj
+            up_proj   = mlp_module.  up_proj
+            down_proj = mlp_module.down_proj

             if  hasattr(gate_proj, "lora_A") and \
                 hasattr(  up_proj, "lora_A") and \
@@ -2734,7 +2745,7 @@ def patch_peft_model(
                 (len(getattr(down_proj, "lora_magnitude_vector", []) or []) == 0):

                 # https://stackoverflow.com/questions/50599045/python-replacing-a-function-within-a-class-of-a-module
-                layer.mlp.forward = types.MethodType(_apply_lora_mlp, layer.mlp)
+                mlp_module.forward = types.MethodType(_apply_lora_mlp, mlp_module)
                 n_mlp += 1
             else:
                 logger.warning_once(
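
The patch generalizes the MLP lookup: instead of assuming every decoder layer exposes `layer.mlp`, it probes for `mlp` first and `feed_forward` second (falcon_h1 style), then rebinds the fused LoRA forward onto whichever module it finds. Below is a minimal, self-contained sketch of that probe-and-rebind pattern, not Unsloth code: the toy layer classes and `fused_mlp_forward` are hypothetical stand-ins for the real decoder layers and `_apply_lora_mlp`, which come from the patch itself.

```python
# Sketch only: illustrates attribute probing plus instance-level forward
# rebinding via types.MethodType, as used in the diff above.
import types
import torch
import torch.nn as nn

class LlamaStyleLayer(nn.Module):
    """Toy decoder layer exposing its MLP as `.mlp` (Llama style)."""
    def __init__(self, dim=8):
        super().__init__()
        self.mlp = nn.Sequential(nn.Linear(dim, dim))

class FalconH1StyleLayer(nn.Module):
    """Toy decoder layer exposing its MLP as `.feed_forward` (falcon_h1 style)."""
    def __init__(self, dim=8):
        super().__init__()
        self.feed_forward = nn.Sequential(nn.Linear(dim, dim))

def fused_mlp_forward(self, x):
    # Hypothetical replacement forward; Unsloth's real `_apply_lora_mlp`
    # would compute the fused LoRA MLP here instead.
    return self[0](x)

def patch_layers(layers):
    n_patched = 0
    for idx, layer in enumerate(layers):
        # Probe for the MLP attribute name, mirroring the diff.
        if hasattr(layer, "mlp"):
            mlp_module = layer.mlp
        elif hasattr(layer, "feed_forward"):
            mlp_module = layer.feed_forward
        else:
            print(f"No MLP module found in layer {idx}, skipping")
            continue
        # Rebind forward on the instance so only this module is patched,
        # not the class shared by all layers.
        mlp_module.forward = types.MethodType(fused_mlp_forward, mlp_module)
        n_patched += 1
    return n_patched

layers = [LlamaStyleLayer(), FalconH1StyleLayer()]
print(patch_layers(layers), "layers patched")
print(layers[0].mlp(torch.randn(2, 8)).shape)
```

Rebinding `forward` on the instance (rather than the class) keeps the patch scoped to the modules that actually carry LoRA adapters, which is why the diff skips layers whose projections lack `lora_A` or carry bias/dropout/magnitude vectors.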