unslothai · danielhanchen · Dec 5, 2024 · Oct 27, 2024 · Oct 29, 2024 · Oct 29, 2024
diff --git a/unsloth/models/__init__.py b/unsloth/models/__init__.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+
+from .granite import FastGraniteModel
 from .loader  import FastLanguageModel, FastVisionModel
 from .llama   import FastLlamaModel
 from .mistral import FastMistralModel

@@ -188,7 +188,7 @@ def patch_mistral_nemo_config(config):
 
 from transformers import __version__ as transformers_version
 from transformers import PretrainedConfig
-model_architectures = ["llama", "mistral", "gemma", "gemma2", "qwen2",]
+model_architectures = ["llama", "mistral", "gemma", "gemma2", "qwen2", "granite"]
 
 for model_name in model_architectures:
     config_filepath = f"transformers.models.{model_name}.configuration_{model_name}"

@@ -193,7 +193,7 @@ def Gemma2DecoderLayer_fast_forward(
             output_attentions=output_attentions,
             use_cache=use_cache,
             padding_mask=padding_mask,
-            _flag_for_generation=True,
+            _flag_for_generation=self._flag_for_generation,
         )
         hidden_states = fast_rms_layernorm_inference_gemma(self.post_attention_layernorm, hidden_states, out_weight)
         hidden_states += residual