diff --git a/conversion/gemma.py b/conversion/gemma.py
index 379876629fb..1258428b046 100644
--- a/conversion/gemma.py
+++ b/conversion/gemma.py
@@ -812,10 +812,11 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams_vision.get("layer_norm_eps", 1e-6))
 
         # audio params
-        assert self.hparams_audio is not None
-        self.gguf_writer.add_clip_audio_projector_type(gguf.VisionProjectorType.GEMMA4A)
-        self.gguf_writer.add_audio_num_mel_bins(self.hparams_audio["feat_in"])
-        self.gguf_writer.add_audio_attention_layernorm_eps(self.hparams_audio.get("layer_norm_eps", 1e-6))
+        if self.has_audio_encoder:
+            assert self.hparams_audio is not None
+            self.gguf_writer.add_clip_audio_projector_type(gguf.VisionProjectorType.GEMMA4A)
+            self.gguf_writer.add_audio_num_mel_bins(self.hparams_audio["feat_in"])
+            self.gguf_writer.add_audio_attention_layernorm_eps(self.hparams_audio.get("layer_norm_eps", 1e-6))
 
     def is_audio_tensor(self, name: str) -> bool:
         return "audio_tower" in name or "embed_audio" in name