Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions conversion/gemma.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,14 +786,15 @@ def set_gguf_parameters(self):
super().set_gguf_parameters()

# vision params
assert self.hparams_vision is not None
self.gguf_writer.add_clip_vision_projector_type(gguf.VisionProjectorType.GEMMA4V)
self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams.get("layer_norm_eps", 1e-6))
self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams_vision.get("layer_norm_eps", 1e-6))

# audio params
if self.hparams_audio:
self.gguf_writer.add_clip_audio_projector_type(gguf.VisionProjectorType.GEMMA4A)
self.gguf_writer.add_audio_num_mel_bins(self.hparams_audio["feat_in"])
self.gguf_writer.add_audio_attention_layernorm_eps(1e-5)
assert self.hparams_audio is not None
self.gguf_writer.add_clip_audio_projector_type(gguf.VisionProjectorType.GEMMA4A)
self.gguf_writer.add_audio_num_mel_bins(self.hparams_audio["feat_in"])
self.gguf_writer.add_audio_attention_layernorm_eps(self.hparams_audio.get("layer_norm_eps", 1e-6))

def is_audio_tensor(self, name: str) -> bool:
return "audio_tower" in name or "embed_audio" in name
Expand Down
3 changes: 3 additions & 0 deletions tools/mtmd/clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1552,6 +1552,9 @@ struct clip_model_loader {
hparams.audio_n_fft = 512;
hparams.audio_window_len = 320; // 20ms frame (NOT 25ms/400)
hparams.audio_hop_len = 160;
// due to a mistake in the original conversion code, rms_norm_eps is set to a wrong value
Comment thread
ngxson marked this conversation as resolved.
Outdated
// since all gemma4a models use 1e-6, we just hardcode it here to avoid re-conversion
hparams.eps = 1e-6f;
} break;
case PROJECTOR_TYPE_GRANITE_SPEECH:
{
Expand Down
Loading