From b4e8bba49a072d9d6cd50ba76ac9a8b0bd5b757e Mon Sep 17 00:00:00 2001 From: abetlen Date: Wed, 3 Jun 2026 14:11:34 -0700 Subject: [PATCH 1/2] mtmd: handle Gemma 4 audio projector embedding size --- tools/mtmd/clip.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index 2008198150dd..54147de566de 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -4347,6 +4347,7 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) { return ctx->model.mm_input_proj_w->ne[0]; case PROJECTOR_TYPE_GEMMA4V: case PROJECTOR_TYPE_GEMMA4UV: + case PROJECTOR_TYPE_GEMMA4A: return ctx->model.mm_input_proj_w->ne[1]; case PROJECTOR_TYPE_IDEFICS3: return ctx->model.mm_fc_w->ne[1]; From fd6cad573eea3961d7e245ee77c776f2da66fae7 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 4 Jun 2026 11:13:43 +0200 Subject: [PATCH 2/2] rm projection_dim from clip_n_mmproj_embd --- tools/mtmd/clip.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index 54147de566de..80136ed86672 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -4348,6 +4348,7 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) { case PROJECTOR_TYPE_GEMMA4V: case PROJECTOR_TYPE_GEMMA4UV: case PROJECTOR_TYPE_GEMMA4A: + case PROJECTOR_TYPE_GEMMA4UA: return ctx->model.mm_input_proj_w->ne[1]; case PROJECTOR_TYPE_IDEFICS3: return ctx->model.mm_fc_w->ne[1]; @@ -4382,8 +4383,6 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) { return ctx->model.mm_fc_w->ne[1]; case PROJECTOR_TYPE_LFM2A: return ctx->model.position_embeddings->ne[0]; - case PROJECTOR_TYPE_GEMMA4UA: - return ctx->model.hparams.projection_dim; case PROJECTOR_TYPE_GRANITE_SPEECH: return ctx->model.qf_proj_linear_w->ne[1]; case PROJECTOR_TYPE_GLM4V: