Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tools/mtmd/clip-graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ struct clip_graph {
const int n_patches;
const int n_embd;
const int n_head;
const int n_head_kv;
const int d_head;
const int n_layer;
const int n_mmproj_embd;
Expand Down
10 changes: 7 additions & 3 deletions tools/mtmd/clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ clip_graph::clip_graph(clip_ctx * ctx, const clip_image_f32 & img) :
n_patches(n_patches_x * n_patches_y),
n_embd(hparams.n_embd),
n_head(hparams.n_head),
n_head_kv(hparams.n_head_kv),
d_head(n_embd / n_head),
n_layer(hparams.n_layer),
n_mmproj_embd(clip_n_mmproj_embd(ctx)),
Expand Down Expand Up @@ -401,9 +402,9 @@ ggml_tensor * clip_graph::build_vit(
}
}

Qcur = ggml_reshape_3d(ctx0, Qcur, d_head, n_head, n_pos);
Kcur = ggml_reshape_3d(ctx0, Kcur, d_head, n_head, n_pos);
Vcur = ggml_reshape_3d(ctx0, Vcur, d_head, n_head, n_pos);
Qcur = ggml_reshape_3d(ctx0, Qcur, d_head, n_head, n_pos);
Kcur = ggml_reshape_3d(ctx0, Kcur, d_head, n_head_kv, n_pos);
Vcur = ggml_reshape_3d(ctx0, Vcur, d_head, n_head_kv, n_pos);

if (norm_per_head) {
if (layer.q_norm) {
Expand Down Expand Up @@ -1120,6 +1121,9 @@ struct clip_model_loader {
get_u32(string_format(KEY_PROJ_DIM, prefix), hparams.projection_dim);
get_f32(string_format(KEY_LAYER_NORM_EPS, prefix), hparams.eps);

// n_head_kv is optional (used for grouped-query attention, GQA); default it to n_head
hparams.n_head_kv = hparams.n_head;

if (is_vision) {
get_u32(KEY_IMAGE_SIZE, hparams.image_size);
get_u32(KEY_PATCH_SIZE, hparams.patch_size);
Expand Down
Loading