-
Notifications
You must be signed in to change notification settings. Forks: 15.6k
Add Kimi-K2.5 support #19170
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Add Kimi-K2.5 support #19170
Changes from all commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
042c3cb
Move dequant_model to after the text_config merge
AesSedai a4c9a08
Fix a couple of oversights
AesSedai 9c44981
Add image support for Kimi-K2.5
AesSedai 9b14cb8
Revert changes to KimiVLForConditionalGeneration
AesSedai 37a386d
Fix an assert crash
AesSedai b1cf34e
Fix permute swapping w / h on accident
AesSedai f13b383
Merge remote-tracking branch 'origin/master' into kimi-k2.5
AesSedai be1b0c3
Kimi-K2.5: Use merged QKV for vision
AesSedai 052fda6
Kimi-K2.5: pre-convert vision QK to use build_rope_2d
AesSedai 0c50dd9
Kimi-K2.5: support non-interleaved rope for vision
AesSedai d0d1062
Kimi-K2.5: fix min / max pixel
AesSedai c895365
Kimi-K2.5: remove v/o permutes, unnecessary
AesSedai 7b4af22
Kimi-K2.5: update permute name to match
AesSedai 16010cb
Merge remote-tracking branch 'origin/master' into kimi-k2.5
AesSedai c5de0ef
Update convert_hf_to_gguf.py
AesSedai 2d7c44a
Kimi-K2.5: replace build_rope_2d ggml_cont with ggml_view_3d pointers
AesSedai File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -672,8 +672,8 @@ ggml_tensor * clip_graph::build_rope_2d( | |
| { | ||
| first = ggml_view_3d(ctx0, cur, | ||
| n_dim/2, n_head, n_pos, | ||
| ggml_row_size(cur->type, n_dim), | ||
| ggml_row_size(cur->type, n_dim*n_head), | ||
| cur->nb[1], | ||
| cur->nb[2], | ||
|
Contributor
Author
There was a problem hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @ngxson making sure you see this change and the one below in the second view too. Adjusting this removed the need for the |
||
| 0); | ||
| first = ggml_rope_ext( | ||
| ctx0, | ||
|
|
@@ -691,8 +691,8 @@ ggml_tensor * clip_graph::build_rope_2d( | |
| { | ||
| second = ggml_view_3d(ctx0, cur, | ||
| n_dim/2, n_head, n_pos, | ||
| ggml_row_size(cur->type, n_dim), | ||
| ggml_row_size(cur->type, n_dim*n_head), | ||
| cur->nb[1], | ||
| cur->nb[2], | ||
| n_dim/2 * ggml_element_size(cur)); | ||
| second = ggml_rope_ext( | ||
| ctx0, | ||
|
|
@@ -825,6 +825,10 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 | |
| { | ||
| builder = std::make_unique<clip_graph_kimivl>(ctx, img); | ||
| } break; | ||
| case PROJECTOR_TYPE_KIMIK25: | ||
| { | ||
| builder = std::make_unique<clip_graph_kimik25>(ctx, img); | ||
| } break; | ||
| case PROJECTOR_TYPE_COGVLM: | ||
| { | ||
| builder = std::make_unique<clip_graph_cogvlm>(ctx, img); | ||
|
|
@@ -1139,6 +1143,22 @@ struct clip_model_loader { | |
| hparams.set_limit_image_tokens(8, 1024); | ||
| hparams.set_warmup_n_tokens(256); // avoid OOM on warmup | ||
| } break; | ||
| case PROJECTOR_TYPE_KIMIK25: | ||
| { | ||
| hparams.rope_theta = 10000.0f; | ||
| get_u32(KEY_PROJ_SCALE_FACTOR, hparams.n_merge, false); | ||
|
|
||
| int min_pixels = 0, max_pixels = 0; | ||
| get_u32(KEY_IMAGE_MIN_PIXELS, min_pixels, false); | ||
| get_u32(KEY_IMAGE_MAX_PIXELS, max_pixels, false); | ||
| if (min_pixels > 0 && max_pixels > 0) { | ||
| hparams.image_min_pixels = min_pixels; | ||
| hparams.image_max_pixels = max_pixels; | ||
| hparams.warmup_image_size = static_cast<int>(std::sqrt(max_pixels)); | ||
| } else { | ||
| hparams.set_limit_image_tokens(2, 4096); | ||
| } | ||
| } break; | ||
| case PROJECTOR_TYPE_GEMMA3: | ||
| { | ||
| // default value (used by all model sizes in gemma 3 family) | ||
|
|
@@ -1668,6 +1688,7 @@ struct clip_model_loader { | |
| model.mm_2_b = get_tensor(string_format(TN_LLAVA_PROJ, 2, "bias")); | ||
| } break; | ||
| case PROJECTOR_TYPE_KIMIVL: | ||
| case PROJECTOR_TYPE_KIMIK25: | ||
| { | ||
| model.mm_input_norm_w = get_tensor(TN_MM_INP_NORM); | ||
| model.mm_input_norm_b = get_tensor(TN_MM_INP_NORM_B); | ||
|
|
@@ -3039,6 +3060,23 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str | |
| res_imgs->entries.push_back(std::move(res)); | ||
| } break; | ||
|
|
||
| case PROJECTOR_TYPE_KIMIK25: | ||
| { | ||
| GGML_ASSERT(params.image_min_pixels > 0 && params.image_max_pixels > 0); | ||
| const clip_image_size target_size = img_tool::calc_size_preserved_ratio( | ||
| original_size, | ||
| params.patch_size * params.n_merge, | ||
| params.image_min_pixels, | ||
| params.image_max_pixels); | ||
| const std::array<uint8_t, 3> pad_color = {0, 0, 0}; | ||
|
|
||
| clip_image_u8 resized_img; | ||
| img_tool::resize(*img, resized_img, target_size, img_tool::RESIZE_ALGO_BICUBIC, true, pad_color); | ||
| clip_image_f32_ptr res(clip_image_f32_init()); | ||
| normalize_image_u8_to_f32(resized_img, *res, params.image_mean, params.image_std); | ||
| res_imgs->entries.push_back(std::move(res)); | ||
| } break; | ||
|
|
||
| case PROJECTOR_TYPE_MLP: | ||
| case PROJECTOR_TYPE_MLP_NORM: | ||
| case PROJECTOR_TYPE_LDP: | ||
|
|
@@ -3247,6 +3285,7 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im | |
| } break; | ||
| case PROJECTOR_TYPE_LFM2: | ||
| case PROJECTOR_TYPE_KIMIVL: | ||
| case PROJECTOR_TYPE_KIMIK25: | ||
| { | ||
| // dynamic size | ||
| int out_patch_size = params.patch_size * ctx->model.hparams.n_merge; | ||
|
|
@@ -3588,6 +3627,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima | |
| } break; | ||
| case PROJECTOR_TYPE_PIXTRAL: | ||
| case PROJECTOR_TYPE_KIMIVL: | ||
| case PROJECTOR_TYPE_KIMIK25: | ||
| case PROJECTOR_TYPE_LIGHTONOCR: | ||
| { | ||
| // set the 2D positions | ||
|
|
@@ -3724,6 +3764,47 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima | |
| ggml_backend_tensor_get(embeddings, vec, 0, ggml_nbytes(embeddings)); | ||
| } | ||
|
|
||
| // Debug: dump final embeddings if MTMD_DEBUG_EMBEDDINGS is set | ||
| if (std::getenv("MTMD_DEBUG_EMBEDDINGS") != nullptr) { | ||
| const int64_t n_embd = embeddings->ne[0]; | ||
| const int64_t n_tokens = embeddings->ne[1]; | ||
| std::vector<float> emb_data(n_embd * n_tokens); | ||
| ggml_backend_tensor_get(embeddings, emb_data.data(), 0, ggml_nbytes(embeddings)); | ||
|
|
||
| LOG_INF("\n=== MTMD_DEBUG_EMBEDDINGS ===\n"); | ||
| LOG_INF("Shape: [%lld, %lld]\n", (long long)n_embd, (long long)n_tokens); | ||
|
|
||
| // Print first few values of first token | ||
| LOG_INF("Token 0 (first 16 values): "); | ||
| for (int i = 0; i < std::min((int64_t)16, n_embd); i++) { | ||
| LOG_INF("%.6f ", emb_data[i]); | ||
| } | ||
| LOG_INF("\n"); | ||
|
|
||
| // Print last few values of first token | ||
| if (n_embd > 16) { | ||
| LOG_INF("Token 0 (last 16 values): "); | ||
| for (int64_t i = n_embd - 16; i < n_embd; i++) { | ||
| LOG_INF("%.6f ", emb_data[i]); | ||
| } | ||
| LOG_INF("\n"); | ||
| } | ||
|
|
||
| // Compute and print statistics | ||
| float sum = 0.0f, sum_sq = 0.0f, min_val = emb_data[0], max_val = emb_data[0]; | ||
| for (size_t i = 0; i < emb_data.size(); i++) { | ||
| sum += emb_data[i]; | ||
| sum_sq += emb_data[i] * emb_data[i]; | ||
| min_val = std::min(min_val, emb_data[i]); | ||
| max_val = std::max(max_val, emb_data[i]); | ||
| } | ||
| float mean = sum / emb_data.size(); | ||
| float variance = (sum_sq / emb_data.size()) - (mean * mean); | ||
| LOG_INF("Stats: mean=%.6f, std=%.6f, min=%.6f, max=%.6f, sum=%.6f\n", | ||
| mean, sqrtf(variance), min_val, max_val, sum); | ||
| LOG_INF("=== END MTMD_DEBUG_EMBEDDINGS ===\n\n"); | ||
| } | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
|
|
@@ -3770,6 +3851,7 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) { | |
| return ctx->model.mm_2_w->ne[1]; | ||
| case PROJECTOR_TYPE_LFM2: | ||
| case PROJECTOR_TYPE_KIMIVL: | ||
| case PROJECTOR_TYPE_KIMIK25: | ||
| return ctx->model.mm_2_w->ne[1]; | ||
| case PROJECTOR_TYPE_COGVLM: | ||
| return ctx->model.mm_4h_to_h_w->ne[1]; | ||
|
|
||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.