From ad2c5e059ca7747d0c15810704a8122e6936af90 Mon Sep 17 00:00:00 2001 From: fzowl Date: Thu, 7 Aug 2025 18:14:02 +0200 Subject: [PATCH 1/2] Add Cohere embed-v4.0 model support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add text-only embed-v4.0 model in cohere_models.py - Add multimodal embed-v4.0 model in cohere_v.py - Support configurable dimensions (256, 512, 1024, 1536) - Support 128,000 token context length - Support multimodal embedding (text, images, mixed PDFs) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- mteb/models/cohere_models.py | 25 +++++++++++++++++++++++++ mteb/models/cohere_v.py | 22 ++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/mteb/models/cohere_models.py b/mteb/models/cohere_models.py index 606195417a..7f9d4e7fca 100644 --- a/mteb/models/cohere_models.py +++ b/mteb/models/cohere_models.py @@ -313,3 +313,28 @@ def encode( public_training_data=None, # assumed training_datasets=None, ) + +cohere_embed_v4 = ModelMeta( + loader=partial( + CohereTextEmbeddingModel, + model_name="embed-v4.0", + model_prompts=model_prompts, + ), + name="Cohere/embed-v4.0", + languages=supported_languages, + open_weights=False, + reference="https://docs.cohere.com/docs/cohere-embed", + revision="1", + release_date="2024-12-01", + n_parameters=None, + memory_usage_mb=None, + max_tokens=128000, + embed_dim=1536, + license=None, + similarity_fn_name="cosine", + framework=["API"], + use_instructions=True, + public_training_code=None, + public_training_data=None, + training_datasets=None, +) diff --git a/mteb/models/cohere_v.py b/mteb/models/cohere_v.py index b52a31fec8..74bcfa37b8 100644 --- a/mteb/models/cohere_v.py +++ b/mteb/models/cohere_v.py @@ -226,3 +226,25 @@ def get_fused_embeddings( use_instructions=False, training_datasets=None, ) + +cohere_embed_v4_multimodal = ModelMeta( + loader=partial(cohere_v_loader, model_name="embed-v4.0"), + 
name="Cohere/embed-v4.0-multimodal", + languages=[], # Unknown, but supports 100+ languages + revision="1", + release_date="2024-12-01", + n_parameters=None, + memory_usage_mb=None, + max_tokens=128000, + embed_dim=1536, + license=None, + similarity_fn_name="cosine", + framework=[], + modalities=["image", "text"], + open_weights=False, + public_training_code=None, + public_training_data=None, + reference="https://docs.cohere.com/docs/cohere-embed", + use_instructions=False, + training_datasets=None, +) From 356173768853143e826c67e235adc9b087fc3c2a Mon Sep 17 00:00:00 2001 From: fzowl Date: Sat, 9 Aug 2025 16:24:09 +0200 Subject: [PATCH 2/2] Add Cohere embed-v4.0 model support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the text-only embed-v4.0 entry from cohere_models.py and keep a single embed-v4.0 entry in cohere_v.py, renamed to Cohere/Cohere-embed-v4.0 and given an explicit list of supported languages (all_languages). 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- mteb/models/cohere_models.py | 25 -------- mteb/models/cohere_v.py | 116 ++++++++++++++++++++++++++++++++++- 2 files changed, 114 insertions(+), 27 deletions(-) diff --git a/mteb/models/cohere_models.py b/mteb/models/cohere_models.py index 7f9d4e7fca..606195417a 100644 --- a/mteb/models/cohere_models.py +++ b/mteb/models/cohere_models.py @@ -313,28 +313,3 @@ def encode( public_training_data=None, # assumed training_datasets=None, ) - -cohere_embed_v4 = ModelMeta( - loader=partial( - CohereTextEmbeddingModel, - model_name="embed-v4.0", - model_prompts=model_prompts, - ), - name="Cohere/embed-v4.0", - languages=supported_languages, - open_weights=False, - reference="https://docs.cohere.com/docs/cohere-embed", - revision="1", - release_date="2024-12-01", - n_parameters=None, - memory_usage_mb=None, - max_tokens=128000, - embed_dim=1536, - license=None, - similarity_fn_name="cosine", - framework=["API"], - use_instructions=True, - public_training_code=None, - public_training_data=None, - training_datasets=None, 
-) diff --git a/mteb/models/cohere_v.py b/mteb/models/cohere_v.py index 74bcfa37b8..22aa2c8d36 100644 --- a/mteb/models/cohere_v.py +++ b/mteb/models/cohere_v.py @@ -16,6 +16,118 @@ from mteb.model_meta import ModelMeta from mteb.requires_package import requires_image_dependencies, requires_package +all_languages = [ + "afr-Latn", + "amh-Ethi", + "ara-Arab", + "asm-Beng", + "aze-Latn", + "bel-Cyrl", + "bul-Cyrl", + "ben-Beng", + "bod-Tibt", + "bos-Latn", + "cat-Latn", + "ceb-Latn", + "cos-Latn", + "ces-Latn", + "cym-Latn", + "dan-Latn", + "deu-Latn", + "ell-Grek", + "eng-Latn", + "epo-Latn", + "spa-Latn", + "est-Latn", + "eus-Latn", + "fas-Arab", + "fin-Latn", + "fra-Latn", + "fry-Latn", + "gle-Latn", + "gla-Latn", + "glg-Latn", + "guj-Gujr", + "hau-Latn", + "haw-Latn", + "heb-Hebr", + "hin-Deva", + "hmn-Latn", + "hrv-Latn", + "hat-Latn", + "hun-Latn", + "hye-Armn", + "ind-Latn", + "ibo-Latn", + "isl-Latn", + "ita-Latn", + "jpn-Jpan", + "jav-Latn", + "kat-Geor", + "kaz-Cyrl", + "khm-Khmr", + "kan-Knda", + "kor-Kore", + "kur-Arab", + "kir-Cyrl", + "lat-Latn", + "ltz-Latn", + "lao-Laoo", + "lit-Latn", + "lav-Latn", + "mlg-Latn", + "mri-Latn", + "mkd-Cyrl", + "mal-Mlym", + "mon-Cyrl", + "mar-Deva", + "msa-Latn", + "mlt-Latn", + "mya-Mymr", + "nep-Deva", + "nld-Latn", + "nor-Latn", + "nya-Latn", + "ori-Orya", + "pan-Guru", + "pol-Latn", + "por-Latn", + "ron-Latn", + "rus-Cyrl", + "kin-Latn", + "sin-Sinh", + "slk-Latn", + "slv-Latn", + "smo-Latn", + "sna-Latn", + "som-Latn", + "sqi-Latn", + "srp-Cyrl", + "sot-Latn", + "sun-Latn", + "swe-Latn", + "swa-Latn", + "tam-Taml", + "tel-Telu", + "tgk-Cyrl", + "tha-Thai", + "tuk-Latn", + "tgl-Latn", + "tur-Latn", + "tat-Cyrl", + "uig-Arab", + "ukr-Cyrl", + "urd-Arab", + "uzb-Latn", + "vie-Latn", + "wol-Latn", + "xho-Latn", + "yid-Hebr", + "yor-Latn", + "zho-Hans", + "zul-Latn", +] + def cohere_v_loader(**kwargs): model_name = kwargs.get("model_name", "Cohere") @@ -229,8 +341,8 @@ def get_fused_embeddings( 
cohere_embed_v4_multimodal = ModelMeta( loader=partial(cohere_v_loader, model_name="embed-v4.0"), - name="Cohere/embed-v4.0-multimodal", - languages=[], # Unknown, but supports 100+ languages + name="Cohere/Cohere-embed-v4.0", + languages=all_languages, revision="1", release_date="2024-12-01", n_parameters=None,