# Language tags attached to the multilingual Cohere model metadata in this
# module. Each tag is a three-letter language code, a hyphen, and a four-letter
# script code (presumably ISO 639-3 and ISO 15924 -- confirm against the
# project's language-code conventions).
#
# The sequence is not alphabetical by three-letter code; the order is kept
# exactly as given so that the resulting list is unchanged.
all_languages = """
    afr-Latn amh-Ethi ara-Arab asm-Beng aze-Latn bel-Cyrl bul-Cyrl ben-Beng
    bod-Tibt bos-Latn cat-Latn ceb-Latn cos-Latn ces-Latn cym-Latn dan-Latn
    deu-Latn ell-Grek eng-Latn epo-Latn spa-Latn est-Latn eus-Latn fas-Arab
    fin-Latn fra-Latn fry-Latn gle-Latn gla-Latn glg-Latn guj-Gujr hau-Latn
    haw-Latn heb-Hebr hin-Deva hmn-Latn hrv-Latn hat-Latn hun-Latn hye-Armn
    ind-Latn ibo-Latn isl-Latn ita-Latn jpn-Jpan jav-Latn kat-Geor kaz-Cyrl
    khm-Khmr kan-Knda kor-Kore kur-Arab kir-Cyrl lat-Latn ltz-Latn lao-Laoo
    lit-Latn lav-Latn mlg-Latn mri-Latn mkd-Cyrl mal-Mlym mon-Cyrl mar-Deva
    msa-Latn mlt-Latn mya-Mymr nep-Deva nld-Latn nor-Latn nya-Latn ori-Orya
    pan-Guru pol-Latn por-Latn ron-Latn rus-Cyrl kin-Latn sin-Sinh slk-Latn
    slv-Latn smo-Latn sna-Latn som-Latn sqi-Latn srp-Cyrl sot-Latn sun-Latn
    swe-Latn swa-Latn tam-Taml tel-Telu tgk-Cyrl tha-Thai tuk-Latn tgl-Latn
    tur-Latn tat-Cyrl uig-Arab ukr-Cyrl urd-Arab uzb-Latn vie-Latn wol-Latn
    xho-Latn yid-Hebr yor-Latn zho-Hans zul-Latn
""".split()  # split() with no argument drops all whitespace, leaving a list[str]
# Metadata entry for Cohere's "embed-v4.0" model. The loader is the module's
# `cohere_v_loader` with `model_name` pre-bound, and the entry is declared as
# handling both image and text inputs with closed weights.
cohere_embed_v4_multimodal = ModelMeta(
    loader=partial(cohere_v_loader, model_name="embed-v4.0"),
    name="Cohere/Cohere-embed-v4.0",
    languages=all_languages,
    revision="1",
    # Public launch date of Embed v4 according to Cohere's release notes.
    release_date="2025-04-15",
    # Parameter count and memory footprint are left unset in this entry.
    n_parameters=None,
    memory_usage_mb=None,
    # NOTE(review): presumably the model's maximum input context in tokens
    # and its default output dimensionality -- confirm against Cohere's docs.
    max_tokens=128000,
    embed_dim=1536,
    license=None,
    similarity_fn_name="cosine",
    framework=[],
    modalities=["image", "text"],
    open_weights=False,
    public_training_code=None,
    public_training_data=None,
    reference="https://docs.cohere.com/docs/cohere-embed",
    use_instructions=False,
    training_datasets=None,
)