diff --git a/mteb/models/en_code_retriever.py b/mteb/models/en_code_retriever.py
new file mode 100644
index 0000000000..ec410c23be
--- /dev/null
+++ b/mteb/models/en_code_retriever.py
@@ -0,0 +1,50 @@
+"""ModelMeta entry for the fyaronskiy/english_code_retriever model."""
+
+from __future__ import annotations
+
+from functools import partial
+
+from mteb.encoder_interface import PromptType
+from mteb.model_meta import ModelMeta, sentence_transformers_loader
+
+# Hub identifier and pinned commit, defined once and reused by both the
+# loader and the metadata below.
+_MODEL_NAME = "fyaronskiy/english_code_retriever"
+_REVISION = "be653fab7d27a7348a0c2c3d16b9f92a7f10cb0c"
+
+# Prompt strings keyed by prompt type; handed to the loader as model_prompts.
+# NOTE(review): confirm that the PromptType exported by mteb.encoder_interface
+# defines a "document" member (it may be named "passage" in the targeted mteb).
+_PROMPTS = {
+    PromptType.query.value: "search_query: ",
+    PromptType.document.value: "search_document: ",
+}
+
+# Loader with the checkpoint, revision and prompts pre-bound.
+_load_model = partial(
+    sentence_transformers_loader,
+    model_name=_MODEL_NAME,
+    revision=_REVISION,
+    model_prompts=_PROMPTS,
+)
+
+english_code_retriever = ModelMeta(
+    loader=_load_model,
+    name=_MODEL_NAME,
+    languages=["eng-Latn"],
+    open_weights=True,
+    revision=_REVISION,
+    release_date="2025-07-10",
+    n_parameters=149_000_000,
+    memory_usage_mb=568,
+    embed_dim=768,
+    license="mit",
+    max_tokens=8192,
+    reference="https://huggingface.co/fyaronskiy/english_code_retriever",
+    similarity_fn_name="cosine",
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/code-search-net/code_search_net",
+    training_datasets={"CodeSearchNet": ["train"]},
+)
diff --git a/mteb/models/overview.py b/mteb/models/overview.py
index edbd06ecaa..5f6fd41a44 100644
--- a/mteb/models/overview.py
+++ b/mteb/models/overview.py
@@ -39,6 +39,7 @@
     e5_instruct,
     e5_models,
     e5_v,
+    en_code_retriever,
     evaclip_models,
     fa_models,
     geogpt_models,
@@ -139,6 +140,7 @@
     e5_instruct,
     e5_models,
     e5_v,
+    en_code_retriever,
     evaclip_models,
     google_models,
     granite_vision_embedding_models,