From b80ca1c31476b2133565c35297802b7ef541db23 Mon Sep 17 00:00:00 2001 From: Fedor Yaronskiy Date: Thu, 9 Oct 2025 22:10:05 +0300 Subject: [PATCH 1/4] Add en code retriever model --- mteb/models/en_code_retriever.py | 39 ++++++++++++++++++++++++++++++++ mteb/models/overview.py | 2 ++ 2 files changed, 41 insertions(+) create mode 100644 mteb/models/en_code_retriever.py diff --git a/mteb/models/en_code_retriever.py b/mteb/models/en_code_retriever.py new file mode 100644 index 0000000000..bf83354f19 --- /dev/null +++ b/mteb/models/en_code_retriever.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from functools import partial + +from mteb.encoder_interface import PromptType +from mteb.model_meta import ModelMeta, sentence_transformers_loader + +model_name = "fyaronskiy/english_code_retriever" +revision = "be653fab7d27a7348a0c2c3d16b9f92a7f10cb0c" + + +english_code_retriever = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name=model_name, + revision=revision, + model_prompts={ + PromptType.query.value: "search_query: ", + PromptType.document.value: "search_document: ", + }, + ), + name=model_name, + languages=["eng-Latn"], + open_weights=True, + revision=revision, + release_date="2025-07-10", + n_parameters=149_000_000, + memory_usage_mb=568, + embed_dim=768, + license="mit", + max_tokens=8192, + reference="https://huggingface.co/fyaronskiy/english_code_retriever", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, + public_training_code=None, + public_training_data="https://huggingface.co/datasets/code-search-net/code_search_net", + training_datasets={"CodeSearchNet": ["train"]}, +) diff --git a/mteb/models/overview.py b/mteb/models/overview.py index edbd06ecaa..5f6fd41a44 100644 --- a/mteb/models/overview.py +++ b/mteb/models/overview.py @@ -39,6 +39,7 @@ e5_instruct, e5_models, e5_v, + en_code_retriever, evaclip_models, fa_models, geogpt_models, @@ -139,6 +140,7 @@ e5_instruct, e5_models, e5_v, + en_code_retriever, evaclip_models, google_models, granite_vision_embedding_models, From f45dfbd23cfb740eff49a317762b145e5ed7a369 Mon Sep 17 00:00:00 2001 From: Fedor Yaronskiy Date: Fri, 10 Oct 2025 00:40:48 +0300 Subject: [PATCH 2/4] fix model_name --- mteb/models/en_code_retriever.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mteb/models/en_code_retriever.py b/mteb/models/en_code_retriever.py index bf83354f19..28f492f29f 100644 --- a/mteb/models/en_code_retriever.py +++ b/mteb/models/en_code_retriever.py @@ -12,17 +12,17 @@ english_code_retriever = ModelMeta( loader=partial( sentence_transformers_loader, - model_name=model_name, - revision=revision, + model_name="fyaronskiy/english_code_retriever", + revision="be653fab7d27a7348a0c2c3d16b9f92a7f10cb0c", model_prompts={ PromptType.query.value: "search_query: ", PromptType.document.value: "search_document: ", }, ), - name=model_name, + name="fyaronskiy/english_code_retriever", languages=["eng-Latn"], open_weights=True, - revision=revision, + revision="be653fab7d27a7348a0c2c3d16b9f92a7f10cb0c", release_date="2025-07-10", n_parameters=149_000_000, memory_usage_mb=568, From bf6312eef2c9be59d94277b1257df2e0112270be Mon Sep 17 00:00:00 2001 From: fedor28 <37560717+fedor28@users.noreply.github.com> Date: Fri, 10 Oct 2025 01:13:32 +0300 Subject: [PATCH 3/4] Update mteb/models/en_code_retriever.py Co-authored-by: Roman Solomatin --- mteb/models/en_code_retriever.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/mteb/models/en_code_retriever.py b/mteb/models/en_code_retriever.py index 28f492f29f..8b54664084 100644 --- a/mteb/models/en_code_retriever.py +++ b/mteb/models/en_code_retriever.py @@ -5,9 +5,6 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta, sentence_transformers_loader -model_name = "fyaronskiy/english_code_retriever" -revision = "be653fab7d27a7348a0c2c3d16b9f92a7f10cb0c" - english_code_retriever = ModelMeta( loader=partial( From 7300bf186714c513c4e11a3b8600e543a5464edb Mon Sep 17 00:00:00 2001 From: Fedor Yaronskiy Date: Fri, 10 Oct 2025 14:02:27 +0300 Subject: [PATCH 4/4] correct lint --- mteb/models/en_code_retriever.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mteb/models/en_code_retriever.py b/mteb/models/en_code_retriever.py index 8b54664084..ec410c23be 100644 --- a/mteb/models/en_code_retriever.py +++ b/mteb/models/en_code_retriever.py @@ -5,7 +5,6 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta, sentence_transformers_loader - english_code_retriever = ModelMeta( loader=partial( sentence_transformers_loader,