From b3c5b8e2043828cc40ae170326aafc3e9ac29bad Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Wed, 2 Jul 2025 21:37:11 +0300 Subject: [PATCH 1/2] fix sbert `v5` --- mteb/models/jina_models.py | 7 +++---- tests/test_benchmark/test_benchmark.py | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/mteb/models/jina_models.py b/mteb/models/jina_models.py index 3b542f3a51..dea802ab03 100644 --- a/mteb/models/jina_models.py +++ b/mteb/models/jina_models.py @@ -10,10 +10,10 @@ from sentence_transformers import __version__ as st_version from mteb.encoder_interface import PromptType +from mteb.languages import PROGRAMMING_LANGS from mteb.model_meta import ModelMeta from mteb.models.sentence_transformer_wrapper import SentenceTransformerWrapper from mteb.requires_package import requires_package -from mteb.languages import PROGRAMMING_LANGS logger = logging.getLogger(__name__) @@ -234,8 +234,8 @@ def __init__( ) requires_package(self, "peft", model, "pip install 'mteb[jina-v4]'") requires_package(self, "torchvision", model, "pip install 'mteb[jina-v4]'") - import peft # noqa: F401 import flash_attn # noqa: F401 + import peft # noqa: F401 import transformers # noqa: F401 super().__init__(model, revision, model_prompts, **kwargs) @@ -284,8 +284,7 @@ def encode( def get_programming_task_override( task_name: str, current_task_name: str | None ) -> str | None: - """ - Check if task involves programming content and override with 'code' task if so. + """Check if task involves programming content and override with 'code' task if so. Args: task_name: Original task name to check diff --git a/tests/test_benchmark/test_benchmark.py b/tests/test_benchmark/test_benchmark.py index 97cb67cb67..5f4984996a 100644 --- a/tests/test_benchmark/test_benchmark.py +++ b/tests/test_benchmark/test_benchmark.py @@ -9,7 +9,6 @@ import numpy as np import pytest import torch -from sentence_transformers import SentenceTransformer import mteb import mteb.overview @@ -114,7 +113,7 @@ def encode(self, sentences, prompt_name: str | None = None, **kwargs): assert prompt_name == _task_name return np.zeros((len(sentences), 10)) - class EncoderWithoutInstructions(SentenceTransformer): + class EncoderWithoutInstructions(MockSentenceTransformer): def encode(self, sentences, **kwargs): assert kwargs["prompt_name"] is None return super().encode(sentences, **kwargs) @@ -138,7 +137,8 @@ def encode(self, sentences, **kwargs): overwrite_results=True, ) # Test that the task_name is not passed down to the encoder - model = EncoderWithoutInstructions("average_word_embeddings_levy_dependency") + model = EncoderWithoutInstructions() + model.prompts = {} assert model.prompts == {}, "The encoder should not have any prompts" eval.run(model, output_folder=tmp_path.as_posix(), overwrite_results=True) From b007f056c6fa6e2d0079085a61032d9ab3c68a5f Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Wed, 2 Jul 2025 21:51:43 +0300 Subject: [PATCH 2/2] add comment --- tests/test_benchmark/mock_models.py | 2 ++ tests/test_benchmark/test_benchmark.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_benchmark/mock_models.py b/tests/test_benchmark/mock_models.py index 9600559b3e..0ec55ea183 100644 --- a/tests/test_benchmark/mock_models.py +++ b/tests/test_benchmark/mock_models.py @@ -133,6 +133,8 @@ class MockSentenceTransformer(SentenceTransformer): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + # by default, in SentenceTransformer, prompts are `{"query": "", "document": ""}` + self.prompts = {} def encode( self, diff --git a/tests/test_benchmark/test_benchmark.py b/tests/test_benchmark/test_benchmark.py index 5f4984996a..11b5f4cc7f 100644 --- a/tests/test_benchmark/test_benchmark.py +++ b/tests/test_benchmark/test_benchmark.py @@ -138,7 +138,6 @@ def encode(self, sentences, **kwargs): ) # Test that the task_name is not passed down to the encoder model = EncoderWithoutInstructions() - model.prompts = {} assert model.prompts == {}, "The encoder should not have any prompts" eval.run(model, output_folder=tmp_path.as_posix(), overwrite_results=True)