diff --git a/mteb/models/jina_models.py b/mteb/models/jina_models.py index 3b542f3a51..dea802ab03 100644 --- a/mteb/models/jina_models.py +++ b/mteb/models/jina_models.py @@ -10,10 +10,10 @@ from sentence_transformers import __version__ as st_version from mteb.encoder_interface import PromptType +from mteb.languages import PROGRAMMING_LANGS from mteb.model_meta import ModelMeta from mteb.models.sentence_transformer_wrapper import SentenceTransformerWrapper from mteb.requires_package import requires_package -from mteb.languages import PROGRAMMING_LANGS logger = logging.getLogger(__name__) @@ -234,8 +234,8 @@ def __init__( ) requires_package(self, "peft", model, "pip install 'mteb[jina-v4]'") requires_package(self, "torchvision", model, "pip install 'mteb[jina-v4]'") - import peft # noqa: F401 import flash_attn # noqa: F401 + import peft # noqa: F401 import transformers # noqa: F401 super().__init__(model, revision, model_prompts, **kwargs) @@ -284,8 +284,7 @@ def encode( def get_programming_task_override( task_name: str, current_task_name: str | None ) -> str | None: - """ - Check if task involves programming content and override with 'code' task if so. + """Check if task involves programming content and override with 'code' task if so. Args: task_name: Original task name to check diff --git a/tests/test_benchmark/mock_models.py b/tests/test_benchmark/mock_models.py index 9600559b3e..0ec55ea183 100644 --- a/tests/test_benchmark/mock_models.py +++ b/tests/test_benchmark/mock_models.py @@ -133,6 +133,8 @@ class MockSentenceTransformer(SentenceTransformer): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + # by default, in SentenceTransformer, prompts are `{"query": "", "document": ""}` + self.prompts = {} def encode( self, diff --git a/tests/test_benchmark/test_benchmark.py b/tests/test_benchmark/test_benchmark.py index 97cb67cb67..11b5f4cc7f 100644 --- a/tests/test_benchmark/test_benchmark.py +++ b/tests/test_benchmark/test_benchmark.py @@ -9,7 +9,6 @@ import numpy as np import pytest import torch -from sentence_transformers import SentenceTransformer import mteb import mteb.overview @@ -114,7 +113,7 @@ def encode(self, sentences, prompt_name: str | None = None, **kwargs): assert prompt_name == _task_name return np.zeros((len(sentences), 10)) - class EncoderWithoutInstructions(SentenceTransformer): + class EncoderWithoutInstructions(MockSentenceTransformer): def encode(self, sentences, **kwargs): assert kwargs["prompt_name"] is None return super().encode(sentences, **kwargs) @@ -138,7 +137,7 @@ def encode(self, sentences, **kwargs): overwrite_results=True, ) # Test that the task_name is not passed down to the encoder - model = EncoderWithoutInstructions("average_word_embeddings_levy_dependency") + model = EncoderWithoutInstructions() assert model.prompts == {}, "The encoder should not have any prompts" eval.run(model, output_folder=tmp_path.as_posix(), overwrite_results=True)