Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions mteb/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
To run a model on a set of tasks, use the `mteb run` command. For example:

```bash
mteb run -m average_word_embeddings_komninos \
mteb run -m sentence-transformers/average_word_embeddings_komninos \
-t Banking77Classification EmotionClassification \
--output_folder mteb_output \
--verbosity 3
Expand Down Expand Up @@ -44,7 +44,7 @@
Once a model is run you can create the metadata for a model card from a folder of results, use the `mteb create_meta` command. For example:

```bash
mteb create_meta --results_folder mteb_output/average_word_embeddings_komninos/{revision} \
mteb create_meta --results_folder mteb_output/sentence-transformers__average_word_embeddings_komninos/{revision} \
--output_path model_card.md
```

Expand Down
15 changes: 13 additions & 2 deletions mteb/model_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
NotASafetensorsRepoError,
SafetensorsParsingError,
)
from pydantic import BaseModel, ConfigDict
from pydantic import BaseModel, ConfigDict, field_validator

from mteb.abstasks.AbsTask import AbsTask
from mteb.encoder_interface import Encoder
Expand Down Expand Up @@ -63,7 +63,7 @@ class ModelMeta(BaseModel):

Attributes:
loader: the function that loads the model. If None it will just default to loading the model using the sentence transformer library.
name: The name of the model, ideally the name on huggingface.
name: The name of the model, ideally the name on huggingface. It should be in the format "organization/model_name".
n_parameters: The number of parameters in the model, e.g. 7_000_000 for a 7M parameter model. Can be None if the number of parameters is not known (e.g. for proprietary models) or
if the loader returns a SentenceTransformer model from which it can be derived.
memory_usage_mb: The memory usage of the model in MB. Can be None if the memory usage is not known (e.g. for proprietary models). To calculate it use the `calculate_memory_usage_mb` method.
Expand Down Expand Up @@ -123,6 +123,17 @@ def to_dict(self):
dict_repr["loader"] = get_loader_name(loader)
return dict_repr

@field_validator("name")
@classmethod
def check_name(cls, v: str | None) -> str | None:
    """Validate that a model name is namespaced as ``organization/model_name``.

    ``None`` and the literal name ``"bm25s"`` are accepted unchanged; any
    other value must contain a ``/``.

    Args:
        v: The candidate model name.

    Returns:
        The name exactly as it was passed in.

    Raises:
        ValueError: If the name is set, is not ``"bm25s"`` and has no ``/``.
    """
    # Short-circuit keeps the "/" membership test away from a None value.
    is_exempt = v is None or v == "bm25s"
    if is_exempt or "/" in v:
        return v
    raise ValueError("Model name must be in the format 'organization/model_name'")

def load_model(self, **kwargs: Any) -> Encoder:
if self.loader is None:
logger.warning(
Expand Down
2 changes: 1 addition & 1 deletion mteb/models/b1ade_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

b1ade_embed = ModelMeta(
loader=sentence_transformers_loader,
name="b1ade-embed",
name="w601sxs/b1ade-embed",
languages=["eng-Latn"],
revision="3bdac13927fdc888b903db93b2ffdbd90b295a69",
open_weights=True,
Expand Down
4 changes: 2 additions & 2 deletions mteb/models/cohere_v.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def get_fused_embeddings(

cohere_mult_3 = ModelMeta(
loader=partial(cohere_v_loader, model_name="embed-multilingual-v3.0"),
name="embed-multilingual-v3.0-v",
name="Cohere/Cohere-embed-multilingual-v3.0",
languages=[], # Unknown, but support >100 languages
revision="1",
release_date="2024-10-24",
Expand All @@ -204,7 +204,7 @@ def get_fused_embeddings(

cohere_eng_3 = ModelMeta(
loader=partial(cohere_v_loader, model_name="embed-english-v3.0"),
name="embed-english-v3.0-v",
name="Cohere/Cohere-embed-english-v3.0",
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Member Author

@Samoed Samoed Mar 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems we have the same models in cohere_models, but they do not support vision. Maybe we should remove the text-only implementation and leave only the multimodal one in v2?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can, as long as we move the ModelMeta.

Hmm, there don't seem to be any image tasks in the linked folder. @gowitheflow-1998 maybe we haven't run results for this model?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we should combine the two implementations, but the cohere_models module uses cohere.Client, while cohere_v uses cohere.ClientV2. Therefore, we need to verify whether the results for text tasks are the same with cohere.ClientV2.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, there don't seem to be any image tasks in the linked folder. @gowitheflow-1998 maybe we haven't run results for this model?

yes, we weren't able to run it months back because the rate limit was 5 images per minute.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, can we merge the implementations and integrate them properly after this? The limit of 5 images per minute applies to the free trial. I think we can ask them for credits.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found that the limit has now been raised to 400 for production keys, but back in November, when it was just released, it was 5 for both trial and production keys. We can run it if we want.

languages=["eng-Latn"],
revision="1",
release_date="2024-10-24",
Expand Down
4 changes: 2 additions & 2 deletions tests/test_benchmark/mock_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def encode(self, sentences, prompt_name: str | None = None, **kwargs):

class MockCLIPEncoder:
mteb_model_meta = ModelMeta(
name="MockCLIPModel",
name="mock/MockCLIPModel",
languages=["eng_Latn"],
revision="3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268",
release_date="2021-02-06",
Expand Down Expand Up @@ -91,7 +91,7 @@ def calculate_probs(self, text_embeddings, image_embeddings):

class MockMocoEncoder:
mteb_model_meta = ModelMeta(
name="MockMocoModel",
name="mock/MockMocoModel",
languages=["eng_Latn"],
revision="7d091cd70772c5c0ecf7f00b5f12ca609a99d69d",
release_date="2024-01-01",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_benchmark/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ def test_task_modality_filtering(mock_logger, task):
f"'{modality}'" for modality in sorted(task.metadata.modalities)
)
mock_logger.assert_called_with(
f"MockMocoModel only supports ['image'], but the task modalities are [{task_modalities}]."
f"mock/MockMocoModel only supports ['image'], but the task modalities are [{task_modalities}]."
)


Expand Down
4 changes: 2 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_available_benchmarks():

run_task_fixures = [
(
"average_word_embeddings_komninos",
"sentence-transformers/average_word_embeddings_komninos",
"BornholmBitextMining",
"21eec43590414cb8e3a6f654857abed0483ae36e",
),
Expand Down Expand Up @@ -189,7 +189,7 @@ def test_create_meta_from_existing(existing_readme_name: str, gold_readme_name:


def test_save_predictions():
command = f"{sys.executable} -m mteb run -m average_word_embeddings_komninos -t NFCorpus --output_folder tests/results --save_predictions"
command = f"{sys.executable} -m mteb run -m sentence-transformers/average_word_embeddings_komninos -t NFCorpus --output_folder tests/results --save_predictions"
result = subprocess.run(command, shell=True, capture_output=True, text=True)
assert result.returncode == 0, "Command failed"
test_folder = Path(__file__).parent
Expand Down
8 changes: 6 additions & 2 deletions tests/test_encoder_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@


def test_sentence_is_encoder():
model = SentenceTransformer("average_word_embeddings_komninos")
model = SentenceTransformer(
"sentence-transformers/average_word_embeddings_komninos"
)
assert isinstance(model, Encoder)


def test_wrapped_sentence_is_encoder_with_query_corpus_encode():
model = SentenceTransformer("average_word_embeddings_komninos")
model = SentenceTransformer(
"sentence-transformers/average_word_embeddings_komninos"
)
model = DRESModel(model)

assert isinstance(model, Encoder)
28 changes: 27 additions & 1 deletion tests/test_models/test_model_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def test_model_memory_usage_api_model():
)
def test_model_similar_tasks(training_datasets):
dummy_model_meta = ModelMeta(
name="test_model",
name="test/test_model",
revision="test",
release_date=None,
languages=None,
Expand Down Expand Up @@ -67,6 +67,32 @@ def test_model_similar_tasks(training_datasets):
assert sorted(dummy_model_meta.get_training_datasets().keys()) == expected


def test_model_name_without_prefix():
    """A model name lacking an ``organization/`` prefix must be rejected.

    Every field other than ``name`` is given a neutral value so that the
    name-format check is the behavior under test.
    """
    # Match on the validator's message so the test cannot pass because some
    # unrelated field failed validation with a different ValueError.
    with pytest.raises(ValueError, match="organization/model_name"):
        ModelMeta(
            name="test_model",
            revision="test",
            release_date=None,
            languages=None,
            loader=None,
            n_parameters=None,
            memory_usage_mb=None,
            max_tokens=None,
            embed_dim=None,
            license=None,
            open_weights=None,
            public_training_code=None,
            public_training_data=None,
            framework=[],
            reference=None,
            similarity_fn_name=None,
            use_instructions=None,
            training_datasets=None,
            adapted_from=None,
            superseded_by=None,
        )


def test_model_training_dataset_adapted():
model_meta = mteb.get_model_meta("deepvk/USER-bge-m3")
assert model_meta.adapted_from == "BAAI/bge-m3"
Expand Down
4 changes: 2 additions & 2 deletions tests/test_tasks/test_mteb_rerank.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ def test_mteb_rerank(tmp_path: Path):


def test_reranker_same_ndcg1():
de_name = "average_word_embeddings_komninos"
de_name = "sentence-transformers/average_word_embeddings_komninos"
revision = "21eec43590414cb8e3a6f654857abed0483ae36e"
de = SentenceTransformer(de_name, revision=revision)
ce = CrossEncoder("cross-encoder/ms-marco-TinyBERT-L-2-v2")
Expand Down Expand Up @@ -406,7 +406,7 @@ def test_reranker_same_ndcg1():

# read in stage 1 and stage two and check ndcg@1 is the same
with open(
f"tests/results/stage1/sentence-transformers__{de_name}/{revision}/SciFact.json"
f"tests/results/stage1/{de_name.replace('/', '__')}/{revision}/SciFact.json"
) as f:
stage1 = json.load(f)

Expand Down