Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 60 additions & 4 deletions mteb/model_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,11 @@ def is_zero_shot_on(self, tasks: Sequence[AbsTask] | Sequence[str]) -> bool | No
# If no tasks were specified, we're obviously zero-shot
if not tasks:
return True
if self.training_datasets is None:
training_datasets = self.get_training_datasets()
# If the model's training datasets are unknown, zero-shot status cannot be determined
if training_datasets is None:
return None
model_datasets = {ds_name for ds_name, splits in self.training_datasets.items()}
model_datasets = {ds_name for ds_name, splits in training_datasets.items()}
if isinstance(tasks[0], str):
benchmark_datasets = set(tasks)
else:
Expand All @@ -168,13 +170,42 @@ def is_zero_shot_on(self, tasks: Sequence[AbsTask] | Sequence[str]) -> bool | No
intersection = model_datasets & benchmark_datasets
return len(intersection) == 0

def get_training_datasets(self) -> dict[str, list[str]] | None:
"""Returns all training datasets of the model including similar tasks."""
import mteb

if self.training_datasets is None:
return None

training_datasets = self.training_datasets.copy()
if self.adapted_from is not None:
try:
adapted_from_model = mteb.get_model_meta(
self.adapted_from, fetch_from_hf=True
)
adapted_training_datasets = adapted_from_model.get_training_datasets()
if adapted_training_datasets is not None:
training_datasets |= adapted_training_datasets
except ValueError as e:
logger.warning(f"Could not get source model: {e} in MTEB")

return_dataset = training_datasets.copy()
visited = set()

for dataset in training_datasets:
similar_tasks = collect_similar_tasks(dataset, visited)
return_dataset |= {task: [] for task in similar_tasks}

return return_dataset

def zero_shot_percentage(
self, tasks: Sequence[AbsTask] | Sequence[str]
) -> int | None:
"""Indicates how out-of-domain the selected tasks are for the given model."""
if (self.training_datasets is None) or (not tasks):
training_datasets = self.get_training_datasets()
if (training_datasets is None) or (not tasks):
return None
model_datasets = {ds_name for ds_name, splits in self.training_datasets.items()}
model_datasets = {ds_name for ds_name, splits in training_datasets.items()}
if isinstance(tasks[0], str):
benchmark_datasets = set(tasks)
else:
Expand Down Expand Up @@ -221,3 +252,28 @@ def calculate_memory_usage_mb(self) -> int | None:
# Convert to MB
model_memory_mb = model_memory_bytes / MB
return round(model_memory_mb)


def collect_similar_tasks(dataset: str, visited: set[str]) -> set[str]:
    """Gather the tasks transitively related to ``dataset`` via ``SIMILAR_TASKS``.

    Two tasks count as related when one is listed in the other's
    ``SIMILAR_TASKS`` entry, in either direction (key -> listed task, or
    listed task -> key). Related tasks are expanded recursively.

    Args:
        dataset: Name of the task to start from.
        visited: Names that have already been expanded. Mutated in place:
            ``dataset`` and every task expanded during this call are added,
            which is what terminates the recursion on cycles.

    Returns:
        The names reached from ``dataset``; an empty set when ``dataset`` was
        already in ``visited``. The result can contain ``dataset`` itself,
        because a related task lists it back as a neighbour.
    """
    from .overview import SIMILAR_TASKS

    if dataset in visited:
        return set()
    visited.add(dataset)

    # Tasks listed under ``dataset`` come first, then every key that lists
    # ``dataset`` among its values.
    neighbours = list(SIMILAR_TASKS[dataset]) if dataset in SIMILAR_TASKS else []
    neighbours += [
        parent for parent, children in SIMILAR_TASKS.items() if dataset in children
    ]

    related = set()
    for name in neighbours:
        related.add(name)
        related |= collect_similar_tasks(name, visited)
    return related
9 changes: 8 additions & 1 deletion mteb/models/overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,15 @@ def get_model(model_name: str, revision: str | None = None, **kwargs: Any) -> En
return model


def get_model_meta(model_name: str, revision: str | None = None) -> ModelMeta:
def get_model_meta(
model_name: str, revision: str | None = None, fetch_from_hf: bool = True
) -> ModelMeta:
"""A function to fetch a model metadata object by name.

Args:
model_name: Name of the model to fetch
revision: Revision of the model to fetch
fetch_from_hf: Whether to fetch the model from HuggingFace Hub if not found in the registry

Returns:
A model metadata object
Expand All @@ -248,6 +251,10 @@ def get_model_meta(model_name: str, revision: str | None = None) -> ModelMeta:
)
return MODEL_REGISTRY[model_name]
else: # assume it is a sentence-transformers model
if not fetch_from_hf:
raise ValueError(
f"Model {model_name} not found in MTEB registry. Please set fetch_from_hf=False to load it from HuggingFace Hub."
)
logger.info(
"Model not found in model registry, assuming it is on HF Hub model."
)
Expand Down
4 changes: 1 addition & 3 deletions mteb/models/ru_sentence_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

from mteb.encoder_interface import PromptType
from mteb.model_meta import ModelMeta, sentence_transformers_loader
from mteb.models.bge_models import bge_m3_training_data
from mteb.models.instruct_wrapper import InstructSentenceTransformerWrapper

rubert_tiny = ModelMeta(
Expand Down Expand Up @@ -183,15 +182,14 @@
reference="https://huggingface.co/deepvk/USER-base",
similarity_fn_name="cosine",
framework=["Sentence Transformers", "PyTorch"],
adapted_from="https://huggingface.co/BAAI/bge-m3",
adapted_from="BAAI/bge-m3",
use_instructions=False,
training_datasets={
"BibleNLPBitextMining": ["train"],
"MLSUMClusteringP2P": ["train"],
"MLSUMClusteringP2P.v2": ["train"],
"MLSUMClusteringS2S": ["train"],
"MLSUMClusteringS2S.v2": ["train"],
**bge_m3_training_data,
# not MTEB:
# "deepvk/ru-HNP": ["train"],
# "deepvk/ru-WANLI": ["train"],
Expand Down
18 changes: 17 additions & 1 deletion mteb/overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import difflib
import logging
from collections import Counter
from collections import Counter, defaultdict

import pandas as pd

Expand Down Expand Up @@ -40,7 +40,23 @@ def create_name_to_task_mapping() -> dict[str, type[AbsTask]]:
return {cls.metadata.name: cls for cls in tasks}


def create_similar_tasks() -> dict[str, list[str]]:
    """Build the mapping from a source task to the tasks adapted from it.

    Every task returned by ``create_task_list`` whose metadata has a
    non-empty ``adapted_from`` is registered under each name listed there.

    Returns:
        A ``defaultdict(list)`` keyed by the names found in ``adapted_from``;
        each value lists the names of the tasks that declare that key, in
        ``create_task_list`` order.
    """
    derived_by_source = defaultdict(list)
    for task_cls in create_task_list():
        sources = task_cls.metadata.adapted_from
        if not sources:
            continue
        for source_name in sources:
            derived_by_source[source_name].append(task_cls.metadata.name)
    return derived_by_source


TASKS_REGISTRY = create_name_to_task_mapping()
SIMILAR_TASKS = create_similar_tasks()


def check_is_valid_script(script: str) -> None:
Expand Down
49 changes: 49 additions & 0 deletions tests/test_models/test_model_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pytest

import mteb
from mteb import ModelMeta


@pytest.mark.parametrize(
Expand All @@ -24,3 +25,51 @@ def test_model_memory_usage(model_name: str, expected_memory: int | None):
def test_model_memory_usage_api_model():
meta = mteb.get_model_meta("openai/text-embedding-3-large")
assert meta.memory_usage_mb is None


@pytest.mark.parametrize(
    "training_datasets",
    [
        {"Touche2020": []},  # parent task
        {"Touche2020-NL": []},  # child task
    ],
)
def test_model_similar_tasks(training_datasets):
    """Similar tasks are reported whether the parent or a child task is declared."""
    expected = [
        "NanoTouche2020Retrieval",
        "Touche2020",
        "Touche2020-Fa",
        "Touche2020-NL",
        "Touche2020Retrieval.v3",
    ]
    # Metadata fields irrelevant to this test are all left as None.
    unset_fields = dict.fromkeys(
        [
            "release_date",
            "languages",
            "loader",
            "n_parameters",
            "memory_usage_mb",
            "max_tokens",
            "embed_dim",
            "license",
            "open_weights",
            "public_training_code",
            "public_training_data",
            "reference",
            "similarity_fn_name",
            "use_instructions",
            "adapted_from",
            "superseded_by",
        ]
    )
    dummy_model_meta = ModelMeta(
        name="test_model",
        revision="test",
        framework=[],
        training_datasets=training_datasets,
        **unset_fields,
    )
    # Sorting a dict sorts its keys.
    assert sorted(dummy_model_meta.get_training_datasets()) == expected


def test_model_training_dataset_adapted():
    """Training datasets of the ``adapted_from`` model are inherited."""
    adapted_meta = mteb.get_model_meta("deepvk/USER-bge-m3")
    assert adapted_meta.adapted_from == "BAAI/bge-m3"

    # MIRACLRetrieval is not declared by deepvk/USER-bge-m3 itself; it is
    # declared by BAAI/bge-m3, the model it is adapted from.
    inherited = adapted_meta.get_training_datasets()
    assert "MIRACLRetrieval" in inherited
1 change: 0 additions & 1 deletion tests/test_tasks/test_all_abstasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import huggingface_hub
import pytest
import requests

import mteb
from mteb import MTEB
Expand Down