Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
d71718b
add dotwrapper
Samoed Jan 10, 2025
d50fd88
lint
Samoed Jan 10, 2025
7d1e949
make cleaner
Samoed Jan 10, 2025
9e9a111
add poc similarity_fn in ModelMeta
sam-hey Jan 10, 2025
e4a692f
ref: rename EvaluationFunction to ScoringFunction
sam-hey Jan 11, 2025
1865345
make cos_sim default
sam-hey Jan 11, 2025
f34f110
Revert "make cleaner"
sam-hey Jan 11, 2025
49a954e
Revert "add dotwrapper"
sam-hey Jan 11, 2025
d9ebe97
lint
sam-hey Jan 11, 2025
4c89681
fix: _run_eval no co tracking
sam-hey Jan 12, 2025
fae6e31
Merge remote-tracking branch 'mteb/v2.0.0' into fix_contriever
sam-hey Jan 12, 2025
6298d75
fix: bm25s
sam-hey Jan 12, 2025
5a023d6
add enum to models
sam-hey Jan 12, 2025
8ad1e88
add mapping st sim fn name to mteb sim fn name
sam-hey Jan 12, 2025
700ad58
fix model meta use new fn for sim operators
sam-hey Jan 12, 2025
8cffb6a
add max_sim
sam-hey Jan 12, 2025
bf0cf07
fix: colbert & rm similarity_fn_name
sam-hey Jan 13, 2025
3391e1e
ci: skip AfriSentiLID for now (#1785)
isaac-chung Jan 13, 2025
7bb43ab
Merge branch 'v2.0.0' into fix_contriever
sam-hey Jan 13, 2025
4fabb09
test: add test for bm25s and ColBERT
sam-hey Jan 13, 2025
1442673
lint
sam-hey Jan 13, 2025
bb4beec
feat: add mapping for max_sim from pylate
sam-hey Jan 13, 2025
0f923c1
test: bm25s skip
sam-hey Jan 13, 2025
f4779c7
fix: MaxSim as max_sim match pylate & rm Enum in models
sam-hey Jan 13, 2025
89d1ae8
Merge remote-tracking branch 'mteb/v2.0.0' into fix_contriever
sam-hey Jan 14, 2025
07f4d6a
rm enum
sam-hey Jan 15, 2025
6c425f4
update tests skip
sam-hey Jan 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions mteb/evaluation/evaluators/RetrievalEvaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def __call__(
self.top_k,
task_name=self.task_name, # type: ignore
instructions=instructions,
score_function="bm25",
**kwargs,
)
else:
Expand Down
16 changes: 13 additions & 3 deletions mteb/evaluation/evaluators/model_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,9 +332,19 @@ def _full_corpus_search(
query_embeddings = torch.as_tensor(query_embeddings).to(device)
sub_corpus_embeddings = torch.as_tensor(sub_corpus_embeddings).to(device)

score_function = (
self.model.similarity if hasattr(self.model, "similarity") else cos_sim
)
if hasattr(self.model.model, "mteb_model_meta") or hasattr(
self.model, "similarity"
):
score_function = (
self.model.similarity
if hasattr(self.model, "similarity")
else self.model.model.mteb_model_meta.get_similarity_function()
)
else:
logger.warning(
"The model does not provide `mteb_model_meta`; defaulting to the cosine similarity function."
)
score_function = cos_sim

with torch.inference_mode():
scores = score_function(query_embeddings, sub_corpus_embeddings)
Expand Down
28 changes: 28 additions & 0 deletions mteb/evaluation/evaluators/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,34 @@ def _cos_sim_core(a_tensor, b_tensor):
return _cos_sim_core(a, b)


def max_sim(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Computes the max-similarity max_sim(a[i], b[j]) for all i and j.

    Implements the ColBERT-style MaxSim operator: for each query/document
    pair, every query token is matched with its most similar document token,
    and the per-token maxima are summed.

    Args:
        a: Token embeddings of shape (batch_size, num_tokens, token_dim).
            A 2D (num_tokens, token_dim) input is treated as a batch of one.
        b: Token embeddings of shape (batch_size, num_tokens, token_dim).
            A 2D (num_tokens, token_dim) input is treated as a batch of one.

    Return:
        Matrix with res[i][j] = max_sim(a[i], b[j])
    """  # noqa: D402
    if not isinstance(a, torch.Tensor):
        a = torch.tensor(a, dtype=torch.float32)

    if not isinstance(b, torch.Tensor):
        b = torch.tensor(b, dtype=torch.float32)

    # Promote unbatched (num_tokens, token_dim) inputs to a batch of one.
    if a.dim() == 2:
        a = a.unsqueeze(0)

    if b.dim() == 2:
        b = b.unsqueeze(0)

    # (A, S, H) x (B, T, H) -> (A, B, S, T): dot product for every
    # (query token, document token) pair of every (query, document) pair.
    scores = torch.einsum("ash,bth->abst", a, b)

    # Max over document tokens (t), then sum over query tokens (s).
    return scores.amax(dim=-1).sum(dim=-1)


def dot_score(a: torch.Tensor, b: torch.Tensor):
"""Computes the dot-product dot_prod(a[i], b[j]) for all i and j.
:return: Matrix with res[i][j] = dot_prod(a[i], b[j])
Expand Down
16 changes: 15 additions & 1 deletion mteb/model_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
from functools import partial
from typing import TYPE_CHECKING, Any, Callable, Literal

import numpy as np
from pydantic import BaseModel, ConfigDict

from mteb.abstasks.AbsTask import AbsTask
from mteb.abstasks.TaskMetadata import STR_DATE, STR_URL
from mteb.encoder_interface import Encoder
from mteb.evaluation.evaluators.utils import cos_sim, dot_score, max_sim

from .languages import ISO_LANGUAGE_SCRIPT

Expand All @@ -30,7 +32,6 @@
"PyLate",
"ColBERT",
]
DISTANCE_METRICS = Literal["cosine", "max_sim", "dot"]


def sentence_transformers_loader(
Expand All @@ -51,6 +52,9 @@ def get_loader_name(
return loader.__name__


DISTANCE_METRICS = Literal["cosine", "MaxSim", "dot"]


class ModelMeta(BaseModel):
"""The model metadata object.

Expand Down Expand Up @@ -106,6 +110,16 @@ class ModelMeta(BaseModel):
superseded_by: str | None = None
citation: str | None = None

def get_similarity_function(self) -> Callable[[np.ndarray, np.ndarray], np.ndarray]:
    """Resolve this model's ``similarity_fn_name`` to its scoring function.

    Returns:
        The similarity function (``cos_sim``, ``dot_score``, or ``max_sim``).

    Raises:
        ValueError: If ``similarity_fn_name`` is unset or not a recognized value.
    """
    if self.similarity_fn_name is None:
        raise ValueError("Similarity function not specified.")
    if self.similarity_fn_name == "cosine":
        return cos_sim
    if self.similarity_fn_name == "dot":
        return dot_score
    if self.similarity_fn_name == "MaxSim":
        return max_sim
    # Previously an unrecognized name fell through every branch and
    # implicitly returned None, crashing later at the call site.
    raise ValueError(
        f"Unknown similarity function: {self.similarity_fn_name!r}. "
        "Expected one of ['cosine', 'dot', 'MaxSim']."
    )

def to_dict(self):
dict_repr = self.model_dump()
loader = dict_repr.pop("loader", None)
Expand Down
7 changes: 5 additions & 2 deletions mteb/models/colbert_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,13 @@ def encode(
)
logger.info(f"Encoding {len(sentences)} sentences.")

if "request_qid" in kwargs:
kwargs.pop("request_qid")
pred = self.model.encode(
sentences,
prompt_name=prompt_name,
is_query=True if prompt_type == PromptType.query else False,
convert_to_tensor=True,
**kwargs,
)

Expand Down Expand Up @@ -158,7 +161,7 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
max_tokens=180, # Reduced for Benchmarking - see ColBERT paper
embed_dim=None, # Bag of Embeddings (128) for each token
license="mit",
similarity_fn_name="max_sim",
similarity_fn_name="MaxSim",
framework=["PyLate", "ColBERT"],
reference="https://huggingface.co/colbert-ir/colbertv2.0",
use_instructions=False,
Expand Down Expand Up @@ -209,7 +212,7 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
max_tokens=8192,
embed_dim=None, # Bag of Embeddings (128) for each token
license="cc-by-nc-4.0",
similarity_fn_name="max_sim",
similarity_fn_name="MaxSim",
framework=["PyLate", "ColBERT"],
reference="https://huggingface.co/jinaai/jina-colbert-v2",
use_instructions=False,
Expand Down
9 changes: 6 additions & 3 deletions mteb/models/overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,12 @@ def get_model(model_name: str, revision: str | None = None, **kwargs: Any) -> En
model = meta.load_model(**kwargs)

# If revision not available in the modelmeta, try to extract it from sentence-transformers
if meta.revision is None and isinstance(model, SentenceTransformer):
_meta = model_meta_from_sentence_transformers(model)
meta.revision = _meta.revision if _meta.revision else meta.revision
if isinstance(model.model, SentenceTransformer):
_meta = model_meta_from_sentence_transformers(model.model)
if meta.revision is None:
meta.revision = _meta.revision if _meta.revision else meta.revision
if not meta.similarity_fn_name:
meta.similarity_fn_name = _meta.similarity_fn_name

model.mteb_model_meta = meta # type: ignore
return model
Expand Down
6 changes: 1 addition & 5 deletions mteb/models/sentence_transformer_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ def __init__(
model: str | SentenceTransformer | CrossEncoder,
revision: str | None = None,
model_prompts: dict[str, str] | None = None,
similarity_fn_name: str | None = None,
**kwargs,
) -> None:
"""Wrapper for SentenceTransformer models.
Expand All @@ -33,7 +32,6 @@ def __init__(
First priority is given to the composed prompt of task name + prompt type (query or passage), then to the specific task prompt,
then to the composed prompt of task type + prompt type, then to the specific task type prompt,
and finally to the specific prompt type.
similarity_fn_name: A similarity function to use.
**kwargs: Additional arguments to pass to the SentenceTransformer model.
"""
if isinstance(model, str):
Expand Down Expand Up @@ -61,9 +59,7 @@ def __init__(
if isinstance(self.model, CrossEncoder):
self.predict = self.handle_instructions_predict

if similarity_fn_name:
self.similarity = self.get_similarity_function(similarity_fn_name)
elif hasattr(self.model, "similarity") and callable(self.model.similarity):
if hasattr(self.model, "similarity") and callable(self.model.similarity):
self.similarity = self.model.similarity

def encode(
Expand Down
15 changes: 0 additions & 15 deletions mteb/models/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,9 @@
import logging
from typing import Callable, get_args

import numpy as np

import mteb
from mteb.abstasks.TaskMetadata import TASK_TYPE
from mteb.encoder_interface import PromptType
from mteb.evaluation.evaluators.utils import cos_sim, dot_score

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -67,18 +64,6 @@ def get_prompt_name(
)
return None

@staticmethod
def get_similarity_function(
similarity_fn_name: str,
) -> Callable[[np.ndarray, np.ndarray], np.ndarray]:
if similarity_fn_name == "cosine":
return cos_sim
if similarity_fn_name == "dot":
return dot_score
raise ValueError(
"Invalid similarity function. Should be one of ['cosine', 'dot']"
)

@staticmethod
def validate_task_to_prompt_name(
task_to_prompt_name: dict[str, str] | None,
Expand Down
44 changes: 44 additions & 0 deletions tests/test_benchmark/test_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from __future__ import annotations

import pytest

import mteb
from mteb import MTEB
from mteb.abstasks import AbsTask

from .mock_tasks import MockRetrievalTask


@pytest.mark.parametrize("model", ["colbert-ir/colbertv2.0"])
@pytest.mark.parametrize("task", [MockRetrievalTask()])
def test_colbert_model_e2e(task: AbsTask, model: str):
    """End-to-end check that a ColBERT model runs retrieval and scores perfectly on the mock task."""
    pytest.importorskip("pylate", reason="pylate not installed")
    # Avoid rebinding the `model` parameter: keep the loaded model separate.
    loaded_model = mteb.get_model(model)
    evaluation = MTEB(tasks=[task])

    results = evaluation.run(
        loaded_model,
        eval_splits=["test"],
        corpus_chunk_size=500,
    )

    split_scores = results[0].scores["test"][0]
    assert split_scores["ndcg_at_1"] == 1.0


def test_bm25s_e2e():
    """End-to-end check that the bm25s model reproduces the known NFCorpus score."""
    # NOTE: bm25s fails for datasets smaller than 1000 documents, so use NFCorpus.
    pytest.importorskip("bm25s", reason="bm25s not installed")
    pytest.importorskip("Stemmer", reason="PyStemmer not installed")

    tasks = mteb.get_tasks(tasks=["NFCorpus"])
    evaluation = MTEB(tasks=tasks)
    model = mteb.get_model("bm25s")

    results = evaluation.run(model, eval_splits=["test"])

    split_scores = results[0].scores["test"][0]
    assert split_scores["ndcg_at_1"] == 0.42879
Loading