Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,110 changes: 1,110 additions & 0 deletions mteb/abstasks/AbsTaskRTEB.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions mteb/abstasks/TaskMetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@
"Summarization",
"InstructionRetrieval",
"Speed",
"RTEB",
) + MIEB_TASK_TYPE

TASK_TYPE = Literal[_TASK_TYPE]
Expand Down
1 change: 1 addition & 0 deletions mteb/abstasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .AbsTaskPairClassification import *
from .AbsTaskReranking import *
from .AbsTaskRetrieval import *
from .AbsTaskRTEB import *
from .AbsTaskSpeedTask import *
from .AbsTaskSTS import *
from .AbsTaskSummarization import *
Expand Down
43 changes: 43 additions & 0 deletions mteb/model_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
"ColBERT",
]
DISTANCE_METRICS = Literal["cosine", "max_sim", "dot"]
EMBEDDING_DTYPES = Literal["float32", "int8", "binary"]


def sentence_transformers_loader(
Expand All @@ -61,6 +62,34 @@ def get_loader_name(
return loader.__name__


def model_id(
model_name: str,
embd_dtype: str
| None, # Keep None here as input can still be None before default assignment
embd_dim: int | None,
) -> str:
# Handle potential None values passed to the function, even if the class attribute has a default
if model_name is None:
# Or handle appropriately, maybe raise error if name is critical for ID
model_name_part = "unknown_model"
else:
model_name_part = model_name.replace("/", "__")

dtype_str = embd_dtype if embd_dtype else "unknown_dtype"
dim_str = f"{embd_dim}d" if embd_dim else "unknown_dim"

# Check if default was used implicitly for dtype
if embd_dtype is None:
# If the class attribute defaults to 'float32', reflect that possibility if None is passed
# However, the class instance will have 'float32' if not specified.
# Let's assume the function should reflect the actual value passed or derived.
# If the intention is to always use the default if None is passed, adjust logic here.
# For now, stick to representing the input or lack thereof.
pass # dtype_str is already "unknown_dtype"

return f"{model_name_part}_{dtype_str}_{dim_str}"


class ModelMeta(BaseModel):
"""The model metadata object.

Expand All @@ -73,6 +102,7 @@ class ModelMeta(BaseModel):
max_tokens: The maximum number of tokens the model can handle. Can be None if the maximum number of tokens is not known (e.g. for proprietary
models).
embed_dim: The dimension of the embeddings produced by the model. Currently all models are assumed to produce fixed-size embeddings.
embd_dtype: The data type of the embeddings produced by the model (e.g., "float32", "int8", "binary"). Defaults to "float32".
revision: The revision number of the model. If None, it is assumed that the metadata (including the loader) is valid for all revisions of the model.
release_date: The date the model's revision was released.
license: The license under which the model is released. Required if open_weights is True.
Expand Down Expand Up @@ -119,6 +149,10 @@ class ModelMeta(BaseModel):
superseded_by: str | None = None
is_cross_encoder: bool | None = None
modalities: list[MODALITIES] = ["text"]
# Attribute merged from rteb/ebr/core/meta.py
embd_dtype: EMBEDDING_DTYPES = (
"float32" # Defaulting to float32 as requested, type hint updated
)

def to_dict(self):
dict_repr = self.model_dump()
Expand Down Expand Up @@ -276,6 +310,15 @@ def calculate_memory_usage_mb(self) -> int | None:
model_memory_mb = model_memory_bytes / MB
return round(model_memory_mb)

@property
def _id(self) -> str:
    """Unique identifier for this model, built from name, embd_dtype and embed_dim.

    Returns:
        The ID produced by :func:`model_id`.

    Raises:
        ValueError: If ``self.name`` is ``None`` — a name is mandatory for an ID.
    """
    if self.name is None:
        raise ValueError("Model name is required to generate an ID.")
    # embd_dtype defaults to "float32" on the class when not set explicitly.
    return model_id(self.name, self.embd_dtype, self.embed_dim)

def collect_similar_tasks(dataset: str, visited: set[str]) -> set[str]:
"""Recursively collect all similar tasks for a given dataset."""
Expand Down
25 changes: 25 additions & 0 deletions mteb/models/voyage_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,31 @@ def _batched_encode(
public_training_data=None,
)

# Metadata for Voyage AI's voyage-3-large embedding API model.
voyage_3_large = ModelMeta(
    name="voyageai/voyage-3-large",
    revision="1",  # NOTE(review): API models have no real revision; "1" is a placeholder
    release_date="2024-09-18",  # NOTE(review): copied from voyage-3 — confirm the actual voyage-3-large date
    languages=None,
    loader=partial(  # type: ignore
        VoyageWrapper,
        model_name="voyage-3-large",  # must match the Voyage API model name exactly
        model_prompts=model_prompts,
    ),
    max_tokens=32768,  # per reviewer correction (was assumed 32000 from voyage-3)
    embed_dim=1024,  # NOTE(review): assumed same as voyage-3 — verify against Voyage docs
    open_weights=False,
    n_parameters=None,
    memory_usage_mb=None,
    license=None,
    reference="https://blog.voyageai.com/2024/09/18/voyage-3/",  # NOTE(review): voyage-3 post; update if a dedicated voyage-3-large post exists
    similarity_fn_name="cosine",
    framework=["API"],
    use_instructions=True,
    training_datasets=VOYAGE_TRAINING_DATA,
    public_training_code=None,
    public_training_data=None,
)

voyage_3_lite = ModelMeta(
name="voyageai/voyage-3-lite",
revision="1",
Expand Down
47 changes: 47 additions & 0 deletions mteb/tasks/RTEB/RTEBAILACasedocsTask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from __future__ import annotations

import logging

from mteb.abstasks.AbsTaskRTEB import AbsTaskRTEB

logger = logging.getLogger(__name__)


class RTEBAILACasedocs(AbsTaskRTEB):
    """RTEB retrieval task over the AILACasedocs legal dataset (English, nDCG@10)."""

    # Static task metadata, forwarded verbatim as keyword arguments to
    # AbsTaskRTEB.create_rteb_task_metadata() below.
    _TASK_SPECIFIC_METADATA = {
        "task_name": "RTEBAILACasedocs",
        "description": "RTEB evaluation for AILACasedocs dataset.",
        "reference": "https://zenodo.org/records/4063986",
        "dataset_path": "embedding-benchmark/AILACasedocs",
        "dataset_revision": "main",  # NOTE(review): "main" is a moving target — consider pinning a commit
        "main_score": "ndcg_at_10",
        "revision": "1.0.1",
        "date": None,  # Date not specified in dataset metadata
        "domains": ["Legal"],
        "task_subtypes": ["Article retrieval"],
        "annotations_creators": "derived",
        "license": "cc-by-4.0",  # Standardized license format
        "bibtex_citation": """@dataset{paheli_bhattacharya_2020_4063986,
author = {Paheli Bhattacharya and
Kripabandhu Ghosh and
Saptarshi Ghosh and
Arindam Pal and
Parth Mehta and
Arnab Bhattacharya and
Prasenjit Majumder},
title = {AILA 2019 Precedent & Statute Retrieval Task},
month = oct,
year = 2020,
publisher = {Zenodo},
doi = {10.5281/zenodo.4063986},
url = {https://doi.org/10.5281/zenodo.4063986}
}""",
        "modalities": ["text"],
        "eval_langs": ["eng-Latn"],
    }

    metadata = AbsTaskRTEB.create_rteb_task_metadata(**_TASK_SPECIFIC_METADATA)

    def __init__(self, **kwargs):
        # Bind this task to the AILACasedocs RTEB dataset; remaining kwargs
        # pass through to the AbsTaskRTEB base class.
        super().__init__(rteb_dataset_name="AILACasedocs", **kwargs)
46 changes: 46 additions & 0 deletions mteb/tasks/RTEB/RTEBAILAStatutesTask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from __future__ import annotations

import logging

from mteb.abstasks.AbsTaskRTEB import AbsTaskRTEB

logger = logging.getLogger(__name__)


class RTEBAILAStatutes(AbsTaskRTEB):
    """RTEB retrieval task over the AILAStatutes legal dataset (English, nDCG@10)."""

    # Static task metadata, forwarded verbatim as keyword arguments to
    # AbsTaskRTEB.create_rteb_task_metadata() below.
    _TASK_SPECIFIC_METADATA = {
        "task_name": "RTEBAILAStatutes",
        "description": "RTEB evaluation for AILAStatutes dataset.",
        "reference": "https://zenodo.org/records/4063986",
        "dataset_path": "embedding-benchmark/AILAStatutes",
        "dataset_revision": "main",  # NOTE(review): "main" is a moving target — consider pinning a commit
        "main_score": "ndcg_at_10",
        "revision": "1.0.1",
        "date": None,  # Date not specified in dataset metadata
        "domains": ["Legal"],
        "task_subtypes": ["Article retrieval"],
        "annotations_creators": "derived",
        "license": "cc-by-4.0",  # Standardized license format
        "bibtex_citation": """@dataset{paheli_bhattacharya_2020_4063986,
author = {Paheli Bhattacharya and
Kripabandhu Ghosh and
Saptarshi Ghosh and
Arindam Pal and
Parth Mehta and
Arnab Bhattacharya and
Prasenjit Majumder},
title = {AILA 2019 Precedent & Statute Retrieval Task},
month = oct,
year = 2020,
publisher = {Zenodo},
doi = {10.5281/zenodo.4063986},
url = {https://doi.org/10.5281/zenodo.4063986}
}""",
        "modalities": ["text"],
        "eval_langs": ["eng-Latn"],
    }

    metadata = AbsTaskRTEB.create_rteb_task_metadata(**_TASK_SPECIFIC_METADATA)

    def __init__(self, **kwargs):
        # Bind this task to the AILAStatutes RTEB dataset; remaining kwargs
        # pass through to the AbsTaskRTEB base class.
        super().__init__(rteb_dataset_name="AILAStatutes", **kwargs)
37 changes: 37 additions & 0 deletions mteb/tasks/RTEB/RTEBAPPSTask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from __future__ import annotations

import logging

from mteb.abstasks.AbsTaskRTEB import AbsTaskRTEB

logger = logging.getLogger(__name__)


class RTEBAPPS(AbsTaskRTEB):
    """RTEB code-retrieval task over the APPS programming-problems dataset (nDCG@10)."""

    # Static task metadata, forwarded verbatim as keyword arguments to
    # AbsTaskRTEB.create_rteb_task_metadata() below.
    _TASK_SPECIFIC_METADATA = {
        "task_name": "RTEBAPPS",
        "description": "RTEB evaluation for APPS dataset.",
        "reference": "https://arxiv.org/abs/2105.09938",
        "dataset_path": "embedding-benchmark/APPS",
        "dataset_revision": "main",  # NOTE(review): "main" is a moving target — consider pinning a commit
        "main_score": "ndcg_at_10",
        "revision": "1.0.1",
        "date": ("2021-05-20", "2021-05-20"),  # arXiv submission date of the APPS paper
        "task_subtypes": ["Code retrieval"],
        "license": "mit",
        "annotations_creators": "derived",
        "text_creation": "found",
        "bibtex_citation": """@article{hendrycksapps2021,
title={Measuring Coding Challenge Competence With APPS},
author={Dan Hendrycks and Steven Basart and Saurav Kadavath and Mantas Mazeika and Akul Arora and Ethan Guo and Collin Burns and Samir Puranik and Horace He and Dawn Song and Jacob Steinhardt},
journal={NeurIPS},
year={2021}
}""",
        "modalities": ["text"],
        "dialect": [],
    }

    metadata = AbsTaskRTEB.create_rteb_task_metadata(**_TASK_SPECIFIC_METADATA)

    def __init__(self, **kwargs):
        # Bind this task to the APPS RTEB dataset; remaining kwargs pass
        # through to the AbsTaskRTEB base class.
        super().__init__(rteb_dataset_name="APPS", **kwargs)
49 changes: 49 additions & 0 deletions mteb/tasks/RTEB/RTEBCOVID_QATask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from __future__ import annotations

import logging

from mteb.abstasks.AbsTaskRTEB import AbsTaskRTEB

logger = logging.getLogger(__name__)


class RTEBCOVID_QA(AbsTaskRTEB):
    """RTEB question-answering retrieval task over the COVID-QA medical dataset (nDCG@10)."""

    # Static task metadata, forwarded verbatim as keyword arguments to
    # AbsTaskRTEB.create_rteb_task_metadata() below.
    _TASK_SPECIFIC_METADATA = {
        "task_name": "RTEBCOVID_QA",
        "description": "RTEB evaluation for COVID_QA dataset.",
        "reference": "https://aclanthology.org/2020.nlpcovid19-acl.18/",
        "dataset_path": "embedding-benchmark/COVID_QA",
        "dataset_revision": "main",  # NOTE(review): "main" is a moving target — consider pinning a commit
        "main_score": "ndcg_at_10",
        "revision": "1.0.1",
        "date": ("2020-01-01", "2020-12-31"),
        "domains": ["Medical"],
        "task_subtypes": ["Question answering"],
        "license": "apache-2.0",
        "annotations_creators": "expert-annotated",
        "text_creation": "found",
        # Raw string so LaTeX escapes (\" and \u) reach BibTeX verbatim; in the
        # previous plain string, \" silently collapsed to a bare quote and
        # corrupted the umlauts (only the double-escaped \\u survived intact).
        # NOTE(review): the author list looks inconsistent with the published
        # COVID-QA paper — verify against the ACL Anthology entry.
        "bibtex_citation": r"""@inproceedings{moller-etal-2020-covid,
title = "{COVID}-QA: A Question Answering Dataset for {COVID}-19",
author = "M{\"o}ller, Erik and
Brasch, Malte and
Eger, Steffen and
{\"U}z{\"u}mc{\"u}o{\u{g}}lu, Hakan and
Reimers, Nils and
Gurevych, Iryna",
booktitle = "Proceedings of the 1st Workshop on NLP for COVID-19 (part 2) at ACL 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.nlpcovid19-acl.18",
doi = "10.18653/v1/2020.nlpcovid19-acl.18",
pages = "145--152",
abstract = "We present COVID-QA, a Question Answering dataset consisting of 2,019 question/answer pairs annotated by volunteer biomedical experts on scientific articles about COVID-19. The dataset is designed to be challenging for current QA systems, as it requires reasoning over multiple sentences and paragraphs. We provide baseline results using several state-of-the-art QA models and analyze their performance.",
}""",
        "modalities": ["text"],
    }

    metadata = AbsTaskRTEB.create_rteb_task_metadata(**_TASK_SPECIFIC_METADATA)

    def __init__(self, **kwargs):
        # Bind this task to the COVID_QA RTEB dataset; remaining kwargs pass
        # through to the AbsTaskRTEB base class.
        super().__init__(rteb_dataset_name="COVID_QA", **kwargs)
44 changes: 44 additions & 0 deletions mteb/tasks/RTEB/RTEBChatDoctor_HealthCareMagicTask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from __future__ import annotations

import logging

from mteb.abstasks.AbsTaskRTEB import AbsTaskRTEB

logger = logging.getLogger(__name__)


class RTEBChatDoctor_HealthCareMagic(AbsTaskRTEB):
    """RTEB retrieval task over the ChatDoctor/HealthCareMagic medical-dialogue dataset (nDCG@10)."""

    # Static task metadata, forwarded verbatim as keyword arguments to
    # AbsTaskRTEB.create_rteb_task_metadata() below.
    _TASK_SPECIFIC_METADATA = {
        "task_name": "RTEBChatDoctor_HealthCareMagic",
        "description": "RTEB evaluation for ChatDoctor_HealthCareMagic dataset.",
        "reference": "https://github.com/Kent0n-Li/ChatDoctor",
        "dataset_path": "embedding-benchmark/ChatDoctor_HealthCareMagic",
        "dataset_revision": "main",  # NOTE(review): "main" is a moving target — consider pinning a commit
        "main_score": "ndcg_at_10",
        "revision": "1.0.1",
        "date": ("2023-06-24", "2023-06-24"),
        "task_subtypes": [],
        "license": "cc-by-4.0",
        "annotations_creators": "derived",
        "text_creation": "found",
        "bibtex_citation": """@article{Li2023ChatDoctor,
author = {Li, Yunxiang and Li, Zihan and Zhang, Kai and Dan, Ruilong and Jiang, Steve and Zhang, You},
title = {ChatDoctor: A Medical Chat Model Fine-Tuned on a Large Language Model Meta-AI (LLaMA) Using Medical Domain Knowledge},
journal = {Cureus},
year = {2023},
volume = {15},
number = {6},
pages = {e40895},
doi = {10.7759/cureus.40895}
}""",
        "modalities": ["text"],
        "dialect": [],
    }

    metadata = AbsTaskRTEB.create_rteb_task_metadata(**_TASK_SPECIFIC_METADATA)

    def __init__(self, **kwargs):
        # Bind this task to the ChatDoctor_HealthCareMagic RTEB dataset;
        # remaining kwargs pass through to the AbsTaskRTEB base class.
        super().__init__(
            rteb_dataset_name="ChatDoctor_HealthCareMagic",
            **kwargs,
        )
36 changes: 36 additions & 0 deletions mteb/tasks/RTEB/RTEBConvFinQATask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from __future__ import annotations

import logging

from mteb.abstasks.AbsTaskRTEB import AbsTaskRTEB

logger = logging.getLogger(__name__)


class RTEBConvFinQA(AbsTaskRTEB):
    """RTEB question-answering retrieval task over the ConvFinQA finance dataset (nDCG@10)."""

    # Static task metadata, forwarded verbatim as keyword arguments to
    # AbsTaskRTEB.create_rteb_task_metadata() below.
    _TASK_SPECIFIC_METADATA = {
        "task_name": "RTEBConvFinQA",
        "description": "RTEB evaluation for ConvFinQA dataset.",
        "reference": "https://github.com/czyssrs/ConvFinQA",
        "dataset_path": "embedding-benchmark/ConvFinQA",
        "dataset_revision": "main",  # NOTE(review): "main" is a moving target — consider pinning a commit
        "main_score": "ndcg_at_10",
        "revision": "1.0.1",
        "date": ("2022-10-07", "2022-10-07"),  # arXiv submission date of the ConvFinQA paper
        "task_subtypes": ["Question answering"],
        "license": "mit",
        "annotations_creators": "derived",
        "text_creation": "found",
        "bibtex_citation": """@article{chen2022convfinqa,
title={ConvFinQA: Exploring the Chain of Numerical Reasoning in Conversational Finance Question Answering},
author={Chen, Zhiyu and Chen, Wenhu and Wang, Chuhan and Zhang, Xinyi and Zhang, Yuchi and Smrz, Pavel and Yu, Xiangyu and Fung, Pascale},
journal={arXiv preprint arXiv:2210.03849},
year={2022}
}""",
        "modalities": ["text"],
    }

    metadata = AbsTaskRTEB.create_rteb_task_metadata(**_TASK_SPECIFIC_METADATA)

    def __init__(self, **kwargs):
        # Bind this task to the ConvFinQA RTEB dataset; remaining kwargs pass
        # through to the AbsTaskRTEB base class.
        super().__init__(rteb_dataset_name="ConvFinQA", **kwargs)
Loading