-
Notifications
You must be signed in to change notification settings - Fork 570
[WIP] Add RTEB retrieval code #2529
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
ff8be03
Merging RTEB code
fzowl e0f343e
Merging RTEB code
fzowl d595263
Merging ModelMeta
fzowl 82fe6fe
Merging ModelMeta
fzowl 005df9b
Merge pull request #1 from fzliu/merge_model_meta
fzliu 3f9f078
Further simplification (#3)
fzowl 7d00cb0
Corrections due to the tests
fzowl 6719514
Adding a new test
fzowl 3755e54
A few file path corrections
fzowl f81c439
Correcting the embedding file names (non in-mem case)
fzowl ee6581f
Merge branch 'embeddings-benchmark:main' into main
fzowl File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -368,6 +368,31 @@ def _batched_encode( | |||||
| public_training_data=None, | ||||||
| ) | ||||||
|
|
||||||
| voyage_3_large = ModelMeta( | ||||||
| name="voyageai/voyage-3-large", # Use the identifier the user provided | ||||||
| revision="1", # Assuming revision 1 | ||||||
| release_date="2024-09-18", # Assuming same release as voyage-3 | ||||||
| languages=None, | ||||||
| loader=partial( # type: ignore | ||||||
| VoyageWrapper, | ||||||
| model_name="voyage-3-large", # Match the API model name | ||||||
| model_prompts=model_prompts, | ||||||
| ), | ||||||
| max_tokens=32000, # Assuming same as voyage-3 | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| embed_dim=1024, # Assuming same as voyage-3 | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you check? |
||||||
| open_weights=False, | ||||||
| n_parameters=None, | ||||||
| memory_usage_mb=None, | ||||||
| license=None, | ||||||
| reference="https://blog.voyageai.com/2024/09/18/voyage-3/", # Assuming same reference | ||||||
| similarity_fn_name="cosine", | ||||||
| framework=["API"], | ||||||
| use_instructions=True, | ||||||
| training_datasets=VOYAGE_TRAINING_DATA, | ||||||
| public_training_code=None, | ||||||
| public_training_data=None, | ||||||
| ) | ||||||
|
|
||||||
| voyage_3_lite = ModelMeta( | ||||||
| name="voyageai/voyage-3-lite", | ||||||
| revision="1", | ||||||
|
|
||||||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
|
|
||
| from mteb.abstasks.AbsTaskRTEB import AbsTaskRTEB | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| # RTEB retrieval task definition for the AILACasedocs dataset. | ||
| # The class-level `metadata` is built by AbsTaskRTEB.create_rteb_task_metadata | ||
| # from the keyword arguments collected in _TASK_SPECIFIC_METADATA. | ||
| class RTEBAILACasedocs(AbsTaskRTEB): | ||
| _TASK_SPECIFIC_METADATA = { | ||
| "task_name": "RTEBAILACasedocs", | ||
| "description": "RTEB evaluation for AILACasedocs dataset.", | ||
| "reference": "https://zenodo.org/records/4063986", | ||
| "dataset_path": "embedding-benchmark/AILACasedocs", | ||
| # NOTE(review): "main" looks like a branch name rather than a pinned commit — confirm this is intended. | ||
| "dataset_revision": "main", | ||
| "main_score": "ndcg_at_10", | ||
| "revision": "1.0.1", | ||
| "date": None, # Date not specified in dataset metadata | ||
| "domains": ["Legal"], | ||
| "task_subtypes": ["Article retrieval"], | ||
| "annotations_creators": "derived", | ||
| "license": "cc-by-4.0", # Standardized license format | ||
| "bibtex_citation": """@dataset{paheli_bhattacharya_2020_4063986, | ||
| author = {Paheli Bhattacharya and | ||
| Kripabandhu Ghosh and | ||
| Saptarshi Ghosh and | ||
| Arindam Pal and | ||
| Parth Mehta and | ||
| Arnab Bhattacharya and | ||
| Prasenjit Majumder}, | ||
| title = {AILA 2019 Precedent & Statute Retrieval Task}, | ||
| month = oct, | ||
| year = 2020, | ||
| publisher = {Zenodo}, | ||
| doi = {10.5281/zenodo.4063986}, | ||
| url = {https://doi.org/10.5281/zenodo.4063986} | ||
| }""", | ||
| "modalities": ["text"], | ||
| "eval_langs": ["eng-Latn"], | ||
| } | ||
|
|
||
| # Expand the dict above into keyword arguments for the shared metadata factory. | ||
| metadata = AbsTaskRTEB.create_rteb_task_metadata(**_TASK_SPECIFIC_METADATA) | ||
|
|
||
| def __init__(self, **kwargs): | ||
| # Select the "AILACasedocs" RTEB dataset; all other keyword arguments are forwarded to the base class unchanged. | ||
| super().__init__(rteb_dataset_name="AILACasedocs", **kwargs) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
|
|
||
| from mteb.abstasks.AbsTaskRTEB import AbsTaskRTEB | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| # RTEB retrieval task definition for the AILAStatutes dataset. | ||
| # The class-level `metadata` is built by AbsTaskRTEB.create_rteb_task_metadata | ||
| # from the keyword arguments collected in _TASK_SPECIFIC_METADATA. | ||
| class RTEBAILAStatutes(AbsTaskRTEB): | ||
| _TASK_SPECIFIC_METADATA = { | ||
| "task_name": "RTEBAILAStatutes", | ||
| "description": "RTEB evaluation for AILAStatutes dataset.", | ||
| "reference": "https://zenodo.org/records/4063986", | ||
| "dataset_path": "embedding-benchmark/AILAStatutes", | ||
| # NOTE(review): "main" looks like a branch name rather than a pinned commit — confirm this is intended. | ||
| "dataset_revision": "main", | ||
| "main_score": "ndcg_at_10", | ||
| "revision": "1.0.1", | ||
| "date": None, # Date not specified in dataset metadata | ||
| "domains": ["Legal"], | ||
| "task_subtypes": ["Article retrieval"], | ||
| "annotations_creators": "derived", | ||
| "license": "cc-by-4.0", # Standardized license format | ||
| "bibtex_citation": """@dataset{paheli_bhattacharya_2020_4063986, | ||
| author = {Paheli Bhattacharya and | ||
| Kripabandhu Ghosh and | ||
| Saptarshi Ghosh and | ||
| Arindam Pal and | ||
| Parth Mehta and | ||
| Arnab Bhattacharya and | ||
| Prasenjit Majumder}, | ||
| title = {AILA 2019 Precedent & Statute Retrieval Task}, | ||
| month = oct, | ||
| year = 2020, | ||
| publisher = {Zenodo}, | ||
| doi = {10.5281/zenodo.4063986}, | ||
| url = {https://doi.org/10.5281/zenodo.4063986} | ||
| }""", | ||
| "modalities": ["text"], | ||
| "eval_langs": ["eng-Latn"], | ||
| } | ||
|
|
||
| # Expand the dict above into keyword arguments for the shared metadata factory. | ||
| metadata = AbsTaskRTEB.create_rteb_task_metadata(**_TASK_SPECIFIC_METADATA) | ||
|
|
||
| def __init__(self, **kwargs): | ||
| # Select the "AILAStatutes" RTEB dataset; all other keyword arguments are forwarded to the base class unchanged. | ||
| super().__init__(rteb_dataset_name="AILAStatutes", **kwargs) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
|
|
||
| from mteb.abstasks.AbsTaskRTEB import AbsTaskRTEB | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| # RTEB retrieval task definition for the APPS dataset. | ||
| # The class-level `metadata` is built by AbsTaskRTEB.create_rteb_task_metadata | ||
| # from the keyword arguments collected in _TASK_SPECIFIC_METADATA. | ||
| class RTEBAPPS(AbsTaskRTEB): | ||
| # NOTE(review): unlike the AILA tasks in this PR, no "domains" or "eval_langs" key is given here — | ||
| # presumably create_rteb_task_metadata supplies defaults; confirm. | ||
| _TASK_SPECIFIC_METADATA = { | ||
| "task_name": "RTEBAPPS", | ||
| "description": "RTEB evaluation for APPS dataset.", | ||
| "reference": "https://arxiv.org/abs/2105.09938", | ||
| "dataset_path": "embedding-benchmark/APPS", | ||
| # NOTE(review): "main" looks like a branch name rather than a pinned commit — confirm this is intended. | ||
| "dataset_revision": "main", | ||
| "main_score": "ndcg_at_10", | ||
| "revision": "1.0.1", | ||
| "date": ("2021-05-20", "2021-05-20"), | ||
| "task_subtypes": ["Code retrieval"], | ||
| "license": "mit", | ||
| "annotations_creators": "derived", | ||
| "text_creation": "found", | ||
| "bibtex_citation": """@article{hendrycksapps2021, | ||
| title={Measuring Coding Challenge Competence With APPS}, | ||
| author={Dan Hendrycks and Steven Basart and Saurav Kadavath and Mantas Mazeika and Akul Arora and Ethan Guo and Collin Burns and Samir Puranik and Horace He and Dawn Song and Jacob Steinhardt}, | ||
| journal={NeurIPS}, | ||
| year={2021} | ||
| }""", | ||
| "modalities": ["text"], | ||
| "dialect": [], | ||
| } | ||
|
|
||
| # Expand the dict above into keyword arguments for the shared metadata factory. | ||
| metadata = AbsTaskRTEB.create_rteb_task_metadata(**_TASK_SPECIFIC_METADATA) | ||
|
|
||
| def __init__(self, **kwargs): | ||
| # Select the "APPS" RTEB dataset; all other keyword arguments are forwarded to the base class unchanged. | ||
| super().__init__(rteb_dataset_name="APPS", **kwargs) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
|
|
||
| from mteb.abstasks.AbsTaskRTEB import AbsTaskRTEB | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| class RTEBCOVID_QA(AbsTaskRTEB): | ||
| _TASK_SPECIFIC_METADATA = { | ||
| "task_name": "RTEBCOVID_QA", | ||
| "description": "RTEB evaluation for COVID_QA dataset.", | ||
| "reference": "https://aclanthology.org/2020.nlpcovid19-acl.18/", | ||
| "dataset_path": "embedding-benchmark/COVID_QA", | ||
| "dataset_revision": "main", | ||
| "main_score": "ndcg_at_10", | ||
| "revision": "1.0.1", | ||
| "date": ("2020-01-01", "2020-12-31"), | ||
| "domains": ["Medical"], | ||
| "task_subtypes": ["Question answering"], | ||
| "license": "apache-2.0", | ||
| "annotations_creators": "expert-annotated", | ||
| "text_creation": "found", | ||
| "bibtex_citation": """@inproceedings{moller-etal-2020-covid, | ||
| title = "{COVID}-QA: A Question Answering Dataset for {COVID}-19", | ||
| author = "M{\"o}ller, Erik and | ||
| Brasch, Malte and | ||
| Eger, Steffen and | ||
| {\"U}z{\"u}mc{\"u}o{\\u{g}}lu, Hakan and | ||
| Reimers, Nils and | ||
| Gurevych, Iryna", | ||
| booktitle = "Proceedings of the 1st Workshop on NLP for COVID-19 (part 2) at ACL 2020", | ||
| month = nov, | ||
| year = "2020", | ||
| address = "Online", | ||
| publisher = "Association for Computational Linguistics", | ||
| url = "https://aclanthology.org/2020.nlpcovid19-acl.18", | ||
| doi = "10.18653/v1/2020.nlpcovid19-acl.18", | ||
| pages = "145--152", | ||
| abstract = "We present COVID-QA, a Question Answering dataset consisting of 2,019 question/answer pairs annotated by volunteer biomedical experts on scientific articles about COVID-19. The dataset is designed to be challenging for current QA systems, as it requires reasoning over multiple sentences and paragraphs. We provide baseline results using several state-of-the-art QA models and analyze their performance.", | ||
| }""", | ||
| "modalities": ["text"], | ||
| } | ||
|
|
||
| metadata = AbsTaskRTEB.create_rteb_task_metadata(**_TASK_SPECIFIC_METADATA) | ||
|
|
||
| def __init__(self, **kwargs): | ||
| super().__init__(rteb_dataset_name="COVID_QA", **kwargs) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
|
|
||
| from mteb.abstasks.AbsTaskRTEB import AbsTaskRTEB | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| # RTEB retrieval task definition for the ChatDoctor_HealthCareMagic dataset. | ||
| # The class-level `metadata` is built by AbsTaskRTEB.create_rteb_task_metadata | ||
| # from the keyword arguments collected in _TASK_SPECIFIC_METADATA. | ||
| class RTEBChatDoctor_HealthCareMagic(AbsTaskRTEB): | ||
| # NOTE(review): "task_subtypes" is empty and no "domains" or "eval_langs" key is given — | ||
| # presumably create_rteb_task_metadata supplies defaults; confirm. | ||
| _TASK_SPECIFIC_METADATA = { | ||
| "task_name": "RTEBChatDoctor_HealthCareMagic", | ||
| "description": "RTEB evaluation for ChatDoctor_HealthCareMagic dataset.", | ||
| "reference": "https://github.com/Kent0n-Li/ChatDoctor", | ||
| "dataset_path": "embedding-benchmark/ChatDoctor_HealthCareMagic", | ||
| # NOTE(review): "main" looks like a branch name rather than a pinned commit — confirm this is intended. | ||
| "dataset_revision": "main", | ||
| "main_score": "ndcg_at_10", | ||
| "revision": "1.0.1", | ||
| "date": ("2023-06-24", "2023-06-24"), | ||
| "task_subtypes": [], | ||
| "license": "cc-by-4.0", | ||
| "annotations_creators": "derived", | ||
| "text_creation": "found", | ||
| "bibtex_citation": """@article{Li2023ChatDoctor, | ||
| author = {Li, Yunxiang and Li, Zihan and Zhang, Kai and Dan, Ruilong and Jiang, Steve and Zhang, You}, | ||
| title = {ChatDoctor: A Medical Chat Model Fine-Tuned on a Large Language Model Meta-AI (LLaMA) Using Medical Domain Knowledge}, | ||
| journal = {Cureus}, | ||
| year = {2023}, | ||
| volume = {15}, | ||
| number = {6}, | ||
| pages = {e40895}, | ||
| doi = {10.7759/cureus.40895} | ||
| }""", | ||
| "modalities": ["text"], | ||
| "dialect": [], | ||
| } | ||
|
|
||
| # Expand the dict above into keyword arguments for the shared metadata factory. | ||
| metadata = AbsTaskRTEB.create_rteb_task_metadata(**_TASK_SPECIFIC_METADATA) | ||
|
|
||
| def __init__(self, **kwargs): | ||
| # Select the "ChatDoctor_HealthCareMagic" RTEB dataset; all other keyword arguments are forwarded unchanged. | ||
| super().__init__( | ||
| rteb_dataset_name="ChatDoctor_HealthCareMagic", | ||
| **kwargs, | ||
| ) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
|
|
||
| from mteb.abstasks.AbsTaskRTEB import AbsTaskRTEB | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| # RTEB retrieval task definition for the ConvFinQA dataset. | ||
| # The class-level `metadata` is built by AbsTaskRTEB.create_rteb_task_metadata | ||
| # from the keyword arguments collected in _TASK_SPECIFIC_METADATA. | ||
| class RTEBConvFinQA(AbsTaskRTEB): | ||
| # NOTE(review): no "domains" or "eval_langs" key is given here — | ||
| # presumably create_rteb_task_metadata supplies defaults; confirm. | ||
| _TASK_SPECIFIC_METADATA = { | ||
| "task_name": "RTEBConvFinQA", | ||
| "description": "RTEB evaluation for ConvFinQA dataset.", | ||
| "reference": "https://github.com/czyssrs/ConvFinQA", | ||
| "dataset_path": "embedding-benchmark/ConvFinQA", | ||
| # NOTE(review): "main" looks like a branch name rather than a pinned commit — confirm this is intended. | ||
| "dataset_revision": "main", | ||
| "main_score": "ndcg_at_10", | ||
| "revision": "1.0.1", | ||
| "date": ("2022-10-07", "2022-10-07"), | ||
| "task_subtypes": ["Question answering"], | ||
| "license": "mit", | ||
| "annotations_creators": "derived", | ||
| "text_creation": "found", | ||
| # NOTE(review): verify the author list in this entry against the arXiv:2210.03849 record before merging. | ||
| "bibtex_citation": """@article{chen2022convfinqa, | ||
| title={ConvFinQA: Exploring the Chain of Numerical Reasoning in Conversational Finance Question Answering}, | ||
| author={Chen, Zhiyu and Chen, Wenhu and Wang, Chuhan and Zhang, Xinyi and Zhang, Yuchi and Smrz, Pavel and Yu, Xiangyu and Fung, Pascale}, | ||
| journal={arXiv preprint arXiv:2210.03849}, | ||
| year={2022} | ||
| }""", | ||
| "modalities": ["text"], | ||
| } | ||
|
|
||
| # Expand the dict above into keyword arguments for the shared metadata factory. | ||
| metadata = AbsTaskRTEB.create_rteb_task_metadata(**_TASK_SPECIFIC_METADATA) | ||
|
|
||
| def __init__(self, **kwargs): | ||
| # Select the "ConvFinQA" RTEB dataset; all other keyword arguments are forwarded to the base class unchanged. | ||
| super().__init__(rteb_dataset_name="ConvFinQA", **kwargs) |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you need this?