Add the ChemTEB(v1.1) benchmark and the ChemRxivRetrieval task.

What this patch does, file by file:

  In mteb/benchmarks/benchmarks/__init__.py, CHEMTEB_V1_1 is added to the
  imported names and to the list of exported name strings (presumably
  __all__; the list header is outside the hunk).

  In mteb/benchmarks/benchmarks/benchmarks.py, the existing CHEMTEB
  benchmark gains the alias "ChemTEB(v1)", and a new CHEMTEB_V1_1
  benchmark named "ChemTEB(v1.1)" (alias "ChemTEB(latest)") is defined.
  Its task list has 28 entries, the last of which is ChemRxivRetrieval.

  mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json is a new file
  holding the descriptive statistics of the "test" split: 5000 queries,
  each with exactly one relevant document, out of 74457 samples in total.

  In mteb/tasks/retrieval/eng/__init__.py, ChemRxivRetrieval is imported
  from the new chemrxiv module and added to the exported name strings.

  mteb/tasks/retrieval/eng/chemrxiv.py is a new file defining the
  ChemRxivRetrieval task (AbsTaskRetrieval subclass) and its TaskMetadata.

Review notes (to be confirmed by the author, nothing below is changed in
the hunks):

  NOTE(review): indentation inside the hunks was rebuilt from a
  whitespace-collapsed copy. Python and JSON nesting follows from the
  syntax, but the 2-space indent of the BibTeX fields inside the citation
  strings and the 4-space JSON indent are assumptions; regenerate the
  patch from the branch if byte-exact blobs are required.

  NOTE(review): the ChemTEB citation sits in a raw string and contains
  "\\&", so the BibTeX title carries two literal backslashes before the
  ampersand. Confirm this matches the existing CHEMTEB entry and is
  intended.

  NOTE(review): ChemRxivRetrieval declares sample_creation="found" while
  its description says the queries are LLM-synthesized and
  annotations_creators is "LM-generated and reviewed". Confirm that
  "found" is the intended value.

  NOTE(review): bibtex_citation in chemrxiv.py is a plain triple-quoted
  string whereas the benchmark citation uses r"""...""". The text has no
  backslashes, so both forms are equivalent today.

diff --git a/mteb/benchmarks/benchmarks/__init__.py b/mteb/benchmarks/benchmarks/__init__.py
index 862ecaa66d..0f0db150fc 100644
--- a/mteb/benchmarks/benchmarks/__init__.py
+++ b/mteb/benchmarks/benchmarks/__init__.py
@@ -6,6 +6,7 @@
     BUILT_MTEB,
     C_MTEB,
     CHEMTEB,
+    CHEMTEB_V1_1,
     CODE_RAG,
     ENCODECHKA,
     FA_MTEB,
@@ -70,6 +71,7 @@
     "BRIGHT_LONG",
     "BUILT_MTEB",
     "CHEMTEB",
+    "CHEMTEB_V1_1",
     "CODE_RAG",
     "C_MTEB",
     "ENCODECHKA",
diff --git a/mteb/benchmarks/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks/benchmarks.py
index 6b06e44122..45bb3cf302 100644
--- a/mteb/benchmarks/benchmarks/benchmarks.py
+++ b/mteb/benchmarks/benchmarks/benchmarks.py
@@ -1656,6 +1656,7 @@
 
 CHEMTEB = Benchmark(
     name="ChemTEB",
+    aliases=["ChemTEB(v1)"],
     display_name="Chemical",
     icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-purge.svg",
     tasks=get_tasks(
@@ -1701,6 +1702,62 @@
 """,
 )
 
+CHEMTEB_V1_1 = Benchmark(
+    name="ChemTEB(v1.1)",
+    aliases=["ChemTEB(latest)"],
+    display_name="Chemical",
+    icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-purge.svg",
+    tasks=get_tasks(
+        tasks=[
+            "PubChemSMILESBitextMining",
+            "SDSEyeProtectionClassification",
+            "SDSGlovesClassification",
+            "WikipediaBioMetChemClassification",
+            "WikipediaGreenhouseEnantiopureClassification",
+            "WikipediaSolidStateColloidalClassification",
+            "WikipediaOrganicInorganicClassification",
+            "WikipediaCryobiologySeparationClassification",
+            "WikipediaChemistryTopicsClassification",
+            "WikipediaTheoreticalAppliedClassification",
+            "WikipediaChemFieldsClassification",
+            "WikipediaLuminescenceClassification",
+            "WikipediaIsotopesFissionClassification",
+            "WikipediaSaltsSemiconductorsClassification",
+            "WikipediaBiolumNeurochemClassification",
+            "WikipediaCrystallographyAnalyticalClassification",
+            "WikipediaCompChemSpectroscopyClassification",
+            "WikipediaChemEngSpecialtiesClassification",
+            "WikipediaChemistryTopicsClustering",
+            "WikipediaSpecialtiesInChemistryClustering",
+            "PubChemAISentenceParaphrasePC",
+            "PubChemSMILESPC",
+            "PubChemSynonymPC",
+            "PubChemWikiParagraphsPC",
+            "PubChemWikiPairClassification",
+            "ChemNQRetrieval",
+            "ChemHotpotQARetrieval",
+            "ChemRxivRetrieval",
+        ],
+    ),
+    description="ChemTEB evaluates the performance of text embedding models on chemical domain data. This version adds the ChemRxivRetrieval task.",
+    reference="https://arxiv.org/abs/2412.00532",
+    citation=r"""
+@article{kasmaee2024chemteb,
+  author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
+  journal = {arXiv preprint arXiv:2412.00532},
+  title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \\& Efficiency on a Specific Domain},
+  year = {2024},
+}
+
+@article{kasmaee2025chembed,
+  author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Astaraki, Mahdi and Saloot, Mohammad Arshi and Sherck, Nicholas and Mahyar, Hamidreza and Samiee, Soheila},
+  journal = {arXiv preprint arXiv:2508.01643},
+  title = {Chembed: Enhancing chemical literature search through domain-specific text embeddings},
+  year = {2025},
+}
+""",
+)
+
 BEIR_NL = Benchmark(
     name="BEIR-NL",
     display_name="BEIR-NL",
diff --git a/mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json b/mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json
new file mode 100644
index 0000000000..eb6c4381b0
--- /dev/null
+++ b/mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json
@@ -0,0 +1,30 @@
+{
+    "test": {
+        "num_samples": 74457,
+        "number_of_characters": 76109543,
+        "documents_text_statistics": {
+            "total_text_length": 75549698,
+            "min_text_length": 121,
+            "average_text_length": 1087.7189916063176,
+            "max_text_length": 25438,
+            "unique_texts": 69150
+        },
+        "documents_image_statistics": null,
+        "queries_text_statistics": {
+            "total_text_length": 559845,
+            "min_text_length": 57,
+            "average_text_length": 111.969,
+            "max_text_length": 224,
+            "unique_texts": 5000
+        },
+        "queries_image_statistics": null,
+        "relevant_docs_statistics": {
+            "num_relevant_docs": 5000,
+            "min_relevant_docs_per_query": 1,
+            "average_relevant_docs_per_query": 1.0,
+            "max_relevant_docs_per_query": 1,
+            "unique_relevant_docs": 5000
+        },
+        "top_ranked_statistics": null
+    }
+}
diff --git a/mteb/tasks/retrieval/eng/__init__.py b/mteb/tasks/retrieval/eng/__init__.py
index ff3460c257..247fdcce0b 100644
--- a/mteb/tasks/retrieval/eng/__init__.py
+++ b/mteb/tasks/retrieval/eng/__init__.py
@@ -18,6 +18,7 @@
 from .chat_doctor_retrieval import ChatDoctorRetrieval
 from .chem_hotpot_qa_retrieval import ChemHotpotQARetrieval
 from .chem_nq_retrieval import ChemNQRetrieval
+from .chemrxiv import ChemRxivRetrieval
 from .cirr_it2i_retrieval import CIRRIT2IRetrieval
 from .climate_fever_retrieval import (
     ClimateFEVER,
@@ -254,6 +255,7 @@
     "ChatDoctorRetrieval",
     "ChemHotpotQARetrieval",
     "ChemNQRetrieval",
+    "ChemRxivRetrieval",
     "ClimateFEVER",
     "ClimateFEVERHardNegatives",
     "ClimateFEVERHardNegativesV2",
diff --git a/mteb/tasks/retrieval/eng/chemrxiv.py b/mteb/tasks/retrieval/eng/chemrxiv.py
new file mode 100644
index 0000000000..aab8e71374
--- /dev/null
+++ b/mteb/tasks/retrieval/eng/chemrxiv.py
@@ -0,0 +1,33 @@
+from mteb.abstasks.retrieval import AbsTaskRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+
+class ChemRxivRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="ChemRxivRetrieval",
+        dataset={
+            "path": "BASF-AI/ChemRxivRetrieval",
+            "revision": "5377aa18f309ec440ff6325a4c2cd3362c2cb8d7",
+        },
+        description="A retrieval task based on ChemRxiv papers where queries are LLM-synthesized to match specific paragraphs.",
+        reference="https://arxiv.org/abs/2508.01643",
+        type="Retrieval",
+        category="t2t",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-01-01", "2025-05-01"),
+        domains=["Chemistry"],
+        task_subtypes=["Question answering", "Article retrieval"],
+        license="cc-by-nc-sa-4.0",
+        annotations_creators="LM-generated and reviewed",
+        dialect=[],
+        sample_creation="found",
+        bibtex_citation="""@article{kasmaee2025chembed,
+  author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Astaraki, Mahdi and Saloot, Mohammad Arshi and Sherck, Nicholas and Mahyar, Hamidreza and Samiee, Soheila},
+  journal = {arXiv preprint arXiv:2508.01643},
+  title = {Chembed: Enhancing chemical literature search through domain-specific text embeddings},
+  year = {2025},
+}""",
+    )