From 2da6200475c81b406d723d7b5e9f9aee754c6090 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 29 May 2025 03:02:59 +0800 Subject: [PATCH 01/26] Add files via upload --- mteb/tasks/Retrieval/eng/R2MEDRetrieval.py | 123 +++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 mteb/tasks/Retrieval/eng/R2MEDRetrieval.py diff --git a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py new file mode 100644 index 0000000000..7bc4fb47e4 --- /dev/null +++ b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py @@ -0,0 +1,123 @@ +from __future__ import annotations + +from collections import defaultdict + +import datasets + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.MultilingualTask import MultilingualTask +from mteb.abstasks.TaskMetadata import TaskMetadata + + +DOMAINS = [ + "Biology", + "Bioinformatics", + "Medical-Sciences", + "MedXpertQA-Exam", + 'MedQA-Diag', + "PMC-Treatment", + "PMC-Clinical", + "IIYi-Clinical", +] +VERSION = {"Biology": "8b9fec2db9eda4b5742d03732213fbaee8169556", + "Bioinformatics": "6021fce366892cbfd7837fa85a4128ea93315e18", + "Medical-Sciences": "7f11654e9aed0c6fa99784641c8880f87ad62930", + "MedXpertQA-Exam": "b457ea43db9ae5db74c3a3e5be0a213d0f85ac3a", + "MedQA-Diag": "78b585990279cc01a493f876c1b0cf09557fba57", + "PMC-Treatment": "53c489a44a3664ba352c07550b72b4525a5968d5", + "PMC-Clinical": "812829522f7eaa407ef82b96717be85788a50f7e", + "IIYi-Clinical": "974abbc9bc281c3169180a6aa5d7586cfd2f5877", +} + +DOMAINS_langs = {split: ["eng-Latn"] for split in DOMAINS} + + +def load_r2med_data( + self, + path: str, + domains: list, + eval_splits: list, + cache_dir: str, + revision: dict, +): + corpus = {domain: {split: None for split in eval_splits} for domain in DOMAINS} + queries = {domain: {split: None for split in eval_splits} for domain in DOMAINS} + relevant_docs = { + domain: {split: None for split in eval_splits} for domain in DOMAINS + } + + for domain in domains: + data_path = path + domain + domain_corpus = datasets.load_dataset( + data_path, "corpus", split="corpus", cache_dir=cache_dir, revision=VERSION[domain] + ) + domain_queries = datasets.load_dataset( + data_path, "query", split="query", cache_dir=cache_dir, revision=VERSION[domain] + ) + domain_qrels = datasets.load_dataset( + data_path, "qrels", split="qrels", cache_dir=cache_dir, revision=VERSION[domain] + ) + corpus[domain]["test"] = { + e["id"]: {"text": e["text"]} for e in domain_corpus + } + queries[domain]["test"] = { + e["id"]: e["text"] for e in domain_queries + } + relevant_docs[domain]["test"] = defaultdict(dict) + for e in domain_qrels: + qid = e["q_id"] + pid = e["p_id"] + relevant_docs[domain]["test"][qid][pid] = int(e["score"]) + + corpus = datasets.DatasetDict(corpus) + queries = datasets.DatasetDict(queries) + relevant_docs = datasets.DatasetDict(relevant_docs) + return corpus, queries, relevant_docs + + +def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs = self.load_r2med_data( + path=self.metadata_dict["dataset"]["path"], + domains=DOMAINS, + eval_splits=self.metadata_dict["eval_splits"], + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata_dict["dataset"]["revision"], + ) + self.data_loaded = True + + +class R2MEDRetrieval(MultilingualTask, AbsTaskRetrieval): + metadata = TaskMetadata( + name="R2MEDRetrieval", + dataset={ + "path": "R2MED/", + "revision": "1.0", + }, + reference="https://huggingface.co/R2MED", + description="R2MED retrieval dataset.", + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=DOMAINS_langs, + main_score="ndcg_at_10", + domains=["Medical", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="found", + modalities=["text"], + bibtex_citation=r""" +@article{li2025r2med, + title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + author={Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal={arXiv preprint arXiv:2505.14558}, + year={2025} +} +""", + ) + load_r2med_data = load_r2med_data + load_data = load_data From 4627a8a6363257fe1ca7ad8047fb4082e9363394 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 29 May 2025 03:06:32 +0800 Subject: [PATCH 02/26] Add files via upload --- scripts/run_mteb_r2med.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 scripts/run_mteb_r2med.py diff --git a/scripts/run_mteb_r2med.py b/scripts/run_mteb_r2med.py new file mode 100644 index 0000000000..a8b3ca0d73 --- /dev/null +++ b/scripts/run_mteb_r2med.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from sentence_transformers import SentenceTransformer + +from mteb import MTEB +from mteb.tasks.Retrieval.eng.R2MEDRetrieval import R2MEDRetrieval + +# testing the task with a model: +model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2") +evaluation = MTEB(tasks=[R2MEDRetrieval()]) +evaluation.run(model, output_folder="results") From 1694bc3aa05ee84c220e1fa4ac5b9388327e42b1 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 29 May 2025 03:07:35 +0800 Subject: [PATCH 03/26] Update benchmarks.py --- mteb/benchmarks/benchmarks.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 2c2a0ed6b2..4430b5b3ab 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -1999,3 +1999,21 @@ } """, ) + +R2MED = Benchmark( + name="R2MED", + tasks=get_tasks(tasks=["R2MEDRetrieval"], eval_splits=["test"]), + description="""R2MED: First Reasoning-Driven Medical Retrieval Benchmark. + R2MED is a high-quality, high-resolution information retrieval (IR) dataset designed for medical scenarios. + It contains 876 queries with three retrieval tasks, five medical scenarios, and twelve body systems. + """, + reference="https://r2med.github.io/", + citation=r""" +@article{li2025r2med, + title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + author={Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal={arXiv preprint arXiv:2505.14558}, + year={2025} +} +""", +) From 015496b9f538570e2cc8816e8b191bcce0b135cf Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 29 May 2025 03:08:52 +0800 Subject: [PATCH 04/26] Update __init__.py --- mteb/tasks/Retrieval/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mteb/tasks/Retrieval/__init__.py b/mteb/tasks/Retrieval/__init__.py index b2abdd19e5..2e15c9669d 100644 --- a/mteb/tasks/Retrieval/__init__.py +++ b/mteb/tasks/Retrieval/__init__.py @@ -89,6 +89,7 @@ from .eng.QuoraRetrieval import * from .eng.RARbCodeRetrieval import * from .eng.RARbMathRetrieval import * +from .eng.R2MEDRetrieval import * from .eng.SCIDOCSRetrieval import * from .eng.SciFactRetrieval import * from .eng.SiqaRetrieval import * From 14a34d9799e8a2311f14871babac94bc82e8a3e1 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 29 May 2025 10:38:17 +0800 Subject: [PATCH 05/26] Add files via upload --- .../Retrieval/R2MEDRetrieval.json | 166 ++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 mteb/descriptive_stats/Retrieval/R2MEDRetrieval.json diff --git a/mteb/descriptive_stats/Retrieval/R2MEDRetrieval.json b/mteb/descriptive_stats/Retrieval/R2MEDRetrieval.json new file mode 100644 index 0000000000..7d483e9fa7 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/R2MEDRetrieval.json @@ -0,0 +1,166 @@ +{ + "test": { + "number_of_characters": 1385850, + "num_samples": 357735, + "num_queries": 876, + "num_documents": 356859, + "min_document_length": 89, + "average_document_length": 2.8834665792371776, + "max_document_length": 7894, + "unique_documents": 356859, + "min_query_length": 1, + "average_query_length": 407.3732876712329, + "max_query_length": 1, + "unique_queries": 876, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 3.058219178082192, + "max_relevant_docs_per_query": 19, + "unique_relevant_docs": 2552, + "hf_subset_descriptive_stats": { + "Biology": { + "number_of_characters": 111232, + "num_samples": 57462, + "num_queries": 103, + "num_documents": 57359, + "min_document_length": 89, + "average_document_length": 0.9392248818842728, + "max_document_length": 2195, + "unique_documents": 57359, + "min_query_length": 1, + "average_query_length": 556.8834951456311, + "max_query_length": 1, + "unique_queries": 103, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 3.6310679611650487, + "max_relevant_docs_per_query": 19, + "unique_relevant_docs": 374 + }, + "Bioinformatics": { + "number_of_characters": 116026, + "num_samples": 47548, + "num_queries": 77, + "num_documents": 47471, + "min_document_length": 197, + "average_document_length": 1.4441448463272313, + "max_document_length": 4984, + "unique_documents": 47471, + "min_query_length": 1, + "average_query_length": 616.5064935064935, + "max_query_length": 1, + "unique_queries": 77, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.948051948051948, + "max_relevant_docs_per_query": 8, + "unique_relevant_docs": 227 + }, + "Medical-Sciences": { + "number_of_characters": 76823, + "num_samples": 34880, + "num_queries": 88, + "num_documents": 34792, + "min_document_length": 148, + "average_document_length": 1.2080650724304438, + "max_document_length": 2535, + "unique_documents": 34792, + "min_query_length": 1, + "average_query_length": 395.3636363636364, + "max_query_length": 1, + "unique_queries": 88, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.772727272727273, + "max_relevant_docs_per_query": 8, + "unique_relevant_docs": 244 + }, + "MedXpertQA-Exam": { + "number_of_characters": 151438, + "num_samples": 61476, + "num_queries": 97, + "num_documents": 61379, + "min_document_length": 223, + "average_document_length": 1.4672607895208458, + "max_document_length": 2334, + "unique_documents": 61379, + "min_query_length": 1, + "average_query_length": 632.7731958762887, + "max_query_length": 1, + "unique_queries": 97, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 3.0103092783505154, + "max_relevant_docs_per_query": 8, + "unique_relevant_docs": 274 + }, + "MedQA-Diag": { + "number_of_characters": 139589, + "num_samples": 56312, + "num_queries": 118, + "num_documents": 56194, + "min_document_length": 204, + "average_document_length": 1.4840552372139375, + "max_document_length": 1591, + "unique_documents": 56194, + "min_query_length": 1, + "average_query_length": 476.22033898305085, + "max_query_length": 1, + "unique_queries": 118, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 4.423728813559322, + "max_relevant_docs_per_query": 8, + "unique_relevant_docs": 512 + }, + "PMC-Treatment": { + "number_of_characters": 292183, + "num_samples": 28959, + "num_queries": 150, + "num_documents": 28809, + "min_document_length": 991, + "average_document_length": 9.142073657537575, + "max_document_length": 2637, + "unique_documents": 28809, + "min_query_length": 1, + "average_query_length": 192.06, + "max_query_length": 1, + "unique_queries": 150, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.1, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 309 + }, + "PMC-Clinical": { + "number_of_characters": 154761, + "num_samples": 60520, + "num_queries": 114, + "num_documents": 60406, + "min_document_length": 338, + "average_document_length": 1.5620137072476243, + "max_document_length": 1361, + "unique_documents": 60406, + "min_query_length": 1, + "average_query_length": 529.8771929824561, + "max_query_length": 1, + "unique_queries": 114, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.175438596491228, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 246 + }, + "IIYi-Clinical": { + "number_of_characters": 343798, + "num_samples": 10578, + "num_queries": 129, + "num_documents": 10449, + "min_document_length": 719, + "average_document_length": 31.90247870609628, + "max_document_length": 7894, + "unique_documents": 10449, + "min_query_length": 1, + "average_query_length": 81.0, + "max_query_length": 1, + "unique_queries": 129, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 3.5426356589147288, + "max_relevant_docs_per_query": 6, + "unique_relevant_docs": 366 + } + } + } +} \ No newline at end of file From a8d90454af4959e90c655d24cdd7f45c5e5d847e Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Tue, 3 Jun 2025 17:21:32 +0800 Subject: [PATCH 06/26] Update R2MEDRetrieval.py --- mteb/tasks/Retrieval/eng/R2MEDRetrieval.py | 24 +++++++--------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py index 7bc4fb47e4..480d4cfa09 100644 --- a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py @@ -19,15 +19,6 @@ "PMC-Clinical", "IIYi-Clinical", ] -VERSION = {"Biology": "8b9fec2db9eda4b5742d03732213fbaee8169556", - "Bioinformatics": "6021fce366892cbfd7837fa85a4128ea93315e18", - "Medical-Sciences": "7f11654e9aed0c6fa99784641c8880f87ad62930", - "MedXpertQA-Exam": "b457ea43db9ae5db74c3a3e5be0a213d0f85ac3a", - "MedQA-Diag": "78b585990279cc01a493f876c1b0cf09557fba57", - "PMC-Treatment": "53c489a44a3664ba352c07550b72b4525a5968d5", - "PMC-Clinical": "812829522f7eaa407ef82b96717be85788a50f7e", - "IIYi-Clinical": "974abbc9bc281c3169180a6aa5d7586cfd2f5877", -} DOMAINS_langs = {split: ["eng-Latn"] for split in DOMAINS} @@ -38,7 +29,7 @@ def load_r2med_data( domains: list, eval_splits: list, cache_dir: str, - revision: dict, + revision: str, ): corpus = {domain: {split: None for split in eval_splits} for domain in DOMAINS} queries = {domain: {split: None for split in eval_splits} for domain in DOMAINS} @@ -47,15 +38,14 @@ def load_r2med_data( } for domain in domains: - data_path = path + domain domain_corpus = datasets.load_dataset( - data_path, "corpus", split="corpus", cache_dir=cache_dir, revision=VERSION[domain] + path, name=domain+"-corpus", split="corpus", cache_dir=cache_dir, revision=revision ) domain_queries = datasets.load_dataset( - data_path, "query", split="query", cache_dir=cache_dir, revision=VERSION[domain] + path, name=domain+"-query", split="query", cache_dir=cache_dir, revision=revision ) domain_qrels = datasets.load_dataset( - data_path, "qrels", split="qrels", cache_dir=cache_dir, revision=VERSION[domain] + path, name=domain+"-qrels", split="qrels", cache_dir=cache_dir, revision=revision ) corpus[domain]["test"] = { e["id"]: {"text": e["text"]} for e in domain_corpus @@ -93,10 +83,10 @@ class R2MEDRetrieval(MultilingualTask, AbsTaskRetrieval): metadata = TaskMetadata( name="R2MEDRetrieval", dataset={ - "path": "R2MED/", - "revision": "1.0", + "path": "R2MED/R2MED", + "revision": "f7cf8ddcc9e5a9c971fa71d7582fab41611c8972", }, - reference="https://huggingface.co/R2MED", + reference="https://huggingface.co/R2MED/R2MED", description="R2MED retrieval dataset.", type="Retrieval", category="s2p", From 559c68247f744db4fd7e7468d840d96e28f61e23 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Tue, 3 Jun 2025 18:10:08 +0800 Subject: [PATCH 07/26] Update run_mteb_r2med.py --- scripts/run_mteb_r2med.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/scripts/run_mteb_r2med.py b/scripts/run_mteb_r2med.py index a8b3ca0d73..8b13789179 100644 --- a/scripts/run_mteb_r2med.py +++ b/scripts/run_mteb_r2med.py @@ -1,11 +1 @@ -from __future__ import annotations -from sentence_transformers import SentenceTransformer - -from mteb import MTEB -from mteb.tasks.Retrieval.eng.R2MEDRetrieval import R2MEDRetrieval - -# testing the task with a model: -model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2") -evaluation = MTEB(tasks=[R2MEDRetrieval()]) -evaluation.run(model, output_folder="results") From 9c9aacbdd92a76a8dd24229402bb46c30bfbb8b0 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Tue, 3 Jun 2025 18:10:36 +0800 Subject: [PATCH 08/26] Delete scripts/run_mteb_r2med.py --- scripts/run_mteb_r2med.py | 1 - 1 file changed, 1 deletion(-) delete mode 100644 scripts/run_mteb_r2med.py diff --git a/scripts/run_mteb_r2med.py b/scripts/run_mteb_r2med.py deleted file mode 100644 index 8b13789179..0000000000 --- a/scripts/run_mteb_r2med.py +++ /dev/null @@ -1 +0,0 @@ - From aaec0c4b176e03ad17dd0b3e60b08d39d1698384 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Tue, 3 Jun 2025 18:11:42 +0800 Subject: [PATCH 09/26] Update mteb/tasks/Retrieval/eng/R2MEDRetrieval.py Co-authored-by: Roman Solomatin --- mteb/tasks/Retrieval/eng/R2MEDRetrieval.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py index 480d4cfa09..a05bb46ea1 100644 --- a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py @@ -24,7 +24,6 @@ def load_r2med_data( - self, path: str, domains: list, eval_splits: list, From ffd076ee1f2920ad161bbae89f0e63ae7fb23706 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Tue, 3 Jun 2025 18:12:03 +0800 Subject: [PATCH 10/26] Update mteb/tasks/Retrieval/eng/R2MEDRetrieval.py Co-authored-by: Roman Solomatin --- mteb/tasks/Retrieval/eng/R2MEDRetrieval.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py index a05bb46ea1..0419865954 100644 --- a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py @@ -108,5 +108,3 @@ class R2MEDRetrieval(MultilingualTask, AbsTaskRetrieval): } """, ) - load_r2med_data = load_r2med_data - load_data = load_data From d3c15bf22e06ed6240ca5cdf311612ea4194dfcb Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Tue, 3 Jun 2025 18:23:45 +0800 Subject: [PATCH 11/26] Update mteb/tasks/Retrieval/eng/R2MEDRetrieval.py Co-authored-by: Roman Solomatin --- mteb/tasks/Retrieval/eng/R2MEDRetrieval.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py index 0419865954..7d5cd2e81f 100644 --- a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py @@ -64,18 +64,6 @@ def load_r2med_data( return corpus, queries, relevant_docs -def load_data(self, **kwargs): - if self.data_loaded: - return - - self.corpus, self.queries, self.relevant_docs = self.load_r2med_data( - path=self.metadata_dict["dataset"]["path"], - domains=DOMAINS, - eval_splits=self.metadata_dict["eval_splits"], - cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], - ) - self.data_loaded = True class R2MEDRetrieval(MultilingualTask, AbsTaskRetrieval): From e0df0827d40a8da123a9b9c7b4d24301a437201c Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Tue, 3 Jun 2025 18:25:54 +0800 Subject: [PATCH 12/26] Update mteb/tasks/Retrieval/eng/R2MEDRetrieval.py Co-authored-by: Roman Solomatin --- mteb/tasks/Retrieval/eng/R2MEDRetrieval.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py index 7d5cd2e81f..a8fdb00601 100644 --- a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py @@ -96,3 +96,16 @@ class R2MEDRetrieval(MultilingualTask, AbsTaskRetrieval): } """, ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs = self.load_r2med_data( + path=self.metadata.dataset["path"], + domains=DOMAINS, + eval_splits=self.metadata.eval_splits, + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + self.data_loaded = True From 6925e05b94a4a5546751f98c22b7de2ce999d9a0 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 5 Jun 2025 14:47:13 +0800 Subject: [PATCH 13/26] Add files via upload --- mteb/benchmarks/benchmarks.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 4430b5b3ab..449eeeca38 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -2002,7 +2002,19 @@ R2MED = Benchmark( name="R2MED", - tasks=get_tasks(tasks=["R2MEDRetrieval"], eval_splits=["test"]), + display_name="Reasoning-driven medical retrieval", + tasks=get_tasks( + tasks=[ + "BiologyRetrieval", + "BioinformaticsRetrieval", + "MedicalSciencesRetrieval", + "MedXpertQAExamRetrieval", + 'MedQADiagRetrieval', + "PMCTreatmentRetrieval", + "PMCClinicalRetrieval", + "IIYiClinicalRetrieval", + ] + ), description="""R2MED: First Reasoning-Driven Medical Retrieval Benchmark. R2MED is a high-quality, high-resolution information retrieval (IR) dataset designed for medical scenarios. It contains 876 queries with three retrieval tasks, five medical scenarios, and twelve body systems. @@ -2016,4 +2028,4 @@ year={2025} } """, -) +) \ No newline at end of file From 8b9e9319ee4c07035cd8c3f7875e45e14fbb7ed4 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 5 Jun 2025 14:48:35 +0800 Subject: [PATCH 14/26] Delete mteb/descriptive_stats/Retrieval/R2MEDRetrieval.json --- .../Retrieval/R2MEDRetrieval.json | 166 ------------------ 1 file changed, 166 deletions(-) delete mode 100644 mteb/descriptive_stats/Retrieval/R2MEDRetrieval.json diff --git a/mteb/descriptive_stats/Retrieval/R2MEDRetrieval.json b/mteb/descriptive_stats/Retrieval/R2MEDRetrieval.json deleted file mode 100644 index 7d483e9fa7..0000000000 --- a/mteb/descriptive_stats/Retrieval/R2MEDRetrieval.json +++ /dev/null @@ -1,166 +0,0 @@ -{ - "test": { - "number_of_characters": 1385850, - "num_samples": 357735, - "num_queries": 876, - "num_documents": 356859, - "min_document_length": 89, - "average_document_length": 2.8834665792371776, - "max_document_length": 7894, - "unique_documents": 356859, - "min_query_length": 1, - "average_query_length": 407.3732876712329, - "max_query_length": 1, - "unique_queries": 876, - "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 3.058219178082192, - "max_relevant_docs_per_query": 19, - "unique_relevant_docs": 2552, - "hf_subset_descriptive_stats": { - "Biology": { - "number_of_characters": 111232, - "num_samples": 57462, - "num_queries": 103, - "num_documents": 57359, - "min_document_length": 89, - "average_document_length": 0.9392248818842728, - "max_document_length": 2195, - "unique_documents": 57359, - "min_query_length": 1, - "average_query_length": 556.8834951456311, - "max_query_length": 1, - "unique_queries": 103, - "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 3.6310679611650487, - "max_relevant_docs_per_query": 19, - "unique_relevant_docs": 374 - }, - "Bioinformatics": { - "number_of_characters": 116026, - "num_samples": 47548, - "num_queries": 77, - "num_documents": 47471, - "min_document_length": 197, - "average_document_length": 1.4441448463272313, - "max_document_length": 4984, - "unique_documents": 47471, - "min_query_length": 1, - "average_query_length": 616.5064935064935, - "max_query_length": 1, - "unique_queries": 77, - "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 2.948051948051948, - "max_relevant_docs_per_query": 8, - "unique_relevant_docs": 227 - }, - "Medical-Sciences": { - "number_of_characters": 76823, - "num_samples": 34880, - "num_queries": 88, - "num_documents": 34792, - "min_document_length": 148, - "average_document_length": 1.2080650724304438, - "max_document_length": 2535, - "unique_documents": 34792, - "min_query_length": 1, - "average_query_length": 395.3636363636364, - "max_query_length": 1, - "unique_queries": 88, - "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 2.772727272727273, - "max_relevant_docs_per_query": 8, - "unique_relevant_docs": 244 - }, - "MedXpertQA-Exam": { - "number_of_characters": 151438, - "num_samples": 61476, - "num_queries": 97, - "num_documents": 61379, - "min_document_length": 223, - "average_document_length": 1.4672607895208458, - "max_document_length": 2334, - "unique_documents": 61379, - "min_query_length": 1, - "average_query_length": 632.7731958762887, - "max_query_length": 1, - "unique_queries": 97, - "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 3.0103092783505154, - "max_relevant_docs_per_query": 8, - "unique_relevant_docs": 274 - }, - "MedQA-Diag": { - "number_of_characters": 139589, - "num_samples": 56312, - "num_queries": 118, - "num_documents": 56194, - "min_document_length": 204, - "average_document_length": 1.4840552372139375, - "max_document_length": 1591, - "unique_documents": 56194, - "min_query_length": 1, - "average_query_length": 476.22033898305085, - "max_query_length": 1, - "unique_queries": 118, - "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 4.423728813559322, - "max_relevant_docs_per_query": 8, - "unique_relevant_docs": 512 - }, - "PMC-Treatment": { - "number_of_characters": 292183, - "num_samples": 28959, - "num_queries": 150, - "num_documents": 28809, - "min_document_length": 991, - "average_document_length": 9.142073657537575, - "max_document_length": 2637, - "unique_documents": 28809, - "min_query_length": 1, - "average_query_length": 192.06, - "max_query_length": 1, - "unique_queries": 150, - "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 2.1, - "max_relevant_docs_per_query": 5, - "unique_relevant_docs": 309 - }, - "PMC-Clinical": { - "number_of_characters": 154761, - "num_samples": 60520, - "num_queries": 114, - "num_documents": 60406, - "min_document_length": 338, - "average_document_length": 1.5620137072476243, - "max_document_length": 1361, - "unique_documents": 60406, - "min_query_length": 1, - "average_query_length": 529.8771929824561, - "max_query_length": 1, - "unique_queries": 114, - "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 2.175438596491228, - "max_relevant_docs_per_query": 4, - "unique_relevant_docs": 246 - }, - "IIYi-Clinical": { - "number_of_characters": 343798, - "num_samples": 10578, - "num_queries": 129, - "num_documents": 10449, - "min_document_length": 719, - "average_document_length": 31.90247870609628, - "max_document_length": 7894, - "unique_documents": 10449, - "min_query_length": 1, - "average_query_length": 81.0, - "max_query_length": 1, - "unique_queries": 129, - "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 3.5426356589147288, - "max_relevant_docs_per_query": 6, - "unique_relevant_docs": 366 - } - } - } -} \ No newline at end of file From 161ab47a7dcb4a50de64a9962e442b380e0ada76 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 5 Jun 2025 14:49:34 +0800 Subject: [PATCH 15/26] Add files via upload --- mteb/tasks/Retrieval/eng/R2MEDRetrieval.py | 387 ++++++++++++++++++--- 1 file changed, 333 insertions(+), 54 deletions(-) diff --git a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py index a8fdb00601..ef84655fb8 100644 --- a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py @@ -5,58 +5,39 @@ import datasets from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval -from mteb.abstasks.MultilingualTask import MultilingualTask from mteb.abstasks.TaskMetadata import TaskMetadata -DOMAINS = [ - "Biology", - "Bioinformatics", - "Medical-Sciences", - "MedXpertQA-Exam", - 'MedQA-Diag', - "PMC-Treatment", - "PMC-Clinical", - "IIYi-Clinical", -] - -DOMAINS_langs = {split: ["eng-Latn"] for split in DOMAINS} - - def load_r2med_data( path: str, - domains: list, eval_splits: list, cache_dir: str, revision: str, ): - corpus = {domain: {split: None for split in eval_splits} for domain in DOMAINS} - queries = {domain: {split: None for split in eval_splits} for domain in DOMAINS} - relevant_docs = { - domain: {split: None for split in eval_splits} for domain in DOMAINS + eval_split = eval_splits[0] + corpus = {eval_split: None} + queries = {eval_split: None} + relevant_docs = {eval_split: None} + domain_corpus = datasets.load_dataset( + path, name="corpus", split="corpus", cache_dir=cache_dir, revision=revision + ) + domain_queries = datasets.load_dataset( + path, name="query", split="query", cache_dir=cache_dir, revision=revision + ) + domain_qrels = datasets.load_dataset( + path, name="qrels", split="qrels", cache_dir=cache_dir, revision=revision + ) + corpus[eval_split] = { + e["id"]: {"text": e["text"]} for e in domain_corpus } - - for domain in domains: - domain_corpus = datasets.load_dataset( - path, name=domain+"-corpus", split="corpus", cache_dir=cache_dir, revision=revision - ) - domain_queries = datasets.load_dataset( - path, name=domain+"-query", split="query", cache_dir=cache_dir, revision=revision - ) - domain_qrels = datasets.load_dataset( - path, name=domain+"-qrels", split="qrels", cache_dir=cache_dir, revision=revision - ) - corpus[domain]["test"] = { - e["id"]: {"text": e["text"]} for e in domain_corpus - } - queries[domain]["test"] = { - e["id"]: e["text"] for e in domain_queries - } - relevant_docs[domain]["test"] = defaultdict(dict) - for e in domain_qrels: - qid = e["q_id"] - pid = e["p_id"] - relevant_docs[domain]["test"][qid][pid] = int(e["score"]) + queries[eval_split] = { + e["id"]: e["text"] for e in domain_queries + } + relevant_docs[eval_split] = defaultdict(dict) + for e in domain_qrels: + qid = e["q_id"] + pid = e["p_id"] + relevant_docs[eval_split][qid][pid] = int(e["score"]) corpus = datasets.DatasetDict(corpus) queries = datasets.DatasetDict(queries) @@ -64,28 +45,69 @@ def load_r2med_data( return corpus, queries, relevant_docs +class BiologyRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="BiologyRetrieval", + dataset={ + "path": "R2MED/Biology", + "revision": "8b9fec2db9eda4b5742d03732213fbaee8169556", + }, + reference="https://huggingface.co/R2MED/Biology", + description="Biology retrieval dataset.", + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + domains=["Medical", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation=None, + modalities=["text"], + bibtex_citation=r""" +@article{li2025r2med, + title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + author={Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal={arXiv preprint arXiv:2505.14558}, + year={2025} +} +""", + ) + def load_data(self, **kwargs): + if self.data_loaded: + return -class R2MEDRetrieval(MultilingualTask, AbsTaskRetrieval): + self.corpus, self.queries, self.relevant_docs = load_r2med_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + self.data_loaded = True + +class BioinformaticsRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( - name="R2MEDRetrieval", + name="BioinformaticsRetrieval", dataset={ - "path": "R2MED/R2MED", - "revision": "f7cf8ddcc9e5a9c971fa71d7582fab41611c8972", + "path": "R2MED/Bioinformatics", + "revision": "6021fce366892cbfd7837fa85a4128ea93315e18", }, - reference="https://huggingface.co/R2MED/R2MED", - description="R2MED retrieval dataset.", + reference="https://huggingface.co/R2MED/Bioinformatics", + description="Bioinformatics retrieval dataset.", type="Retrieval", category="s2p", eval_splits=["test"], - eval_langs=DOMAINS_langs, + eval_langs=["eng-Latn"], main_score="ndcg_at_10", domains=["Medical", "Written"], task_subtypes=["Article retrieval"], license="cc-by-4.0", annotations_creators="derived", dialect=[], - sample_creation="found", + sample_creation=None, modalities=["text"], bibtex_citation=r""" @article{li2025r2med, @@ -96,14 +118,271 @@ class R2MEDRetrieval(MultilingualTask, AbsTaskRetrieval): } """, ) - + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs = load_r2med_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + self.data_loaded = True + +class MedicalSciencesRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="MedicalSciencesRetrieval", + dataset={ + "path": "R2MED/Medical-Sciences", + "revision": "7f11654e9aed0c6fa99784641c8880f87ad62930", + }, + reference="https://huggingface.co/R2MED/Medical-Sciences", + description="Medical-Sciences retrieval dataset.", + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + domains=["Medical", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation=None, + modalities=["text"], + bibtex_citation=r""" +@article{li2025r2med, + title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + author={Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal={arXiv preprint arXiv:2505.14558}, + year={2025} +} +""", + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs = load_r2med_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + self.data_loaded = True + +class MedXpertQAExamRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="MedXpertQAExamRetrieval", + dataset={ + "path": "R2MED/MedXpertQA-Exam", + "revision": "b457ea43db9ae5db74c3a3e5be0a213d0f85ac3a", + }, + reference="https://huggingface.co/R2MED/MedXpertQA-Exam", + description="MedXpertQA-Exam retrieval dataset.", + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + domains=["Medical", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation=None, + modalities=["text"], + bibtex_citation=r""" +@article{li2025r2med, + title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + author={Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal={arXiv preprint arXiv:2505.14558}, + year={2025} +} +""", + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs = load_r2med_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + self.data_loaded = True + +class MedQADiagRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="MedQADiagRetrieval", + dataset={ + "path": "R2MED/MedQA-Diag", + "revision": "78b585990279cc01a493f876c1b0cf09557fba57", + }, + reference="https://huggingface.co/R2MED/MedQA-Diag", + description="MedQA-Diag retrieval dataset.", + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + domains=["Medical", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation=None, + modalities=["text"], + bibtex_citation=r""" +@article{li2025r2med, + title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + author={Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal={arXiv preprint arXiv:2505.14558}, + year={2025} +} +""", + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs = load_r2med_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + self.data_loaded = True + +class PMCTreatmentRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="PMCTreatmentRetrieval", + dataset={ + "path": "R2MED/PMC-Treatment", + "revision": "53c489a44a3664ba352c07550b72b4525a5968d5", + }, + reference="https://huggingface.co/R2MED/PMC-Treatment", + description="PMC-Treatment retrieval dataset.", + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + domains=["Medical", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation=None, + modalities=["text"], + bibtex_citation=r""" +@article{li2025r2med, + title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + author={Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal={arXiv preprint arXiv:2505.14558}, + year={2025} +} +""", + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs = load_r2med_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + self.data_loaded = True + +class PMCClinicalRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="PMCClinicalRetrieval", + dataset={ + "path": "R2MED/PMC-Clinical", + "revision": "812829522f7eaa407ef82b96717be85788a50f7e", + }, + reference="https://huggingface.co/R2MED/PMC-Clinical", + description="PMC-Clinical retrieval dataset.", + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + domains=["Medical", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation=None, + modalities=["text"], + bibtex_citation=r""" +@article{li2025r2med, + title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + author={Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal={arXiv preprint arXiv:2505.14558}, + year={2025} +} +""", + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs = load_r2med_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + self.data_loaded = True + +class IIYiClinicalRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="IIYiClinicalRetrieval", + dataset={ + "path": "R2MED/IIYi-Clinical", + "revision": "974abbc9bc281c3169180a6aa5d7586cfd2f5877", + }, + reference="https://huggingface.co/R2MED/IIYi-Clinical", + description="IIYi-Clinical retrieval dataset.", + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + domains=["Medical", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation=None, + modalities=["text"], + bibtex_citation=r""" +@article{li2025r2med, + title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + author={Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal={arXiv preprint arXiv:2505.14558}, + year={2025} +} +""", + ) + def load_data(self, **kwargs): if self.data_loaded: - return + return - self.corpus, self.queries, self.relevant_docs = self.load_r2med_data( + self.corpus, self.queries, self.relevant_docs = load_r2med_data( path=self.metadata.dataset["path"], - domains=DOMAINS, eval_splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), revision=self.metadata.dataset["revision"], From 6fa24f894ebf0d598ea2e781853feb122c9ddbd8 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 5 Jun 2025 17:45:59 +0800 Subject: [PATCH 16/26] Add files via upload --- mteb/benchmarks/benchmarks.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 449eeeca38..5e62b6f7e8 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -2005,14 +2005,14 @@ display_name="Reasoning-driven medical retrieval", tasks=get_tasks( tasks=[ - "BiologyRetrieval", - "BioinformaticsRetrieval", - "MedicalSciencesRetrieval", - "MedXpertQAExamRetrieval", - 'MedQADiagRetrieval', - "PMCTreatmentRetrieval", - "PMCClinicalRetrieval", - "IIYiClinicalRetrieval", + "R2MEDBiologyRetrieval", + "R2MEDBioinformaticsRetrieval", + "R2MEDMedicalSciencesRetrieval", + "R2MEDMedXpertQAExamRetrieval", + "R2MEDMedQADiagRetrieval", + "R2MEDPMCTreatmentRetrieval", + "R2MEDPMCClinicalRetrieval", + "R2MEDIIYiClinicalRetrieval", ] ), description="""R2MED: First Reasoning-Driven Medical Retrieval Benchmark. @@ -2028,4 +2028,4 @@ year={2025} } """, -) \ No newline at end of file +) From 779a6e9a2dc7bd4fc59ab198135b427c83c79c60 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 5 Jun 2025 17:47:45 +0800 Subject: [PATCH 17/26] Add files via upload --- mteb/tasks/Retrieval/eng/R2MEDRetrieval.py | 64 +++++++++++----------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py index ef84655fb8..b9bc23d35e 100644 --- a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py @@ -45,9 +45,9 @@ def load_r2med_data( return corpus, queries, relevant_docs -class BiologyRetrieval(AbsTaskRetrieval): +class R2MEDBiologyRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( - name="BiologyRetrieval", + name="R2MEDBiologyRetrieval", dataset={ "path": "R2MED/Biology", "revision": "8b9fec2db9eda4b5742d03732213fbaee8169556", @@ -59,12 +59,12 @@ class BiologyRetrieval(AbsTaskRetrieval): eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", - domains=["Medical", "Written"], + domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", annotations_creators="derived", dialect=[], - sample_creation=None, + sample_creation="found", modalities=["text"], bibtex_citation=r""" @article{li2025r2med, @@ -88,9 +88,9 @@ def load_data(self, **kwargs): ) self.data_loaded = True -class BioinformaticsRetrieval(AbsTaskRetrieval): +class R2MEDBioinformaticsRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( - name="BioinformaticsRetrieval", + name="R2MEDBioinformaticsRetrieval", dataset={ "path": "R2MED/Bioinformatics", "revision": "6021fce366892cbfd7837fa85a4128ea93315e18", @@ -102,12 +102,12 @@ class BioinformaticsRetrieval(AbsTaskRetrieval): eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", - domains=["Medical", "Written"], + domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", annotations_creators="derived", dialect=[], - sample_creation=None, + sample_creation="found", modalities=["text"], bibtex_citation=r""" @article{li2025r2med, @@ -131,9 +131,9 @@ def load_data(self, **kwargs): ) self.data_loaded = True -class MedicalSciencesRetrieval(AbsTaskRetrieval): +class R2MEDMedicalSciencesRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( - name="MedicalSciencesRetrieval", + name="R2MEDMedicalSciencesRetrieval", dataset={ "path": "R2MED/Medical-Sciences", "revision": "7f11654e9aed0c6fa99784641c8880f87ad62930", @@ -145,12 +145,12 @@ class MedicalSciencesRetrieval(AbsTaskRetrieval): eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", - domains=["Medical", "Written"], + domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", annotations_creators="derived", dialect=[], - sample_creation=None, + sample_creation="found", modalities=["text"], bibtex_citation=r""" @article{li2025r2med, @@ -174,9 +174,9 @@ def load_data(self, **kwargs): ) self.data_loaded = True -class MedXpertQAExamRetrieval(AbsTaskRetrieval): +class R2MEDMedXpertQAExamRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( - name="MedXpertQAExamRetrieval", + name="R2MEDMedXpertQAExamRetrieval", dataset={ "path": "R2MED/MedXpertQA-Exam", "revision": "b457ea43db9ae5db74c3a3e5be0a213d0f85ac3a", @@ -188,12 +188,12 @@ class MedXpertQAExamRetrieval(AbsTaskRetrieval): eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", - domains=["Medical", "Written"], + domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", annotations_creators="derived", dialect=[], - sample_creation=None, + sample_creation="found", modalities=["text"], bibtex_citation=r""" @article{li2025r2med, @@ -217,9 +217,9 @@ def load_data(self, **kwargs): ) self.data_loaded = True -class MedQADiagRetrieval(AbsTaskRetrieval): +class R2MEDMedQADiagRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( - name="MedQADiagRetrieval", + name="R2MEDMedQADiagRetrieval", dataset={ "path": "R2MED/MedQA-Diag", "revision": "78b585990279cc01a493f876c1b0cf09557fba57", @@ -231,12 +231,12 @@ class MedQADiagRetrieval(AbsTaskRetrieval): eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", - domains=["Medical", "Written"], + domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", annotations_creators="derived", dialect=[], - sample_creation=None, + sample_creation="found", modalities=["text"], bibtex_citation=r""" @article{li2025r2med, @@ -260,9 +260,9 @@ def load_data(self, **kwargs): ) self.data_loaded = True -class PMCTreatmentRetrieval(AbsTaskRetrieval): +class R2MEDPMCTreatmentRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( - name="PMCTreatmentRetrieval", + name="R2MEDPMCTreatmentRetrieval", dataset={ "path": "R2MED/PMC-Treatment", "revision": "53c489a44a3664ba352c07550b72b4525a5968d5", @@ -274,12 +274,12 @@ class PMCTreatmentRetrieval(AbsTaskRetrieval): eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", - domains=["Medical", "Written"], + domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", annotations_creators="derived", dialect=[], - sample_creation=None, + sample_creation="found", modalities=["text"], bibtex_citation=r""" @article{li2025r2med, @@ -303,9 +303,9 @@ def load_data(self, **kwargs): ) self.data_loaded = True -class PMCClinicalRetrieval(AbsTaskRetrieval): +class R2MEDPMCClinicalRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( - name="PMCClinicalRetrieval", + name="R2MEDPMCClinicalRetrieval", dataset={ "path": "R2MED/PMC-Clinical", "revision": "812829522f7eaa407ef82b96717be85788a50f7e", @@ -317,12 +317,12 @@ class PMCClinicalRetrieval(AbsTaskRetrieval): eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", - domains=["Medical", "Written"], + domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", annotations_creators="derived", dialect=[], - sample_creation=None, + sample_creation="found", modalities=["text"], bibtex_citation=r""" @article{li2025r2med, @@ -346,9 +346,9 @@ def load_data(self, **kwargs): ) self.data_loaded = True -class IIYiClinicalRetrieval(AbsTaskRetrieval): +class R2MEDIIYiClinicalRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( - name="IIYiClinicalRetrieval", + name="R2MEDIIYiClinicalRetrieval", dataset={ "path": "R2MED/IIYi-Clinical", "revision": "974abbc9bc281c3169180a6aa5d7586cfd2f5877", @@ -360,12 +360,12 @@ class IIYiClinicalRetrieval(AbsTaskRetrieval): eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", - domains=["Medical", "Written"], + domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", annotations_creators="derived", dialect=[], - sample_creation=None, + sample_creation="found", modalities=["text"], bibtex_citation=r""" @article{li2025r2med, From d42647a67ab83ba95ba1343984a57e47e91302d2 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 5 Jun 2025 19:37:18 +0800 Subject: [PATCH 18/26] Update R2MEDRetrieval.py --- mteb/tasks/Retrieval/eng/R2MEDRetrieval.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py index b9bc23d35e..8424e6df80 100644 --- a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py @@ -62,7 +62,7 @@ class R2MEDBiologyRetrieval(AbsTaskRetrieval): domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", - annotations_creators="derived", + annotations_creators="LM-generated and reviewed", dialect=[], sample_creation="found", modalities=["text"], @@ -105,7 +105,7 @@ class R2MEDBioinformaticsRetrieval(AbsTaskRetrieval): domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", - annotations_creators="derived", + annotations_creators="LM-generated and reviewed", dialect=[], sample_creation="found", modalities=["text"], @@ -148,7 +148,7 @@ class R2MEDMedicalSciencesRetrieval(AbsTaskRetrieval): domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", - annotations_creators="derived", + annotations_creators="LM-generated and reviewed", dialect=[], sample_creation="found", modalities=["text"], @@ -191,7 +191,7 @@ class R2MEDMedXpertQAExamRetrieval(AbsTaskRetrieval): domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", - annotations_creators="derived", + annotations_creators="LM-generated and reviewed", dialect=[], sample_creation="found", modalities=["text"], @@ -234,7 +234,7 @@ class R2MEDMedQADiagRetrieval(AbsTaskRetrieval): domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", - annotations_creators="derived", + annotations_creators="LM-generated and reviewed", dialect=[], sample_creation="found", modalities=["text"], @@ -277,7 +277,7 @@ class R2MEDPMCTreatmentRetrieval(AbsTaskRetrieval): domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", - annotations_creators="derived", + annotations_creators="LM-generated and reviewed", dialect=[], sample_creation="found", modalities=["text"], @@ -320,7 +320,7 @@ class R2MEDPMCClinicalRetrieval(AbsTaskRetrieval): domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", - annotations_creators="derived", + annotations_creators="LM-generated and reviewed", dialect=[], sample_creation="found", modalities=["text"], @@ -363,7 +363,7 @@ class R2MEDIIYiClinicalRetrieval(AbsTaskRetrieval): domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", - annotations_creators="derived", + annotations_creators="LM-generated and reviewed", dialect=[], sample_creation="found", modalities=["text"], From f241034c6a408a19c5e5fb4341c20d6854752a1f Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 5 Jun 2025 20:44:24 +0800 Subject: [PATCH 19/26] Add files via upload From 314c47fe021428087a6f46430ceee2671e4f027e Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 5 Jun 2025 20:44:42 +0800 Subject: [PATCH 20/26] Add files via upload From 5e0edde0ab44949c91bf2dbf0b4b1db8ff4039e8 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 5 Jun 2025 20:45:19 +0800 Subject: [PATCH 21/26] Add files via upload --- mteb/tasks/Retrieval/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/tasks/Retrieval/__init__.py b/mteb/tasks/Retrieval/__init__.py index 2e15c9669d..39932c47ec 100644 --- a/mteb/tasks/Retrieval/__init__.py +++ b/mteb/tasks/Retrieval/__init__.py @@ -87,9 +87,9 @@ from .eng.PiqaRetrieval import * from .eng.QuailRetrieval import * from .eng.QuoraRetrieval import * +from .eng.R2MEDRetrieval import * from .eng.RARbCodeRetrieval import * from .eng.RARbMathRetrieval import * -from .eng.R2MEDRetrieval import * from .eng.SCIDOCSRetrieval import * from .eng.SciFactRetrieval import * from .eng.SiqaRetrieval import * From 0f6f2ac1d70dcc8f33feb77cfc4df7e55ce57c03 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 5 Jun 2025 20:45:45 +0800 Subject: [PATCH 22/26] Add files via upload --- mteb/tasks/Retrieval/eng/R2MEDRetrieval.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py index 8424e6df80..285eab6c8e 100644 --- a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py @@ -27,12 +27,8 @@ def load_r2med_data( domain_qrels = datasets.load_dataset( path, name="qrels", split="qrels", cache_dir=cache_dir, revision=revision ) - corpus[eval_split] = { - e["id"]: {"text": e["text"]} for e in domain_corpus - } - queries[eval_split] = { - e["id"]: e["text"] for e in domain_queries - } + corpus[eval_split] = {e["id"]: {"text": e["text"]} for e in domain_corpus} + queries[eval_split] = {e["id"]: e["text"] for e in domain_queries} relevant_docs[eval_split] = defaultdict(dict) for e in domain_qrels: qid = e["q_id"] @@ -88,6 +84,7 @@ def load_data(self, **kwargs): ) self.data_loaded = True + class R2MEDBioinformaticsRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( name="R2MEDBioinformaticsRetrieval", @@ -131,6 +128,7 @@ def load_data(self, **kwargs): ) self.data_loaded = True + class R2MEDMedicalSciencesRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( name="R2MEDMedicalSciencesRetrieval", @@ -174,6 +172,7 @@ def load_data(self, **kwargs): ) self.data_loaded = True + class R2MEDMedXpertQAExamRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( name="R2MEDMedXpertQAExamRetrieval", @@ -217,6 +216,7 @@ def load_data(self, **kwargs): ) self.data_loaded = True + class R2MEDMedQADiagRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( name="R2MEDMedQADiagRetrieval", @@ -260,6 +260,7 @@ def load_data(self, **kwargs): ) self.data_loaded = True + class R2MEDPMCTreatmentRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( name="R2MEDPMCTreatmentRetrieval", @@ -303,6 +304,7 @@ def load_data(self, **kwargs): ) self.data_loaded = True + class R2MEDPMCClinicalRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( name="R2MEDPMCClinicalRetrieval", @@ -346,6 +348,7 @@ def load_data(self, **kwargs): ) self.data_loaded = True + class R2MEDIIYiClinicalRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( name="R2MEDIIYiClinicalRetrieval", From 644adc92d3ba65a4ed2449047b634bdef977178d Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Thu, 5 Jun 2025 15:56:51 +0200 Subject: [PATCH 23/26] format citations --- mteb/benchmarks/benchmarks.py | 8 +-- mteb/tasks/Retrieval/eng/R2MEDRetrieval.py | 64 +++++++++++----------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 5e62b6f7e8..f2aef081ca 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -2022,10 +2022,10 @@ reference="https://r2med.github.io/", citation=r""" @article{li2025r2med, - title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, - author={Li, Lei and Zhou, Xiao and Liu, Zheng}, - journal={arXiv preprint arXiv:2505.14558}, - year={2025} + author = {Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal = {arXiv preprint arXiv:2505.14558}, + title = {R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + year = {2025}, } """, ) diff --git a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py index 285eab6c8e..4f56c985f4 100644 --- a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py @@ -64,10 +64,10 @@ class R2MEDBiologyRetrieval(AbsTaskRetrieval): modalities=["text"], bibtex_citation=r""" @article{li2025r2med, - title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, - author={Li, Lei and Zhou, Xiao and Liu, Zheng}, - journal={arXiv preprint arXiv:2505.14558}, - year={2025} + author = {Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal = {arXiv preprint arXiv:2505.14558}, + title = {R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + year = {2025}, } """, ) @@ -108,10 +108,10 @@ class R2MEDBioinformaticsRetrieval(AbsTaskRetrieval): modalities=["text"], bibtex_citation=r""" @article{li2025r2med, - title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, - author={Li, Lei and Zhou, Xiao and Liu, Zheng}, - journal={arXiv preprint arXiv:2505.14558}, - year={2025} + author = {Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal = {arXiv preprint arXiv:2505.14558}, + title = {R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + year = {2025}, } """, ) @@ -152,10 +152,10 @@ class R2MEDMedicalSciencesRetrieval(AbsTaskRetrieval): modalities=["text"], bibtex_citation=r""" @article{li2025r2med, - title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, - author={Li, Lei and Zhou, Xiao and Liu, Zheng}, - journal={arXiv preprint arXiv:2505.14558}, - year={2025} + author = {Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal = {arXiv preprint arXiv:2505.14558}, + title = {R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + year = {2025}, } """, ) @@ -196,10 +196,10 @@ class R2MEDMedXpertQAExamRetrieval(AbsTaskRetrieval): modalities=["text"], bibtex_citation=r""" @article{li2025r2med, - title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, - author={Li, Lei and Zhou, Xiao and Liu, Zheng}, - journal={arXiv preprint arXiv:2505.14558}, - year={2025} + author = {Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal = {arXiv preprint arXiv:2505.14558}, + title = {R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + year = {2025}, } """, ) @@ -240,10 +240,10 @@ class R2MEDMedQADiagRetrieval(AbsTaskRetrieval): modalities=["text"], bibtex_citation=r""" @article{li2025r2med, - title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, - author={Li, Lei and Zhou, Xiao and Liu, Zheng}, - journal={arXiv preprint arXiv:2505.14558}, - year={2025} + author = {Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal = {arXiv preprint arXiv:2505.14558}, + title = {R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + year = {2025}, } """, ) @@ -284,10 +284,10 @@ class R2MEDPMCTreatmentRetrieval(AbsTaskRetrieval): modalities=["text"], bibtex_citation=r""" @article{li2025r2med, - title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, - author={Li, Lei and Zhou, Xiao and Liu, Zheng}, - journal={arXiv preprint arXiv:2505.14558}, - year={2025} + author = {Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal = {arXiv preprint arXiv:2505.14558}, + title = {R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + year = {2025}, } """, ) @@ -328,10 +328,10 @@ class R2MEDPMCClinicalRetrieval(AbsTaskRetrieval): modalities=["text"], bibtex_citation=r""" @article{li2025r2med, - title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, - author={Li, Lei and Zhou, Xiao and Liu, Zheng}, - journal={arXiv preprint arXiv:2505.14558}, - year={2025} + author = {Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal = {arXiv preprint arXiv:2505.14558}, + title = {R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + year = {2025}, } """, ) @@ -372,10 +372,10 @@ class R2MEDIIYiClinicalRetrieval(AbsTaskRetrieval): modalities=["text"], bibtex_citation=r""" @article{li2025r2med, - title={R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, - author={Li, Lei and Zhou, Xiao and Liu, Zheng}, - journal={arXiv preprint arXiv:2505.14558}, - year={2025} + author = {Li, Lei and Zhou, Xiao and Liu, Zheng}, + journal = {arXiv preprint arXiv:2505.14558}, + title = {R2MED: A Benchmark for Reasoning-Driven Medical Retrieval}, + year = {2025}, } """, ) From b1af5cf5f411c3e7ae29094c16a95bf29a305007 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Thu, 5 Jun 2025 23:24:23 +0800 Subject: [PATCH 24/26] Update R2MEDRetrieval.py --- mteb/tasks/Retrieval/eng/R2MEDRetrieval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py index 4f56c985f4..9cac3cb0c0 100644 --- a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py @@ -134,7 +134,7 @@ class R2MEDMedicalSciencesRetrieval(AbsTaskRetrieval): name="R2MEDMedicalSciencesRetrieval", dataset={ "path": "R2MED/Medical-Sciences", - "revision": "7f11654e9aed0c6fa99784641c8880f87ad62930", + "revision": "1b48911514c80bf9182222d99752ad75e23b4b47", }, reference="https://huggingface.co/R2MED/Medical-Sciences", description="Medical-Sciences retrieval dataset.", From 5b9b83f2222da975987ac42c3814a1a4bd4bcb22 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Mon, 9 Jun 2025 16:10:18 +0800 Subject: [PATCH 25/26] Add files via upload --- mteb/tasks/Retrieval/eng/R2MEDRetrieval.py | 26 ++++++++++++++-------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py index 9cac3cb0c0..78084ebcf0 100644 --- a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py @@ -48,13 +48,14 @@ class R2MEDBiologyRetrieval(AbsTaskRetrieval): "path": "R2MED/Biology", "revision": "8b9fec2db9eda4b5742d03732213fbaee8169556", }, - reference="https://huggingface.co/R2MED/Biology", + reference="https://huggingface.co/datasets/R2MED/Biology", description="Biology retrieval dataset.", type="Retrieval", category="s2p", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", + date=("2011-01-01", "2024-06-30"), domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", @@ -92,13 +93,14 @@ class R2MEDBioinformaticsRetrieval(AbsTaskRetrieval): "path": "R2MED/Bioinformatics", "revision": "6021fce366892cbfd7837fa85a4128ea93315e18", }, - reference="https://huggingface.co/R2MED/Bioinformatics", + reference="https://huggingface.co/datasets/R2MED/Bioinformatics", description="Bioinformatics retrieval dataset.", type="Retrieval", category="s2p", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", + date=("2011-01-01", "2025-05-31"), domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", @@ -136,7 +138,7 @@ class R2MEDMedicalSciencesRetrieval(AbsTaskRetrieval): "path": "R2MED/Medical-Sciences", "revision": "1b48911514c80bf9182222d99752ad75e23b4b47", }, - reference="https://huggingface.co/R2MED/Medical-Sciences", + reference="https://huggingface.co/datasets/R2MED/Medical-Sciences", description="Medical-Sciences retrieval dataset.", type="Retrieval", category="s2p", @@ -144,6 +146,7 @@ class R2MEDMedicalSciencesRetrieval(AbsTaskRetrieval): eval_langs=["eng-Latn"], main_score="ndcg_at_10", domains=["Medical"], + date=("2011-01-01", "2025-05-31"), task_subtypes=["Article retrieval"], license="cc-by-4.0", annotations_creators="LM-generated and reviewed", @@ -180,13 +183,14 @@ class R2MEDMedXpertQAExamRetrieval(AbsTaskRetrieval): "path": "R2MED/MedXpertQA-Exam", "revision": "b457ea43db9ae5db74c3a3e5be0a213d0f85ac3a", }, - reference="https://huggingface.co/R2MED/MedXpertQA-Exam", + reference="https://huggingface.co/datasets/R2MED/MedXpertQA-Exam", description="MedXpertQA-Exam retrieval dataset.", type="Retrieval", category="s2p", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", + date=("1995-01-01", "2025-01-31"), domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", @@ -224,13 +228,14 @@ class R2MEDMedQADiagRetrieval(AbsTaskRetrieval): "path": "R2MED/MedQA-Diag", "revision": "78b585990279cc01a493f876c1b0cf09557fba57", }, - reference="https://huggingface.co/R2MED/MedQA-Diag", + reference="https://huggingface.co/datasets/R2MED/MedQA-Diag", description="MedQA-Diag retrieval dataset.", type="Retrieval", category="s2p", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", + date=("1992-01-01", "2020-09-30"), domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", @@ -268,13 +273,14 @@ class R2MEDPMCTreatmentRetrieval(AbsTaskRetrieval): "path": "R2MED/PMC-Treatment", "revision": "53c489a44a3664ba352c07550b72b4525a5968d5", }, - reference="https://huggingface.co/R2MED/PMC-Treatment", + reference="https://huggingface.co/datasets/R2MED/PMC-Treatment", description="PMC-Treatment retrieval dataset.", type="Retrieval", category="s2p", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", + date=("2000-02-01", "2025-03-31"), domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", @@ -312,13 +318,14 @@ class R2MEDPMCClinicalRetrieval(AbsTaskRetrieval): "path": "R2MED/PMC-Clinical", "revision": "812829522f7eaa407ef82b96717be85788a50f7e", }, - reference="https://huggingface.co/R2MED/PMC-Clinical", + reference="https://huggingface.co/datasets/R2MED/PMC-Clinical", description="PMC-Clinical retrieval dataset.", type="Retrieval", category="s2p", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", + date=("2000-02-01", "2023-04-30"), domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", @@ -356,13 +363,14 @@ class R2MEDIIYiClinicalRetrieval(AbsTaskRetrieval): "path": "R2MED/IIYi-Clinical", "revision": "974abbc9bc281c3169180a6aa5d7586cfd2f5877", }, - reference="https://huggingface.co/R2MED/IIYi-Clinical", + reference="https://huggingface.co/datasets/R2MED/IIYi-Clinical", description="IIYi-Clinical retrieval dataset.", type="Retrieval", category="s2p", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", + date=("2002-07-01", "2025-05-31"), domains=["Medical"], task_subtypes=["Article retrieval"], license="cc-by-4.0", @@ -390,4 +398,4 @@ def load_data(self, **kwargs): cache_dir=kwargs.get("cache_dir", None), revision=self.metadata.dataset["revision"], ) - self.data_loaded = True + self.data_loaded = True \ No newline at end of file From 802a9f611b8a6ece8cccaa1dbc4f6f46fed982b9 Mon Sep 17 00:00:00 2001 From: Li Lei <34205771+ll0ruc@users.noreply.github.com> Date: Mon, 9 Jun 2025 18:53:28 +0800 Subject: [PATCH 26/26] Add files via upload --- mteb/tasks/Retrieval/eng/R2MEDRetrieval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py index 78084ebcf0..0ccda56112 100644 --- a/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/R2MEDRetrieval.py @@ -398,4 +398,4 @@ def load_data(self, **kwargs): cache_dir=kwargs.get("cache_dir", None), revision=self.metadata.dataset["revision"], ) - self.data_loaded = True \ No newline at end of file + self.data_loaded = True