diff --git a/mteb/benchmarks/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks/benchmarks.py index a175a9de01..88fab8eb4f 100644 --- a/mteb/benchmarks/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks/benchmarks.py @@ -2307,7 +2307,9 @@ "RealMMRagTechSlidesRetrieval", ], ), - description="Realistic and multi-modal document retrieval benchmark.", + description="""REAL-MM-RAG is a realistic benchmark that reflects real-world multi-modal document retrieval challenges. + It includes infographic-rich documents such as slides, reports, and technical manuals with tables, charts, and figures, requiring models to integrate textual and visual evidence. + The benchmark features multi-modality, realistic queries, and accurate labeling for comprehensive evaluation.""", reference="https://arxiv.org/abs/2502.12342", citation=r""" @article{wasserman2025real, diff --git a/mteb/leaderboard/benchmark_selector.py b/mteb/leaderboard/benchmark_selector.py index bb7e3ee457..3395a9ecf9 100644 --- a/mteb/leaderboard/benchmark_selector.py +++ b/mteb/leaderboard/benchmark_selector.py @@ -46,7 +46,6 @@ class MenuEntry: "MIEB(Img)", "VisualDocumentRetrieval", "JinaVDR", - "REAL_MM_RAG" ] ), ), diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/RealMMRagBenchRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/RealMMRagBenchRetrieval.py index f3c5e8eee1..3e571177d8 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/RealMMRagBenchRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/RealMMRagBenchRetrieval.py @@ -75,7 +75,9 @@ def _load_data( class RealMMRagFinReportRetrieval(AbsTaskAny2AnyRetrieval): metadata = TaskMetadata( name="RealMMRagFinReportRetrieval", - description="Retrieve associated pages according to questions.", + description="""Contains annual financial reports rich in text, tables, and figures from IBM’s public filings. + Queries ask about financial results, trends, or statements across multiple years. + Retrieval goal: find the specific report page containing the relevant financial information.""", reference="https://arxiv.org/abs/2502.12342", dataset={ "path": "ibm-research/REAL-MM-RAG_FinReport_BEIR", @@ -133,7 +135,9 @@ def load_data(self, **kwargs): class RealMMRagFinSlidesRetrieval(AbsTaskAny2AnyRetrieval): metadata = TaskMetadata( name="RealMMRagFinSlidesRetrieval", - description="Retrieve associated pages according to questions.", + description="""Comprises quarterly investor presentation slides combining tables, charts, and textual highlights. + Queries focus on revenue trends, growth metrics, or business segments. + Retrieval goal: retrieve the slide that presents the requested financial data or insight.""", reference="https://arxiv.org/abs/2502.12342", dataset={ "path": "ibm-research/REAL-MM-RAG_FinSlides_BEIR", @@ -190,7 +194,9 @@ def load_data(self, **kwargs): class RealMMRagTechReportRetrieval(AbsTaskAny2AnyRetrieval): metadata = TaskMetadata( name="RealMMRagTechReportRetrieval", - description="Retrieve associated pages according to questions.", + description="""Includes technical documentation and whitepapers on IBM storage and automation systems with text-heavy content and supporting visuals. + Queries address specific technologies, architectures, or performance aspects. + Retrieval goal: locate the report page providing the technical explanation or result.""", reference="https://arxiv.org/abs/2502.12342", dataset={ "path": "ibm-research/REAL-MM-RAG_TechReport_BEIR", @@ -247,7 +253,9 @@ def load_data(self, **kwargs): class RealMMRagTechSlidesRetrieval(AbsTaskAny2AnyRetrieval): metadata = TaskMetadata( name="RealMMRagTechSlidesRetrieval", - description="Retrieve associated pages according to questions.", + description="""Features technical presentation slides containing bullet points, flow diagrams, and schematic figures. + Queries reflect realistic information-seeking about system design or AI and automation concepts. + Retrieval goal: retrieve the slide that best answers the technical query through text and visuals.""", reference="https://arxiv.org/abs/2502.12342", dataset={ "path": "ibm-research/REAL-MM-RAG_TechSlides_BEIR",