Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion mteb/benchmarks/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2307,7 +2307,9 @@
"RealMMRagTechSlidesRetrieval",
],
),
-description="Realistic and multi-modal document retrieval benchmark.",
+description="""REAL-MM-RAG is a realistic benchmark that reflects real-world multi-modal document retrieval challenges.
+It includes infographic-rich documents such as slides, reports, and technical manuals with tables, charts, and figures, requiring models to integrate textual and visual evidence.
+The benchmark features multi-modality, realistic queries, and accurate labeling for comprehensive evaluation.""",
reference="https://arxiv.org/abs/2502.12342",
citation=r"""
@article{wasserman2025real,
Expand Down
1 change: 0 additions & 1 deletion mteb/leaderboard/benchmark_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ class MenuEntry:
"MIEB(Img)",
"VisualDocumentRetrieval",
"JinaVDR",
-"REAL_MM_RAG"
]
),
),
Expand Down
16 changes: 12 additions & 4 deletions mteb/tasks/Image/Any2AnyRetrieval/eng/RealMMRagBenchRetrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ def _load_data(
class RealMMRagFinReportRetrieval(AbsTaskAny2AnyRetrieval):
metadata = TaskMetadata(
name="RealMMRagFinReportRetrieval",
-description="Retrieve associated pages according to questions.",
+description="""Contains annual financial reports rich in text, tables, and figures from IBM’s public filings.
+Queries ask about financial results, trends, or statements across multiple years.
+Retrieval goal: find the specific report page containing the relevant financial information.""",
reference="https://arxiv.org/abs/2502.12342",
dataset={
"path": "ibm-research/REAL-MM-RAG_FinReport_BEIR",
Expand Down Expand Up @@ -133,7 +135,9 @@ def load_data(self, **kwargs):
class RealMMRagFinSlidesRetrieval(AbsTaskAny2AnyRetrieval):
metadata = TaskMetadata(
name="RealMMRagFinSlidesRetrieval",
-description="Retrieve associated pages according to questions.",
+description="""Comprises quarterly investor presentation slides combining tables, charts, and textual highlights.
+Queries focus on revenue trends, growth metrics, or business segments.
+Retrieval goal: retrieve the slide that presents the requested financial data or insight.""",
reference="https://arxiv.org/abs/2502.12342",
dataset={
"path": "ibm-research/REAL-MM-RAG_FinSlides_BEIR",
Expand Down Expand Up @@ -190,7 +194,9 @@ def load_data(self, **kwargs):
class RealMMRagTechReportRetrieval(AbsTaskAny2AnyRetrieval):
metadata = TaskMetadata(
name="RealMMRagTechReportRetrieval",
-description="Retrieve associated pages according to questions.",
+description="""Includes technical documentation and whitepapers on IBM storage and automation systems with text-heavy content and supporting visuals.
+Queries address specific technologies, architectures, or performance aspects.
+Retrieval goal: locate the report page providing the technical explanation or result.""",
reference="https://arxiv.org/abs/2502.12342",
dataset={
"path": "ibm-research/REAL-MM-RAG_TechReport_BEIR",
Expand Down Expand Up @@ -247,7 +253,9 @@ def load_data(self, **kwargs):
class RealMMRagTechSlidesRetrieval(AbsTaskAny2AnyRetrieval):
metadata = TaskMetadata(
name="RealMMRagTechSlidesRetrieval",
-description="Retrieve associated pages according to questions.",
+description="""Features technical presentation slides containing bullet points, flow diagrams, and schematic figures.
+Queries reflect realistic information-seeking about system design or AI and automation concepts.
+Retrieval goal: retrieve the slide that best answers the technical query through text and visuals.""",
reference="https://arxiv.org/abs/2502.12342",
dataset={
"path": "ibm-research/REAL-MM-RAG_TechSlides_BEIR",
Expand Down
Loading