From e52d97e24a4657823e03f979f18ea7856dd7bb4f Mon Sep 17 00:00:00 2001
From: Paul Teiletche <paul.teiletche@gmail.com>
Date: Wed, 21 May 2025 18:11:05 +0200
Subject: [PATCH 01/11] Add ViDoRe v1/v2 benchmarks and ViDoRe v2 retrieval tasks

---
 mteb/benchmarks/benchmarks.py                 |  44 +++
 mteb/tasks/Image/Any2AnyRetrieval/__init__.py |   1 +
 .../multilingual/Vidore2BenchRetrieval.py     | 282 ++++++++++++++++++
 3 files changed, 327 insertions(+)
 create mode 100644 mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py

diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py
index f491d71f95..42d50c62fe 100644
--- a/mteb/benchmarks/benchmarks.py
+++ b/mteb/benchmarks/benchmarks.py
@@ -1745,3 +1745,47 @@
 }""",
     contacts=["mehrzadshm"],
 )
+
+VIDORE = Benchmark(
+    name="ViDoRe(v1)",
+    tasks=get_tasks(
+        tasks=[
+            "VidoreArxivQARetrieval",
+            "VidoreDocVQARetrieval",
+            "VidoreInfoVQARetrieval",
+            "VidoreTabfquadRetrieval",
+            "VidoreTatdqaRetrieval",
+            "VidoreShiftProjectRetrieval",
+            "VidoreSyntheticDocQAAIRetrieval",
+            "VidoreSyntheticDocQAEnergyRetrieval",
+            "VidoreSyntheticDocQAGovernmentReportsRetrieval",
+            "VidoreSyntheticDocQAHealthcareIndustryRetrieval",
+        ],
+    ),
+    description="Retrieve associated pages according to questions.",
+    reference="https://arxiv.org/abs/2407.01449",
+    citation="""@article{faysse2024colpali,
+  title={ColPali: Efficient Document Retrieval with Vision Language Models},
+  author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
+  journal={arXiv preprint arXiv:2407.01449},
+  year={2024}""",
+)
+
+VIDORE_V2 = Benchmark(
+    name="ViDoRe(v2)",
+    tasks=get_tasks(
+        tasks=[
+            "Vidore2SyntheticESGReportsRetrieval",
+            "Vidore2SyntheticEconsRetrieval",
+            "Vidore2SyntheticBioMedRetrieval",
+            "Vidore2ESGReportsRetrieval",
+        ],
+    ),
+    description="Retrieve associated pages according to questions.",
+    reference="https://arxiv.org/abs/2407.01449",
+    citation="""@article{faysse2024colpali,
+  title={ColPali: Efficient Document Retrieval with Vision Language Models},
+  author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
+  journal={arXiv preprint arXiv:2407.01449},
+  year={2024}""",
+)
diff --git a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py
index 2e1656d0d5..7720827063 100644
--- a/mteb/tasks/Image/Any2AnyRetrieval/__init__.py
+++ b/mteb/tasks/Image/Any2AnyRetrieval/__init__.py
@@ -47,6 +47,7 @@
 from .eng.WebQAT2ITRetrieval import *
 from .eng.WebQAT2TRetrieval import *
 from .multilingual.VdrMultilingualRetrieval import *
+from .multilingual.Vidore2BenchRetrieval import *
 from .multilingual.WITT2IRetrieval import *
 from .multilingual.XFlickr30kCoT2IRetrieval import *
 from .multilingual.XM3600T2IRetrieval import *
diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
new file mode 100644
index 0000000000..ca884842e8
--- /dev/null
+++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
@@ -0,0 +1,282 @@
+from __future__ import annotations
+
+from datasets import load_dataset
+
+from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+def _load_data(
+    path: str,
+    splits: str,
+    cache_dir: str | None = None,
+    revision: str | None = None,
+):
+    corpus = {}
+    queries = {}
+    relevant_docs = {}
+
+    for split in splits:
+        query_ds = load_dataset(
+            path,
+            "queries",
+            split=split,
+            cache_dir=cache_dir,
+            revision=revision,
+        )
+        query_ds = query_ds.map(
+            lambda x: {
+                "id": f"query-{split}-{x['query-id']}",
+                "text": x["query"],
+                "image": None,
+                "modality": "text",
+            },
+            remove_columns=["query-id", "query"],
+        )
+        queries[split] = query_ds
+
+        corpus_ds = load_dataset(
+            path,
+            "corpus",
+            split=split,
+            cache_dir=cache_dir,
+            revision=revision,
+        )
+        corpus_ds = corpus_ds.map(
+            lambda x: {
+                "id": f"corpus-{split}-{x['corpus-id']}",
+                "text": None,
+                "modality": "image",
+            },
+            remove_columns=["corpus-id"],
+        )
+        corpus[split] = corpus_ds
+
+        qrels_ds = load_dataset(
+            path,
+            "qrels",
+            split=split,
+            cache_dir=cache_dir,
+            revision=revision,
+        )
+        relevant_docs[split] = {}
+        for row in qrels_ds:
+            qid = f"query-{split}-{row['query-id']}"
+            did = f"corpus-{split}-{row['corpus-id']}"
+            if qid not in relevant_docs[split]:
+                relevant_docs[split][qid] = {}
+            relevant_docs[split][qid][did] = int(row["score"])
+
+    return corpus, queries, relevant_docs
+
+
+class Vidore2SyntheticESGReportsRetrieval(AbsTaskAny2AnyRetrieval):
+    metadata = TaskMetadata(
+        name="Vidore2SyntheticESGReportsRetrieval",
+        description="Retrieve associated pages according to questions.",
+        reference="https://arxiv.org/pdf/2407.01449",
+        dataset={
+            "path": "vidore/synthetic_rse_restaurant_filtered_v1.0_multilingual",
+            "revision": "0542c0d03da0ec1c8cbc517c8d78e7e95c75d3d3",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["eng-Latn", "spa-Latn", "fra-Latn", "deu-Latn"],
+        main_score="ndcg_at_5",
+        date=("2025-01-01", "2025-03-01"),
+        domains=["Academic"],
+        task_subtypes=["Image Text Retrieval"],
+        license="mit",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="found",
+        bibtex_citation="""@article{faysse2024colpali,
+  title={ColPali: Efficient Document Retrieval with Vision Language Models},
+  author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
+  journal={arXiv preprint arXiv:2407.01449},
+  year={2024}
+}""",
+        prompt={"query": "Find a screenshot that relevant to the user's question."},
+        descriptive_stats={
+            "n_samples": None,
+            "avg_character_length": {
+                "test": {
+                    "average_document_length": 1.0,
+                    "num_documents": 30,
+                    "num_queries": 228,
+                    "average_relevant_docs_per_query": 1.0,
+                }
+            },
+        },
+    )
+
+    def load_data(self, **kwargs):
+        self.corpus, self.queries, self.relevant_docs = _load_data(
+            path=self.metadata_dict["dataset"]["path"],
+            splits=self.metadata_dict["eval_splits"],
+            cache_dir=kwargs.get("cache_dir", None),
+            revision=self.metadata_dict["dataset"]["revision"],
+        )
+
+        self.data_loaded = True
+
+
+class Vidore2SyntheticEconsRetrieval(AbsTaskAny2AnyRetrieval):
+    metadata = TaskMetadata(
+        name="Vidore2SyntheticEconsRetrieval",
+        description="Retrieve associated pages according to questions.",
+        reference="https://arxiv.org/pdf/2407.01449",
+        dataset={
+            "path": "vidore/synthetics_economics_macro_economy_2024_filtered_v1.0_multilingual",
+            "revision": "162ba2fc1a8437eda8b6c37b240bc1c0f0deb092",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["eng-Latn", "spa-Latn", "fra-Latn", "deu-Latn"],
+        main_score="ndcg_at_5",
+        date=("2025-01-01", "2025-03-01"),
+        domains=["Academic"],
+        task_subtypes=["Image Text Retrieval"],
+        license="mit",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="found",
+        bibtex_citation="""@article{faysse2024colpali,
+  title={ColPali: Efficient Document Retrieval with Vision Language Models},
+  author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
+  journal={arXiv preprint arXiv:2407.01449},
+  year={2024}
+}""",
+        prompt={"query": "Find a screenshot that relevant to the user's question."},
+        descriptive_stats={
+            "n_samples": None,
+            "avg_character_length": {
+                "test": {
+                    "average_document_length": 1.0,
+                    "num_documents": 5,
+                    "num_queries": 232,
+                    "average_relevant_docs_per_query": 1.0,
+                }
+            },
+        },
+    )
+
+    def load_data(self, **kwargs):
+        self.corpus, self.queries, self.relevant_docs = _load_data(
+            path=self.metadata_dict["dataset"]["path"],
+            splits=self.metadata_dict["eval_splits"],
+            cache_dir=kwargs.get("cache_dir", None),
+            revision=self.metadata_dict["dataset"]["revision"],
+        )
+
+        self.data_loaded = True
+
+
+class Vidore2SyntheticBioMedRetrieval(AbsTaskAny2AnyRetrieval):
+    metadata = TaskMetadata(
+        name="Vidore2SyntheticBioMedRetrieval",
+        description="Retrieve associated pages according to questions.",
+        reference="https://arxiv.org/pdf/2407.01449",
+        dataset={
+            "path": "vidore/synthetic_mit_biomedical_tissue_interactions_unfiltered_multilingual",
+            "revision": "162ba2fc1a8437eda8b6c37b240bc1c0f0deb092",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["eng-Latn", "spa-Latn", "fra-Latn", "deu-Latn"],
+        main_score="ndcg_at_5",
+        date=("2025-01-01", "2025-03-01"),
+        domains=["Academic"],
+        task_subtypes=["Image Text Retrieval"],
+        license="mit",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="found",
+        bibtex_citation="""@article{faysse2024colpali,
+  title={ColPali: Efficient Document Retrieval with Vision Language Models},
+  author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
+  journal={arXiv preprint arXiv:2407.01449},
+  year={2024}
+}""",
+        prompt={"query": "Find a screenshot that relevant to the user's question."},
+        descriptive_stats={
+            "n_samples": None,
+            "avg_character_length": {
+                "test": {
+                    "average_document_length": 1.0,
+                    "num_documents": 27,
+                    "num_queries": 640,
+                    "average_relevant_docs_per_query": 1.0,
+                }
+            },
+        },
+    )
+
+    def load_data(self, **kwargs):
+        self.corpus, self.queries, self.relevant_docs = _load_data(
+            path=self.metadata_dict["dataset"]["path"],
+            splits=self.metadata_dict["eval_splits"],
+            cache_dir=kwargs.get("cache_dir", None),
+            revision=self.metadata_dict["dataset"]["revision"],
+        )
+
+        self.data_loaded = True
+
+
+class Vidore2ESGReportsRetrieval(AbsTaskAny2AnyRetrieval):
+    metadata = TaskMetadata(
+        name="Vidore2ESGReportsRetrieval",
+        description="Retrieve associated pages according to questions.",
+        reference="https://arxiv.org/pdf/2407.01449",
+        dataset={
+            "path": "vidore/restaurant_esg_reports_beir",
+            "revision": "162ba2fc1a8437eda8b6c37b240bc1c0f0deb092",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_5",
+        date=("2025-01-01", "2025-03-01"),
+        domains=["Academic"],
+        task_subtypes=["Image Text Retrieval"],
+        license="mit",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="found",
+        bibtex_citation="""@article{faysse2024colpali,
+  title={ColPali: Efficient Document Retrieval with Vision Language Models},
+  author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
+  journal={arXiv preprint arXiv:2407.01449},
+  year={2024}
+}""",
+        prompt={"query": "Find a screenshot that relevant to the user's question."},
+        descriptive_stats={
+            "n_samples": None,
+            "avg_character_length": {
+                "test": {
+                    "average_document_length": 1.0,
+                    "num_documents": 27,
+                    "num_queries": 640,
+                    "average_relevant_docs_per_query": 1.0,
+                }
+            },
+        },
+    )
+
+    def load_data(self, **kwargs):
+        self.corpus, self.queries, self.relevant_docs = _load_data(
+            path=self.metadata_dict["dataset"]["path"],
+            splits=self.metadata_dict["eval_splits"],
+            cache_dir=kwargs.get("cache_dir", None),
+            revision=self.metadata_dict["dataset"]["revision"],
+        )
+
+        self.data_loaded = True

From 2c5c137aa58d71ecbf7e0a00d91c44fa3e0ad12c Mon Sep 17 00:00:00 2001
From: Paul Teiletche <paul.teiletche@gmail.com>
Date: Thu, 22 May 2025 17:30:36 +0200
Subject: [PATCH 02/11] Add missing closing parenthesis in benchmarks.py

---
 mteb/benchmarks/benchmarks.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py
index 6d20484e50..ca673c8b6e 100644
--- a/mteb/benchmarks/benchmarks.py
+++ b/mteb/benchmarks/benchmarks.py
@@ -1949,3 +1949,4 @@
   author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
   journal={arXiv preprint arXiv:2407.01449},
   year={2024}""",
+)
\ No newline at end of file

From 5c3fb4d0b87d182274780224cc5b65ce72a95e4c Mon Sep 17 00:00:00 2001
From: Paul Teiletche <paul.teiletche@gmail.com>
Date: Fri, 23 May 2025 14:51:48 +0200
Subject: [PATCH 03/11] Clean up ViDoRe v2 task names and add per-language evaluation

---
 mteb/benchmarks/benchmarks.py                 |  6 +-
 .../multilingual/Vidore2BenchRetrieval.py     | 89 +++++++++++++------
 2 files changed, 64 insertions(+), 31 deletions(-)

diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py
index ca673c8b6e..a41867f26c 100644
--- a/mteb/benchmarks/benchmarks.py
+++ b/mteb/benchmarks/benchmarks.py
@@ -1936,10 +1936,10 @@
     name="ViDoRe(v2)",
     tasks=get_tasks(
         tasks=[
-            "Vidore2SyntheticESGReportsRetrieval",
-            "Vidore2SyntheticEconsRetrieval",
-            "Vidore2SyntheticBioMedRetrieval",
             "Vidore2ESGReportsRetrieval",
+            "Vidore2EconomicsReportsRetrieval",
+            "Vidore2BioMedicalLecturesRetrieval",
+            "Vidore2ESGReportsHLRetrieval",
         ],
     ),
     description="Retrieve associated pages according to questions.",
diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
index ca884842e8..204041079c 100644
--- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
+++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
@@ -3,18 +3,31 @@
 from datasets import load_dataset
 
 from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval
+from mteb.abstasks.MultilingualTask import MultilingualTask
 from mteb.abstasks.TaskMetadata import TaskMetadata
 
+_LANGS = {
+    "french": ["fra-Latn"],
+    "spanish": ["spa-Latn"],
+    "english": ["eng-Latn"],
+    "german": ["deu-Latn"],
+}
 
 def _load_data(
     path: str,
     splits: str,
+    langs: list | None = None,
     cache_dir: str | None = None,
     revision: str | None = None,
 ):
-    corpus = {}
-    queries = {}
-    relevant_docs = {}
+    if langs is None:
+        corpus = {}
+        queries = {}
+        relevant_docs = {}
+    else:
+        corpus = dict.fromkeys(langs, {})
+        queries = dict.fromkeys(langs, {})
+        relevant_docs = dict.fromkeys(langs, {})
 
     for split in splits:
         query_ds = load_dataset(
@@ -33,8 +46,7 @@ def _load_data(
             },
             remove_columns=["query-id", "query"],
         )
-        queries[split] = query_ds
-
+        
         corpus_ds = load_dataset(
             path,
             "corpus",
@@ -50,7 +62,6 @@ def _load_data(
             },
             remove_columns=["corpus-id"],
         )
-        corpus[split] = corpus_ds
 
         qrels_ds = load_dataset(
             path,
@@ -59,30 +70,49 @@ def _load_data(
             cache_dir=cache_dir,
             revision=revision,
         )
-        relevant_docs[split] = {}
-        for row in qrels_ds:
-            qid = f"query-{split}-{row['query-id']}"
-            did = f"corpus-{split}-{row['corpus-id']}"
-            if qid not in relevant_docs[split]:
-                relevant_docs[split][qid] = {}
-            relevant_docs[split][qid][did] = int(row["score"])
+
+        if langs is None:
+            queries[split] = query_ds
+            corpus[split] = corpus_ds
+            relevant_docs[split] = {}
+            for row in qrels_ds:
+                qid = f"query-{split}-{row['query-id']}"
+                did = f"corpus-{split}-{row['corpus-id']}"
+                if qid not in relevant_docs[split]:
+                    relevant_docs[split][qid] = {}
+                relevant_docs[split][qid][did] = int(row["score"])
+        else:
+            for lang in langs:
+                queries[lang][split] = query_ds.filter(
+                    lambda x: x["language"] == lang
+                )
+
+                corpus[lang][split] = corpus_ds
+
+                relevant_docs[lang][split] = {}
+                for row in qrels_ds:
+                    qid = f"query-{split}-{row['query-id']}"
+                    did = f"corpus-{split}-{row['corpus-id']}"
+                    if qid not in relevant_docs[lang][split]:
+                        relevant_docs[lang][split][qid] = {}
+                    relevant_docs[lang][split][qid][did] = int(row["score"])
 
     return corpus, queries, relevant_docs
 
 
-class Vidore2SyntheticESGReportsRetrieval(AbsTaskAny2AnyRetrieval):
+class Vidore2ESGReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval):
     metadata = TaskMetadata(
-        name="Vidore2SyntheticESGReportsRetrieval",
+        name="Vidore2ESGReportsRetrieval",
         description="Retrieve associated pages according to questions.",
         reference="https://arxiv.org/pdf/2407.01449",
         dataset={
-            "path": "vidore/synthetic_rse_restaurant_filtered_v1.0_multilingual",
+            "path": "vidore/esg_reports_v2",
             "revision": "0542c0d03da0ec1c8cbc517c8d78e7e95c75d3d3",
         },
         type="DocumentUnderstanding",
         category="t2i",
         eval_splits=["test"],
-        eval_langs=["eng-Latn", "spa-Latn", "fra-Latn", "deu-Latn"],
+        eval_langs=_LANGS,
         main_score="ndcg_at_5",
         date=("2025-01-01", "2025-03-01"),
         domains=["Academic"],
@@ -116,6 +146,7 @@ def load_data(self, **kwargs):
         self.corpus, self.queries, self.relevant_docs = _load_data(
             path=self.metadata_dict["dataset"]["path"],
             splits=self.metadata_dict["eval_splits"],
+            langs=_LANGS.keys(),
             cache_dir=kwargs.get("cache_dir", None),
             revision=self.metadata_dict["dataset"]["revision"],
         )
@@ -123,19 +154,19 @@ def load_data(self, **kwargs):
         self.data_loaded = True
 
 
-class Vidore2SyntheticEconsRetrieval(AbsTaskAny2AnyRetrieval):
+class Vidore2EconomicsReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval):
     metadata = TaskMetadata(
-        name="Vidore2SyntheticEconsRetrieval",
+        name="Vidore2EconomicsReportsRetrieval",
         description="Retrieve associated pages according to questions.",
         reference="https://arxiv.org/pdf/2407.01449",
         dataset={
-            "path": "vidore/synthetics_economics_macro_economy_2024_filtered_v1.0_multilingual",
+            "path": "vidore/economics_reports_v2",
             "revision": "162ba2fc1a8437eda8b6c37b240bc1c0f0deb092",
         },
         type="DocumentUnderstanding",
         category="t2i",
         eval_splits=["test"],
-        eval_langs=["eng-Latn", "spa-Latn", "fra-Latn", "deu-Latn"],
+        eval_langs=_LANGS,
         main_score="ndcg_at_5",
         date=("2025-01-01", "2025-03-01"),
         domains=["Academic"],
@@ -169,6 +200,7 @@ def load_data(self, **kwargs):
         self.corpus, self.queries, self.relevant_docs = _load_data(
             path=self.metadata_dict["dataset"]["path"],
             splits=self.metadata_dict["eval_splits"],
+            langs=_LANGS.keys(),
             cache_dir=kwargs.get("cache_dir", None),
             revision=self.metadata_dict["dataset"]["revision"],
         )
@@ -176,19 +208,19 @@ def load_data(self, **kwargs):
         self.data_loaded = True
 
 
-class Vidore2SyntheticBioMedRetrieval(AbsTaskAny2AnyRetrieval):
+class Vidore2BioMedicalLecturesRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval):
     metadata = TaskMetadata(
-        name="Vidore2SyntheticBioMedRetrieval",
+        name="Vidore2BioMedicalLecturesRetrieval",
         description="Retrieve associated pages according to questions.",
         reference="https://arxiv.org/pdf/2407.01449",
         dataset={
-            "path": "vidore/synthetic_mit_biomedical_tissue_interactions_unfiltered_multilingual",
+            "path": "vidore/biomedical_lectures_v2",
             "revision": "162ba2fc1a8437eda8b6c37b240bc1c0f0deb092",
         },
         type="DocumentUnderstanding",
         category="t2i",
         eval_splits=["test"],
-        eval_langs=["eng-Latn", "spa-Latn", "fra-Latn", "deu-Latn"],
+        eval_langs=_LANGS,
         main_score="ndcg_at_5",
         date=("2025-01-01", "2025-03-01"),
         domains=["Academic"],
@@ -222,6 +254,7 @@ def load_data(self, **kwargs):
         self.corpus, self.queries, self.relevant_docs = _load_data(
             path=self.metadata_dict["dataset"]["path"],
             splits=self.metadata_dict["eval_splits"],
+            langs=_LANGS.keys(),
             cache_dir=kwargs.get("cache_dir", None),
             revision=self.metadata_dict["dataset"]["revision"],
         )
@@ -229,13 +262,13 @@ def load_data(self, **kwargs):
         self.data_loaded = True
 
 
-class Vidore2ESGReportsRetrieval(AbsTaskAny2AnyRetrieval):
+class Vidore2ESGReportsHLRetrieval(AbsTaskAny2AnyRetrieval):
     metadata = TaskMetadata(
-        name="Vidore2ESGReportsRetrieval",
+        name="Vidore2ESGReportsHLRetrieval",
         description="Retrieve associated pages according to questions.",
         reference="https://arxiv.org/pdf/2407.01449",
         dataset={
-            "path": "vidore/restaurant_esg_reports_beir",
+            "path": "vidore/esg_reports_hl_v2",
             "revision": "162ba2fc1a8437eda8b6c37b240bc1c0f0deb092",
         },
         type="DocumentUnderstanding",

From af98cbd42e2a9b28089566e84229be756377844b Mon Sep 17 00:00:00 2001
From: Paul Teiletche <paul.teiletche@gmail.com>
Date: Fri, 23 May 2025 14:52:53 +0200
Subject: [PATCH 04/11] lint

---
 mteb/benchmarks/benchmarks.py                              | 2 +-
 .../Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py
index a41867f26c..c74abfd36a 100644
--- a/mteb/benchmarks/benchmarks.py
+++ b/mteb/benchmarks/benchmarks.py
@@ -1949,4 +1949,4 @@
   author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
   journal={arXiv preprint arXiv:2407.01449},
   year={2024}""",
-)
\ No newline at end of file
+)
diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
index 204041079c..d52ef80a7a 100644
--- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
+++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
@@ -13,6 +13,7 @@
     "german": ["deu-Latn"],
 }
 
+
 def _load_data(
     path: str,
     splits: str,
@@ -46,7 +47,7 @@ def _load_data(
             },
             remove_columns=["query-id", "query"],
         )
-        
+
         corpus_ds = load_dataset(
             path,
             "corpus",
@@ -83,9 +84,7 @@ def _load_data(
                 relevant_docs[split][qid][did] = int(row["score"])
         else:
             for lang in langs:
-                queries[lang][split] = query_ds.filter(
-                    lambda x: x["language"] == lang
-                )
+                queries[lang][split] = query_ds.filter(lambda x: x["language"] == lang)
 
                 corpus[lang][split] = corpus_ds
 

From 93c8d86cfc6bfb54dde0fb1e86b244c569be05f4 Mon Sep 17 00:00:00 2001
From: Paul Teiletche <paul.teiletche@gmail.com>
Date: Fri, 23 May 2025 15:00:14 +0200
Subject: [PATCH 05/11] Rename ESG human-labeled dataset path

---
 .../Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
index d52ef80a7a..3b429df2ce 100644
--- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
+++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
@@ -267,7 +267,7 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskAny2AnyRetrieval):
         description="Retrieve associated pages according to questions.",
         reference="https://arxiv.org/pdf/2407.01449",
         dataset={
-            "path": "vidore/esg_reports_hl_v2",
+            "path": "vidore/esg_reports_human_labeled_v2",
             "revision": "162ba2fc1a8437eda8b6c37b240bc1c0f0deb092",
         },
         type="DocumentUnderstanding",

From 761d61fc201a217378920aaffeb0e2653a856e45 Mon Sep 17 00:00:00 2001
From: Paul Teiletche <paul.teiletche@gmail.com>
Date: Fri, 23 May 2025 15:39:25 +0200
Subject: [PATCH 06/11] Fix BibTeX citations (raw strings, closing brace, field order)

---
 mteb/benchmarks/benchmarks.py                 | 26 +++++----
 .../multilingual/Vidore2BenchRetrieval.py     | 56 +++++++++++--------
 2 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py
index c74abfd36a..7f863c9788 100644
--- a/mteb/benchmarks/benchmarks.py
+++ b/mteb/benchmarks/benchmarks.py
@@ -1925,11 +1925,14 @@
     ),
     description="Retrieve associated pages according to questions.",
     reference="https://arxiv.org/abs/2407.01449",
-    citation="""@article{faysse2024colpali,
-  title={ColPali: Efficient Document Retrieval with Vision Language Models},
-  author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
-  journal={arXiv preprint arXiv:2407.01449},
-  year={2024}""",
+    citation=r"""
+@article{faysse2024colpali,
+  author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
+  journal = {arXiv preprint arXiv:2407.01449},
+  title = {ColPali: Efficient Document Retrieval with Vision Language Models},
+  year = {2024},
+}
+""",
 )
 
 VIDORE_V2 = Benchmark(
@@ -1944,9 +1947,12 @@
     ),
     description="Retrieve associated pages according to questions.",
     reference="https://arxiv.org/abs/2407.01449",
-    citation="""@article{faysse2024colpali,
-  title={ColPali: Efficient Document Retrieval with Vision Language Models},
-  author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
-  journal={arXiv preprint arXiv:2407.01449},
-  year={2024}""",
+    citation=r"""
+@article{faysse2024colpali,
+  author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
+  journal = {arXiv preprint arXiv:2407.01449},
+  title = {ColPali: Efficient Document Retrieval with Vision Language Models},
+  year = {2024},
+}
+""",
 )
diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
index 3b429df2ce..1ba0582768 100644
--- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
+++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
@@ -121,12 +121,14 @@ class Vidore2ESGReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval):
         dialect=[],
         modalities=["text", "image"],
         sample_creation="found",
-        bibtex_citation="""@article{faysse2024colpali,
-  title={ColPali: Efficient Document Retrieval with Vision Language Models},
-  author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
-  journal={arXiv preprint arXiv:2407.01449},
-  year={2024}
-}""",
+        bibtex_citation=r"""
+@article{faysse2024colpali,
+  author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
+  journal = {arXiv preprint arXiv:2407.01449},
+  title = {ColPali: Efficient Document Retrieval with Vision Language Models},
+  year = {2024},
+}
+""",
         prompt={"query": "Find a screenshot that relevant to the user's question."},
         descriptive_stats={
             "n_samples": None,
@@ -175,12 +177,14 @@ class Vidore2EconomicsReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval
         dialect=[],
         modalities=["text", "image"],
         sample_creation="found",
-        bibtex_citation="""@article{faysse2024colpali,
-  title={ColPali: Efficient Document Retrieval with Vision Language Models},
-  author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
-  journal={arXiv preprint arXiv:2407.01449},
-  year={2024}
-}""",
+        bibtex_citation=r"""
+@article{faysse2024colpali,
+  author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
+  journal = {arXiv preprint arXiv:2407.01449},
+  title = {ColPali: Efficient Document Retrieval with Vision Language Models},
+  year = {2024},
+}
+""",
         prompt={"query": "Find a screenshot that relevant to the user's question."},
         descriptive_stats={
             "n_samples": None,
@@ -229,12 +233,14 @@ class Vidore2BioMedicalLecturesRetrieval(MultilingualTask, AbsTaskAny2AnyRetriev
         dialect=[],
         modalities=["text", "image"],
         sample_creation="found",
-        bibtex_citation="""@article{faysse2024colpali,
-  title={ColPali: Efficient Document Retrieval with Vision Language Models},
-  author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
-  journal={arXiv preprint arXiv:2407.01449},
-  year={2024}
-}""",
+        bibtex_citation=r"""
+@article{faysse2024colpali,
+  author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
+  journal = {arXiv preprint arXiv:2407.01449},
+  title = {ColPali: Efficient Document Retrieval with Vision Language Models},
+  year = {2024},
+}
+""",
         prompt={"query": "Find a screenshot that relevant to the user's question."},
         descriptive_stats={
             "n_samples": None,
@@ -283,12 +289,14 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskAny2AnyRetrieval):
         dialect=[],
         modalities=["text", "image"],
         sample_creation="found",
-        bibtex_citation="""@article{faysse2024colpali,
-  title={ColPali: Efficient Document Retrieval with Vision Language Models},
-  author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
-  journal={arXiv preprint arXiv:2407.01449},
-  year={2024}
-}""",
+        bibtex_citation=r"""
+@article{faysse2024colpali,
+  author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
+  journal = {arXiv preprint arXiv:2407.01449},
+  title = {ColPali: Efficient Document Retrieval with Vision Language Models},
+  year = {2024},
+}
+""",
         prompt={"query": "Find a screenshot that relevant to the user's question."},
         descriptive_stats={
             "n_samples": None,

From f6b339bf707af96d815f5c2a11238715d1698fb7 Mon Sep 17 00:00:00 2001
From: Paul Teiletche <paul.teiletche@gmail.com>
Date: Fri, 23 May 2025 17:00:50 +0200
Subject: [PATCH 07/11] fix revision

---
 .../Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py  | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
index 1ba0582768..bfd3adec1e 100644
--- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
+++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
@@ -162,7 +162,7 @@ class Vidore2EconomicsReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval
         reference="https://arxiv.org/pdf/2407.01449",
         dataset={
             "path": "vidore/economics_reports_v2",
-            "revision": "162ba2fc1a8437eda8b6c37b240bc1c0f0deb092",
+            "revision": "b3e3a04b07fbbaffe79be49dabf92f691fbca252",
         },
         type="DocumentUnderstanding",
         category="t2i",
@@ -218,7 +218,7 @@ class Vidore2BioMedicalLecturesRetrieval(MultilingualTask, AbsTaskAny2AnyRetriev
         reference="https://arxiv.org/pdf/2407.01449",
         dataset={
             "path": "vidore/biomedical_lectures_v2",
-            "revision": "162ba2fc1a8437eda8b6c37b240bc1c0f0deb092",
+            "revision": "a29202f0da409034d651614d87cd8938d254e2ea",
         },
         type="DocumentUnderstanding",
         category="t2i",
@@ -274,7 +274,7 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskAny2AnyRetrieval):
         reference="https://arxiv.org/pdf/2407.01449",
         dataset={
             "path": "vidore/esg_reports_human_labeled_v2",
-            "revision": "162ba2fc1a8437eda8b6c37b240bc1c0f0deb092",
+            "revision": "6d467dedb09a75144ede1421747e47cf036857dd",
         },
         type="DocumentUnderstanding",
         category="t2i",

From 4ab64e410365e8f5bc04a39dbcdac63112062620 Mon Sep 17 00:00:00 2001
From: Paul Teiletche <paul.teiletche@gmail.com>
Date: Mon, 26 May 2025 10:25:20 +0200
Subject: [PATCH 08/11] vidore v2 citation

---
 mteb/benchmarks/benchmarks.py                 | 10 +--
 .../multilingual/Vidore2BenchRetrieval.py     | 64 +++++++++----------
 2 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py
index 7f863c9788..5ecf3f2730 100644
--- a/mteb/benchmarks/benchmarks.py
+++ b/mteb/benchmarks/benchmarks.py
@@ -1948,11 +1948,11 @@
     description="Retrieve associated pages according to questions.",
     reference="https://arxiv.org/abs/2407.01449",
     citation=r"""
-@article{faysse2024colpali,
-  author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
-  journal = {arXiv preprint arXiv:2407.01449},
-  title = {ColPali: Efficient Document Retrieval with Vision Language Models},
-  year = {2024},
+@article{mace2025vidorev2,
+  author = {Macé, Quentin and Loison, António and Faysse, Manuel},
+  journal = {arXiv preprint arXiv:2505.17166},
+  title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+  year = {2025},
 }
 """,
 )
diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
index bfd3adec1e..25c5f8868d 100644
--- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
+++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
@@ -121,14 +121,14 @@ class Vidore2ESGReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval):
         dialect=[],
         modalities=["text", "image"],
         sample_creation="found",
-        bibtex_citation=r"""
-@article{faysse2024colpali,
-  author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
-  journal = {arXiv preprint arXiv:2407.01449},
-  title = {ColPali: Efficient Document Retrieval with Vision Language Models},
-  year = {2024},
-}
-""",
+        citation=r"""
+    @article{mace2025vidorev2,
+    author = {Macé, Quentin and Loison António and Faysse, Manuel},
+    journal = {arXiv preprint arXiv:2505.17166},
+    title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+    year = {2025},
+    }
+    """,
         prompt={"query": "Find a screenshot that relevant to the user's question."},
         descriptive_stats={
             "n_samples": None,
@@ -177,14 +177,14 @@ class Vidore2EconomicsReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval
         dialect=[],
         modalities=["text", "image"],
         sample_creation="found",
-        bibtex_citation=r"""
-@article{faysse2024colpali,
-  author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
-  journal = {arXiv preprint arXiv:2407.01449},
-  title = {ColPali: Efficient Document Retrieval with Vision Language Models},
-  year = {2024},
-}
-""",
+        citation=r"""
+    @article{mace2025vidorev2,
+    author = {Macé, Quentin and Loison António and Faysse, Manuel},
+    journal = {arXiv preprint arXiv:2505.17166},
+    title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+    year = {2025},
+    }
+    """,
         prompt={"query": "Find a screenshot that relevant to the user's question."},
         descriptive_stats={
             "n_samples": None,
@@ -233,14 +233,14 @@ class Vidore2BioMedicalLecturesRetrieval(MultilingualTask, AbsTaskAny2AnyRetriev
         dialect=[],
         modalities=["text", "image"],
         sample_creation="found",
-        bibtex_citation=r"""
-@article{faysse2024colpali,
-  author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
-  journal = {arXiv preprint arXiv:2407.01449},
-  title = {ColPali: Efficient Document Retrieval with Vision Language Models},
-  year = {2024},
-}
-""",
+        citation=r"""
+    @article{mace2025vidorev2,
+    author = {Macé, Quentin and Loison António and Faysse, Manuel},
+    journal = {arXiv preprint arXiv:2505.17166},
+    title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+    year = {2025},
+    }
+    """,
         prompt={"query": "Find a screenshot that relevant to the user's question."},
         descriptive_stats={
             "n_samples": None,
@@ -289,14 +289,14 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskAny2AnyRetrieval):
         dialect=[],
         modalities=["text", "image"],
         sample_creation="found",
-        bibtex_citation=r"""
-@article{faysse2024colpali,
-  author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre},
-  journal = {arXiv preprint arXiv:2407.01449},
-  title = {ColPali: Efficient Document Retrieval with Vision Language Models},
-  year = {2024},
-}
-""",
+        citation=r"""
+    @article{mace2025vidorev2,
+    author = {Macé, Quentin and Loison António and Faysse, Manuel},
+    journal = {arXiv preprint arXiv:2505.17166},
+    title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+    year = {2025},
+    }
+    """,
         prompt={"query": "Find a screenshot that relevant to the user's question."},
         descriptive_stats={
             "n_samples": None,

From e15595ae4e6a8c693318c864b2c4c40a21c4b839 Mon Sep 17 00:00:00 2001
From: Paul Teiletche <paul.teiletche@gmail.com>
Date: Mon, 26 May 2025 15:00:46 +0200
Subject: [PATCH 09/11] update citation format and fix per-language mappings

---
 .../multilingual/Vidore2BenchRetrieval.py     | 74 +++++++++++--------
 1 file changed, 43 insertions(+), 31 deletions(-)

diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
index 25c5f8868d..bd02411830 100644
--- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
+++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
@@ -26,9 +26,9 @@ def _load_data(
         queries = {}
         relevant_docs = {}
     else:
-        corpus = dict.fromkeys(langs, {})
-        queries = dict.fromkeys(langs, {})
-        relevant_docs = dict.fromkeys(langs, {})
+        corpus = {lang: {} for lang in langs}
+        queries = {lang: {} for lang in langs}
+        relevant_docs = {lang: {} for lang in langs}
 
     for split in splits:
         query_ds = load_dataset(
@@ -122,13 +122,13 @@ class Vidore2ESGReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval):
         modalities=["text", "image"],
         sample_creation="found",
         citation=r"""
-    @article{mace2025vidorev2,
-    author = {Macé, Quentin and Loison António and Faysse, Manuel},
-    journal = {arXiv preprint arXiv:2505.17166},
-    title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
-    year = {2025},
-    }
-    """,
+@article{mace2025vidorev2,
+author = {Macé, Quentin and Loison António and Faysse, Manuel},
+journal = {arXiv preprint arXiv:2505.17166},
+title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+year = {2025},
+}
+""",
         prompt={"query": "Find a screenshot that relevant to the user's question."},
         descriptive_stats={
             "n_samples": None,
@@ -144,6 +144,9 @@ class Vidore2ESGReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval):
     )
 
     def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
         self.corpus, self.queries, self.relevant_docs = _load_data(
             path=self.metadata_dict["dataset"]["path"],
             splits=self.metadata_dict["eval_splits"],
@@ -178,13 +181,13 @@ class Vidore2EconomicsReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval
         modalities=["text", "image"],
         sample_creation="found",
         citation=r"""
-    @article{mace2025vidorev2,
-    author = {Macé, Quentin and Loison António and Faysse, Manuel},
-    journal = {arXiv preprint arXiv:2505.17166},
-    title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
-    year = {2025},
-    }
-    """,
+@article{mace2025vidorev2,
+author = {Macé, Quentin and Loison António and Faysse, Manuel},
+journal = {arXiv preprint arXiv:2505.17166},
+title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+year = {2025},
+}
+""",
         prompt={"query": "Find a screenshot that relevant to the user's question."},
         descriptive_stats={
             "n_samples": None,
@@ -200,6 +203,9 @@ class Vidore2EconomicsReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval
     )
 
     def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
         self.corpus, self.queries, self.relevant_docs = _load_data(
             path=self.metadata_dict["dataset"]["path"],
             splits=self.metadata_dict["eval_splits"],
@@ -234,13 +240,13 @@ class Vidore2BioMedicalLecturesRetrieval(MultilingualTask, AbsTaskAny2AnyRetriev
         modalities=["text", "image"],
         sample_creation="found",
         citation=r"""
-    @article{mace2025vidorev2,
-    author = {Macé, Quentin and Loison António and Faysse, Manuel},
-    journal = {arXiv preprint arXiv:2505.17166},
-    title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
-    year = {2025},
-    }
-    """,
+@article{mace2025vidorev2,
+author = {Macé, Quentin and Loison António and Faysse, Manuel},
+journal = {arXiv preprint arXiv:2505.17166},
+title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+year = {2025},
+}
+""",
         prompt={"query": "Find a screenshot that relevant to the user's question."},
         descriptive_stats={
             "n_samples": None,
@@ -256,6 +262,9 @@ class Vidore2BioMedicalLecturesRetrieval(MultilingualTask, AbsTaskAny2AnyRetriev
     )
 
     def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
         self.corpus, self.queries, self.relevant_docs = _load_data(
             path=self.metadata_dict["dataset"]["path"],
             splits=self.metadata_dict["eval_splits"],
@@ -290,13 +299,13 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskAny2AnyRetrieval):
         modalities=["text", "image"],
         sample_creation="found",
         citation=r"""
-    @article{mace2025vidorev2,
-    author = {Macé, Quentin and Loison António and Faysse, Manuel},
-    journal = {arXiv preprint arXiv:2505.17166},
-    title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
-    year = {2025},
-    }
-    """,
+@article{mace2025vidorev2,
+author = {Macé, Quentin and Loison António and Faysse, Manuel},
+journal = {arXiv preprint arXiv:2505.17166},
+title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+year = {2025},
+}
+""",
         prompt={"query": "Find a screenshot that relevant to the user's question."},
         descriptive_stats={
             "n_samples": None,
@@ -312,6 +321,9 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskAny2AnyRetrieval):
     )
 
     def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
         self.corpus, self.queries, self.relevant_docs = _load_data(
             path=self.metadata_dict["dataset"]["path"],
             splits=self.metadata_dict["eval_splits"],

From 0de4356c4e5beb20821d8095a10ad35eb394e7b4 Mon Sep 17 00:00:00 2001
From: Paul Teiletche <paul.teiletche@gmail.com>
Date: Mon, 26 May 2025 15:41:19 +0200
Subject: [PATCH 10/11] lint: citations

---
 .../multilingual/Vidore2BenchRetrieval.py     | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
index bd02411830..f522e964da 100644
--- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
+++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
@@ -123,10 +123,10 @@ class Vidore2ESGReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval):
         sample_creation="found",
         citation=r"""
 @article{mace2025vidorev2,
-author = {Macé, Quentin and Loison António and Faysse, Manuel},
-journal = {arXiv preprint arXiv:2505.17166},
-title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
-year = {2025},
+  author = {Macé, Quentin and Loison António and Faysse, Manuel},
+  journal = {arXiv preprint arXiv:2505.17166},
+  title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+  year = {2025},
 }
 """,
         prompt={"query": "Find a screenshot that relevant to the user's question."},
@@ -182,10 +182,10 @@ class Vidore2EconomicsReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval
         sample_creation="found",
         citation=r"""
 @article{mace2025vidorev2,
-author = {Macé, Quentin and Loison António and Faysse, Manuel},
-journal = {arXiv preprint arXiv:2505.17166},
-title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
-year = {2025},
+  author = {Macé, Quentin and Loison António and Faysse, Manuel},
+  journal = {arXiv preprint arXiv:2505.17166},
+  title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+  year = {2025},
 }
 """,
         prompt={"query": "Find a screenshot that relevant to the user's question."},
@@ -241,10 +241,10 @@ class Vidore2BioMedicalLecturesRetrieval(MultilingualTask, AbsTaskAny2AnyRetriev
         sample_creation="found",
         citation=r"""
 @article{mace2025vidorev2,
-author = {Macé, Quentin and Loison António and Faysse, Manuel},
-journal = {arXiv preprint arXiv:2505.17166},
-title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
-year = {2025},
+  author = {Macé, Quentin and Loison António and Faysse, Manuel},
+  journal = {arXiv preprint arXiv:2505.17166},
+  title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+  year = {2025},
 }
 """,
         prompt={"query": "Find a screenshot that relevant to the user's question."},
@@ -300,10 +300,10 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskAny2AnyRetrieval):
         sample_creation="found",
         citation=r"""
 @article{mace2025vidorev2,
-author = {Macé, Quentin and Loison António and Faysse, Manuel},
-journal = {arXiv preprint arXiv:2505.17166},
-title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
-year = {2025},
+  author = {Macé, Quentin and Loison António and Faysse, Manuel},
+  journal = {arXiv preprint arXiv:2505.17166},
+  title = {ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval},
+  year = {2025},
 }
 """,
         prompt={"query": "Find a screenshot that relevant to the user's question."},

From ab28dedbb38599e0f09b2aa6a739c30fab465e02 Mon Sep 17 00:00:00 2001
From: Paul Teiletche <paul.teiletche@gmail.com>
Date: Mon, 26 May 2025 17:00:29 +0200
Subject: [PATCH 11/11] typo citations

---
 .../multilingual/Vidore2BenchRetrieval.py                 | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
index f522e964da..ad0eb959f4 100644
--- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
+++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/Vidore2BenchRetrieval.py
@@ -121,7 +121,7 @@ class Vidore2ESGReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval):
         dialect=[],
         modalities=["text", "image"],
         sample_creation="found",
-        citation=r"""
+        bibtex_citation=r"""
 @article{mace2025vidorev2,
   author = {Macé, Quentin and Loison António and Faysse, Manuel},
   journal = {arXiv preprint arXiv:2505.17166},
@@ -180,7 +180,7 @@ class Vidore2EconomicsReportsRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval
         dialect=[],
         modalities=["text", "image"],
         sample_creation="found",
-        citation=r"""
+        bibtex_citation=r"""
 @article{mace2025vidorev2,
   author = {Macé, Quentin and Loison António and Faysse, Manuel},
   journal = {arXiv preprint arXiv:2505.17166},
@@ -239,7 +239,7 @@ class Vidore2BioMedicalLecturesRetrieval(MultilingualTask, AbsTaskAny2AnyRetriev
         dialect=[],
         modalities=["text", "image"],
         sample_creation="found",
-        citation=r"""
+        bibtex_citation=r"""
 @article{mace2025vidorev2,
   author = {Macé, Quentin and Loison António and Faysse, Manuel},
   journal = {arXiv preprint arXiv:2505.17166},
@@ -298,7 +298,7 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskAny2AnyRetrieval):
         dialect=[],
         modalities=["text", "image"],
         sample_creation="found",
-        citation=r"""
+        bibtex_citation=r"""
 @article{mace2025vidorev2,
   author = {Macé, Quentin and Loison António and Faysse, Manuel},
   journal = {arXiv preprint arXiv:2505.17166},