diff --git a/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py b/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py index 2230368b94..7b5a728537 100644 --- a/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -74,12 +76,15 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py b/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py index 0185a454d3..b297dec5e3 100644 --- a/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -74,12 +76,15 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py b/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py index caa638743c..37826697be 100644 --- a/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -64,12 +66,15 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py b/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py index 6bdd0ab4cf..636bfd12a1 100644 --- a/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -88,12 +90,15 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py b/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py index 1a3467c1d7..4129a18137 100644 --- a/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -74,12 +76,15 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py b/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py index 4389aeafa8..6c5a0a1b1d 100644 --- a/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -91,12 +93,15 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py b/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py index 8a2f51e7fd..c603e2cc5b 100644 --- a/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -86,12 +88,15 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py b/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py index 0f6ac8533a..725c7e889c 100644 --- a/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -76,12 +78,15 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py b/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py index 5aa831f799..538a0881fa 100644 --- a/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -72,12 +74,15 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py b/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py index 1391d12b93..ac527acba2 100644 --- a/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -75,12 +77,15 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py b/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py index 2d27e1a2dc..f521d693d0 100644 --- a/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -74,12 +76,15 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py b/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py index aff949d319..a24fa4e102 100644 --- a/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -72,12 +74,15 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py b/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py index 656b5494a0..b5fccbedf6 100644 --- a/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -83,12 +85,15 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True