From 607aac816893986a573ed988ec9dc532fec27e13 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Thu, 2 Jan 2025 23:40:51 +0300 Subject: [PATCH 1/2] fix nano beir --- .../tasks/Retrieval/eng/NanoArguAnaRetrieval.py | 17 ++++++++++------- .../Retrieval/eng/NanoClimateFeverRetrieval.py | 17 ++++++++++------- .../tasks/Retrieval/eng/NanoDBPediaRetrieval.py | 17 ++++++++++------- mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py | 17 ++++++++++------- .../Retrieval/eng/NanoFiQA2018Retrieval.py | 17 ++++++++++------- .../Retrieval/eng/NanoHotpotQARetrieval.py | 17 ++++++++++------- .../tasks/Retrieval/eng/NanoMSMARCORetrieval.py | 17 ++++++++++------- .../Retrieval/eng/NanoNFCorpusRetrieval.py | 17 ++++++++++------- mteb/tasks/Retrieval/eng/NanoNQRetrieval.py | 16 +++++++++------- mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py | 16 +++++++++------- .../tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py | 17 ++++++++++------- .../tasks/Retrieval/eng/NanoSciFactRetrieval.py | 16 +++++++++------- .../Retrieval/eng/NanoTouche2020Retrieval.py | 16 +++++++++------- 13 files changed, 126 insertions(+), 91 deletions(-) diff --git a/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py b/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py index 2230368b94..e5dd1a8aa6 100644 --- a/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -74,12 +76,13 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"]): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py b/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py index 0185a454d3..6aaf1e27bb 100644 --- a/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -74,12 +76,13 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"]): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py b/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py index caa638743c..710b3a8706 100644 --- a/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -64,12 +66,13 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"]): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py b/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py index 6bdd0ab4cf..221998861f 100644 --- a/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -88,12 +90,13 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"]): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py b/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py index 1a3467c1d7..ea7e2b1cbb 100644 --- a/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -74,12 +76,13 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"]): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py b/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py index 4389aeafa8..2a646f44c2 100644 --- a/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -91,12 +93,13 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"]): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py b/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py index 8a2f51e7fd..89019a9803 100644 --- a/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -86,12 +88,13 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"]): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py b/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py index 0f6ac8533a..d5d4ba8e75 100644 --- a/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -76,12 +78,13 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"]): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py b/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py index 5aa831f799..536e5a0d93 100644 --- a/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -72,12 +74,12 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], self.relevant_docs[split]["corpus-id"]): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py b/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py index 1391d12b93..2c21fb1a6e 100644 --- a/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -75,12 +77,12 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], self.relevant_docs[split]["corpus-id"]): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py b/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py index 2d27e1a2dc..670decacb5 100644 --- a/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -74,12 +76,13 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"]): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py b/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py index aff949d319..b921562c86 100644 --- a/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -72,12 +74,12 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], self.relevant_docs[split]["corpus-id"]): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py b/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py index 656b5494a0..f301a9d9e5 100644 --- a/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections import defaultdict + from datasets import load_dataset from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval @@ -83,12 +85,12 @@ def load_data(self, **kwargs): for split in self.queries } - self.relevant_docs = { - split: { - sample["query-id"]: {sample["corpus-id"]: 1} - for sample in self.relevant_docs[split] - } - for split in self.relevant_docs - } + relevant_docs = {} + + for split in self.relevant_docs: + relevant_docs[split] = defaultdict(dict) + for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], self.relevant_docs[split]["corpus-id"]): + relevant_docs[split][query_id][corpus_id] = 1 + self.relevant_docs = relevant_docs self.data_loaded = True From 6aa41e8fadc74677d5b206f5895606a91348e5d5 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Thu, 2 Jan 2025 23:42:45 +0300 Subject: [PATCH 2/2] lint --- mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py | 6 ++++-- mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py | 6 ++++-- mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py | 6 ++++-- mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py | 6 ++++-- mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py | 6 ++++-- mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py | 6 ++++-- mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py | 6 ++++-- mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py | 6 ++++-- mteb/tasks/Retrieval/eng/NanoNQRetrieval.py | 5 ++++- mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py | 5 ++++- mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py | 6 ++++-- mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py | 5 ++++- mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py | 5 ++++- 13 files changed, 52 insertions(+), 22 deletions(-) diff --git a/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py b/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py index e5dd1a8aa6..7b5a728537 100644 --- a/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py @@ -80,8 +80,10 @@ def load_data(self, **kwargs): for split in self.relevant_docs: relevant_docs[split] = defaultdict(dict) - for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], - self.relevant_docs[split]["corpus-id"]): + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): relevant_docs[split][query_id][corpus_id] = 1 self.relevant_docs = relevant_docs diff --git a/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py b/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py index 6aaf1e27bb..b297dec5e3 100644 --- a/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py @@ -80,8 +80,10 @@ def load_data(self, **kwargs): for split in self.relevant_docs: relevant_docs[split] = defaultdict(dict) - for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], - self.relevant_docs[split]["corpus-id"]): + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): relevant_docs[split][query_id][corpus_id] = 1 self.relevant_docs = relevant_docs diff --git a/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py b/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py index 710b3a8706..37826697be 100644 --- a/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py @@ -70,8 +70,10 @@ def load_data(self, **kwargs): for split in self.relevant_docs: relevant_docs[split] = defaultdict(dict) - for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], - self.relevant_docs[split]["corpus-id"]): + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): relevant_docs[split][query_id][corpus_id] = 1 self.relevant_docs = relevant_docs diff --git a/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py b/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py index 221998861f..636bfd12a1 100644 --- a/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py @@ -94,8 +94,10 @@ def load_data(self, **kwargs): for split in self.relevant_docs: relevant_docs[split] = defaultdict(dict) - for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], - self.relevant_docs[split]["corpus-id"]): + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): relevant_docs[split][query_id][corpus_id] = 1 self.relevant_docs = relevant_docs diff --git a/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py b/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py index ea7e2b1cbb..4129a18137 100644 --- a/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py @@ -80,8 +80,10 @@ def load_data(self, **kwargs): for split in self.relevant_docs: relevant_docs[split] = defaultdict(dict) - for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], - self.relevant_docs[split]["corpus-id"]): + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): relevant_docs[split][query_id][corpus_id] = 1 self.relevant_docs = relevant_docs diff --git a/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py b/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py index 2a646f44c2..6c5a0a1b1d 100644 --- a/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py @@ -97,8 +97,10 @@ def load_data(self, **kwargs): for split in self.relevant_docs: relevant_docs[split] = defaultdict(dict) - for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], - self.relevant_docs[split]["corpus-id"]): + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): relevant_docs[split][query_id][corpus_id] = 1 self.relevant_docs = relevant_docs diff --git a/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py b/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py index 89019a9803..c603e2cc5b 100644 --- a/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py @@ -92,8 +92,10 @@ def load_data(self, **kwargs): for split in self.relevant_docs: relevant_docs[split] = defaultdict(dict) - for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], - self.relevant_docs[split]["corpus-id"]): + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): relevant_docs[split][query_id][corpus_id] = 1 self.relevant_docs = relevant_docs diff --git a/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py b/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py index d5d4ba8e75..725c7e889c 100644 --- a/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py @@ -82,8 +82,10 @@ def load_data(self, **kwargs): for split in self.relevant_docs: relevant_docs[split] = defaultdict(dict) - for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], - self.relevant_docs[split]["corpus-id"]): + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): relevant_docs[split][query_id][corpus_id] = 1 self.relevant_docs = relevant_docs diff --git a/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py b/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py index 536e5a0d93..538a0881fa 100644 --- a/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py @@ -78,7 +78,10 @@ def load_data(self, **kwargs): for split in self.relevant_docs: relevant_docs[split] = defaultdict(dict) - for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], self.relevant_docs[split]["corpus-id"]): + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): relevant_docs[split][query_id][corpus_id] = 1 self.relevant_docs = relevant_docs diff --git a/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py b/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py index 2c21fb1a6e..ac527acba2 100644 --- a/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py @@ -81,7 +81,10 @@ def load_data(self, **kwargs): for split in self.relevant_docs: relevant_docs[split] = defaultdict(dict) - for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], self.relevant_docs[split]["corpus-id"]): + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): relevant_docs[split][query_id][corpus_id] = 1 self.relevant_docs = relevant_docs diff --git a/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py b/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py index 670decacb5..f521d693d0 100644 --- a/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py @@ -80,8 +80,10 @@ def load_data(self, **kwargs): for split in self.relevant_docs: relevant_docs[split] = defaultdict(dict) - for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], - self.relevant_docs[split]["corpus-id"]): + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): relevant_docs[split][query_id][corpus_id] = 1 self.relevant_docs = relevant_docs diff --git a/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py b/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py index b921562c86..a24fa4e102 100644 --- a/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py @@ -78,7 +78,10 @@ def load_data(self, **kwargs): for split in self.relevant_docs: relevant_docs[split] = defaultdict(dict) - for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], self.relevant_docs[split]["corpus-id"]): + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): relevant_docs[split][query_id][corpus_id] = 1 self.relevant_docs = relevant_docs diff --git a/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py b/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py index f301a9d9e5..b5fccbedf6 100644 --- a/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py @@ -89,7 +89,10 @@ def load_data(self, **kwargs): for split in self.relevant_docs: relevant_docs[split] = defaultdict(dict) - for query_id, corpus_id in zip(self.relevant_docs[split]["query-id"], self.relevant_docs[split]["corpus-id"]): + for query_id, corpus_id in zip( + self.relevant_docs[split]["query-id"], + self.relevant_docs[split]["corpus-id"], + ): relevant_docs[split][query_id][corpus_id] = 1 self.relevant_docs = relevant_docs