diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 23f0a095ea..61eb6bfb3c 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -15,7 +15,7 @@ jobs:
       - uses: actions/setup-python@v4
         with:
-          python-version: "3.9"
+          python-version: "3.10"
          cache: "pip"

      - name: Install dependencies
diff --git a/mteb/abstasks/AbsTaskClassification.py b/mteb/abstasks/AbsTaskClassification.py
index eac41856e8..b47c8d68c0 100644
--- a/mteb/abstasks/AbsTaskClassification.py
+++ b/mteb/abstasks/AbsTaskClassification.py
@@ -150,7 +150,7 @@ def _evaluate_subset(
         )  # we store idxs to make the shuffling reproducible
         for i in range(self.n_experiments):
             logger.info(
-                "=" * 10 + f" Experiment {i+1}/{self.n_experiments} " + "=" * 10
+                "=" * 10 + f" Experiment {i + 1}/{self.n_experiments} " + "=" * 10
             )
             # Bootstrap `self.samples_per_label` samples per label for each split
             X_sampled, y_sampled, idxs = self._undersample_data(
diff --git a/mteb/abstasks/AbsTaskMultilabelClassification.py b/mteb/abstasks/AbsTaskMultilabelClassification.py
index cbcb125021..f9052c8b47 100644
--- a/mteb/abstasks/AbsTaskMultilabelClassification.py
+++ b/mteb/abstasks/AbsTaskMultilabelClassification.py
@@ -215,7 +215,7 @@ def _evaluate_subset(
         for i_experiment, sample_indices in enumerate(train_samples):
             logger.info(
                 "=" * 10
-                + f" Experiment {i_experiment+1}/{self.n_experiments} "
+                + f" Experiment {i_experiment + 1}/{self.n_experiments} "
                 + "=" * 10
             )
             X_train = np.stack([unique_train_embeddings[idx] for idx in sample_indices])
diff --git a/mteb/abstasks/AbsTaskSpeedTask.py b/mteb/abstasks/AbsTaskSpeedTask.py
index 7a73da445b..0d3f8ae8dd 100644
--- a/mteb/abstasks/AbsTaskSpeedTask.py
+++ b/mteb/abstasks/AbsTaskSpeedTask.py
@@ -78,7 +78,7 @@ def get_system_info(self) -> dict[str, str]:
             list_gpus.append(
                 {
                     "gpu_name": gpu.name,
-                    "gpu_total_memory": f"{gpu.memoryTotal/1024.0} GB",
+                    "gpu_total_memory": f"{gpu.memoryTotal / 1024.0} GB",
                 }
             )
         info["gpu_info"] = list_gpus
diff --git a/mteb/abstasks/Image/AbsTaskImageClassification.py b/mteb/abstasks/Image/AbsTaskImageClassification.py
index 7add58296a..c7afbe1136 100644
--- a/mteb/abstasks/Image/AbsTaskImageClassification.py
+++ b/mteb/abstasks/Image/AbsTaskImageClassification.py
@@ -133,7 +133,7 @@ def _evaluate_subset(
         )  # we store idxs to make the shuffling reproducible
         for i in range(self.n_experiments):
             logger.info(
-                "=" * 10 + f" Experiment {i+1}/{self.n_experiments} " + "=" * 10
+                "=" * 10 + f" Experiment {i + 1}/{self.n_experiments} " + "=" * 10
             )
             # Bootstrap `self.samples_per_label` samples per label for each split
             undersampled_train, idxs = self._undersample_data(
diff --git a/mteb/abstasks/Image/AbsTaskImageMultilabelClassification.py b/mteb/abstasks/Image/AbsTaskImageMultilabelClassification.py
index dc779d5e69..26fd799d56 100644
--- a/mteb/abstasks/Image/AbsTaskImageMultilabelClassification.py
+++ b/mteb/abstasks/Image/AbsTaskImageMultilabelClassification.py
@@ -183,7 +183,7 @@ def _evaluate_subset(
         for i_experiment, sample_indices in enumerate(train_samples):
             logger.info(
                 "=" * 10
-                + f" Experiment {i_experiment+1}/{self.n_experiments} "
+                + f" Experiment {i_experiment + 1}/{self.n_experiments} "
                 + "=" * 10
             )
             X_train = np.stack([unique_train_embeddings[idx] for idx in sample_indices])
diff --git a/mteb/tasks/Retrieval/eng/LitSearchRetrieval.py b/mteb/tasks/Retrieval/eng/LitSearchRetrieval.py
index 2c823e85dd..b9c3683ed4 100644
--- a/mteb/tasks/Retrieval/eng/LitSearchRetrieval.py
+++ b/mteb/tasks/Retrieval/eng/LitSearchRetrieval.py
@@ -52,7 +52,7 @@ def load_data(self, **kwargs):
         self.queries["test"] = dict(
             zip(
-                [f"q{x+1}" for x in range(len(query_ds["full"]))],
+                [f"q{x + 1}" for x in range(len(query_ds["full"]))],
                 query_ds["full"]["query"],
             )
         )
@@ -69,7 +69,7 @@ def load_data(self, **kwargs):
         }

         self.relevant_docs["test"] = {
-            f"q{e+1}": dict(zip([f"d{i}" for i in ids], range(1, len(ids) + 1)))
+            f"q{e + 1}": dict(zip([f"d{i}" for i in ids], range(1, len(ids) + 1)))
             for e, ids in enumerate(query_ds["full"]["corpusids"])
         }
diff --git a/mteb/tasks/Retrieval/slk/SlovakSumRetrieval.py b/mteb/tasks/Retrieval/slk/SlovakSumRetrieval.py
index 0b26fd1079..ea08c03225 100644
--- a/mteb/tasks/Retrieval/slk/SlovakSumRetrieval.py
+++ b/mteb/tasks/Retrieval/slk/SlovakSumRetrieval.py
@@ -56,12 +56,12 @@ def load_data(self, **kwargs):
                 dataset_path, split=f"{split}[:{n_sample}]"
             )
             # Transforming news summary into retrieval task
-            queries = {f"q{e+1}": x["sum"] for e, x in enumerate(split_ds)}
+            queries = {f"q{e + 1}": x["sum"] for e, x in enumerate(split_ds)}
             corpus = {
-                f"d{e+1}": {"title": x["title"], "text": x["text"]}
+                f"d{e + 1}": {"title": x["title"], "text": x["text"]}
                 for e, x in enumerate(split_ds)
             }
-            qrels = {f"q{i+1}": {f"d{i+1}": 1} for i in range(split_ds.shape[0])}
+            qrels = {f"q{i + 1}": {f"d{i + 1}": 1} for i in range(split_ds.shape[0])}
             self.corpus[split], self.queries[split], self.relevant_docs[split] = (
                 corpus,
                 queries,
diff --git a/scripts/task_selection/task_selection_eng_lite.ipynb b/scripts/task_selection/task_selection_eng_lite.ipynb
index ec81fe2893..49e4fba565 100644
--- a/scripts/task_selection/task_selection_eng_lite.ipynb
+++ b/scripts/task_selection/task_selection_eng_lite.ipynb
@@ -2551,7 +2551,7 @@
    "for model, revision in mteb_results.items():\n",
    "    for rev, results in revision.items():\n",
    "        print(\n",
-    "            f\"{model}: {sum(res.evaluation_time for res in results) / 3600 :.2f} hours\"\n",
+    "            f\"{model}: {sum(res.evaluation_time for res in results) / 3600:.2f} hours\"\n",
    "        )"
   ]
  },
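Note: aside from the CI bump from Python 3.9 to 3.10, every Python hunk above is behavior-preserving whitespace normalization around binary operators inside f-string replacement fields, consistent with the output of an autoformatter such as ruff (that origin is an assumption, not stated in the diff). A minimal sketch of the before/after on plain CPython, using stand-in variables `i` and `n_experiments` in place of the instance attributes from the diff:

    # Hypothetical standalone example mirroring the logger.info() hunks.
    i = 0
    n_experiments = 10

    # Before: no spaces around "+" inside the replacement field.
    print("=" * 10 + f" Experiment {i+1}/{n_experiments} " + "=" * 10)

    # After: spaces added around the operator; the rendered string is
    # byte-for-byte identical, so only the source text changes.
    print("=" * 10 + f" Experiment {i + 1}/{n_experiments} " + "=" * 10)

Both lines print "========== Experiment 1/10 ==========". The same reasoning applies to the notebook hunk, where the removed space before the ":.2f" format spec likewise leaves the formatted output unchanged.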