diff --git a/mteb/abstasks/task_metadata.py b/mteb/abstasks/task_metadata.py
index c781ed1331..294d8bb9f8 100644
--- a/mteb/abstasks/task_metadata.py
+++ b/mteb/abstasks/task_metadata.py
@@ -20,7 +20,6 @@
 )
 from typing_extensions import Required, TypedDict  # noqa: TC002
 
-import mteb
 from mteb.languages import check_language_code
 from mteb.types import (
     Languages,
@@ -588,9 +587,10 @@ def _create_dataset_card_data(
             multilinguality = "translated"
 
         if self.adapted_from is not None:
+            from mteb.get_tasks import get_tasks
+
             source_datasets = [
-                task.metadata.dataset["path"]
-                for task in mteb.get_tasks(self.adapted_from)
+                task.metadata.dataset["path"] for task in get_tasks(self.adapted_from)
             ]
             source_datasets.append(self.dataset["path"])
         else:
diff --git a/mteb/benchmarks/_create_table.py b/mteb/benchmarks/_create_table.py
index 448306d1a3..2d85d46ca2 100644
--- a/mteb/benchmarks/_create_table.py
+++ b/mteb/benchmarks/_create_table.py
@@ -8,8 +8,8 @@
 import numpy as np
 import pandas as pd
 
-import mteb
 from mteb.get_tasks import get_task, get_tasks
+from mteb.models.get_model_meta import get_model_meta
 
 if TYPE_CHECKING:
     from mteb.results.benchmark_results import BenchmarkResults
@@ -145,7 +145,7 @@ def _create_summary_table_from_benchmark_results(
     joint_table = joint_table.reset_index()
 
     # Add model metadata
-    model_metas = joint_table["model_name"].map(mteb.get_model_meta)
+    model_metas = joint_table["model_name"].map(get_model_meta)
     joint_table = joint_table[model_metas.notna()]
     joint_table["model_link"] = model_metas.map(lambda m: m.reference)
 
@@ -386,7 +386,7 @@ def _create_summary_table_mean_public_private(
     joint_table = joint_table.reset_index()
 
     # Add model metadata
-    model_metas = joint_table["model_name"].map(mteb.get_model_meta)
+    model_metas = joint_table["model_name"].map(get_model_meta)
     joint_table = joint_table[model_metas.notna()]
     joint_table["model_link"] = model_metas.map(lambda m: m.reference)
 
@@ -505,7 +505,7 @@ def _create_summary_table_mean_subset(
     joint_table = joint_table.reset_index()
 
     # Add model metadata
-    model_metas = joint_table["model_name"].map(mteb.get_model_meta)
+    model_metas = joint_table["model_name"].map(get_model_meta)
     joint_table = joint_table[model_metas.notna()]
     joint_table["model_link"] = model_metas.map(lambda m: m.reference)
 
@@ -622,7 +622,7 @@ def _create_summary_table_mean_task_type(
     joint_table = joint_table.reset_index()
 
     # Add model metadata
-    model_metas = joint_table["model_name"].map(mteb.get_model_meta)
+    model_metas = joint_table["model_name"].map(get_model_meta)
     joint_table = joint_table[model_metas.notna()]
     joint_table["model_link"] = model_metas.map(lambda m: m.reference)
 
diff --git a/mteb/cache.py b/mteb/cache.py
index 1df01e2a41..6cea3d0026 100644
--- a/mteb/cache.py
+++ b/mteb/cache.py
@@ -16,11 +16,12 @@
 import requests
 from pydantic import ValidationError
 
-import mteb
 from mteb._helpful_enum import HelpfulStrEnum
 from mteb.abstasks import AbsTask
 from mteb.benchmarks.benchmark import Benchmark
+from mteb.benchmarks.get_benchmark import get_benchmark
 from mteb.models import ModelMeta
+from mteb.models.get_model_meta import get_model_metas
 from mteb.models.model_meta import _serialize_experiment_kwargs_to_name
 from mteb.results import BenchmarkResults, ModelResult, TaskResult
 
@@ -548,7 +549,7 @@ def _rebuild_from_full_repository(self, quick_cache_path: Path) -> BenchmarkResu
 
         all_model_names = [
             model_meta.name
-            for model_meta in mteb.get_model_metas()
+            for model_meta in get_model_metas()
             if model_meta.name is not None
         ]
 
@@ -861,7 +862,7 @@ def load_results(
             ... )
         """
         if isinstance(tasks, str):
-            tasks = mteb.get_benchmark(tasks)
+            tasks = get_benchmark(tasks)
 
         if isinstance(load_experiments, str):
             load_experiments = LoadExperimentEnum.from_str(load_experiments)
diff --git a/mteb/deprecated_evaluator.py b/mteb/deprecated_evaluator.py
index 14f0db9fda..cbc6d52a91 100644
--- a/mteb/deprecated_evaluator.py
+++ b/mteb/deprecated_evaluator.py
@@ -15,9 +15,9 @@
 
 import datasets
 
-import mteb
 from mteb.abstasks import AbsTask
 from mteb.benchmarks import Benchmark
+from mteb.get_tasks import get_tasks
 from mteb.models import (
     CrossEncoderWrapper,
     ModelMeta,
@@ -156,7 +156,7 @@ def mteb_benchmarks(self):
     @classmethod
     def mteb_tasks(cls):
         """Get all tasks available in the MTEB."""
-        tasks = mteb.get_tasks()
+        tasks = get_tasks()
         instance = cls(tasks)
         instance._display_tasks(tasks, name="MTEB tasks")
 
diff --git a/mteb/models/abs_encoder.py b/mteb/models/abs_encoder.py
index 53d6c8b0aa..5c254e13a3 100644
--- a/mteb/models/abs_encoder.py
+++ b/mteb/models/abs_encoder.py
@@ -5,7 +5,6 @@
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Any, Literal, cast, get_args, overload
 
-import mteb
 from mteb.abstasks.task_metadata import TaskType
 from mteb.similarity_functions import (
     cos_sim,
@@ -166,7 +165,9 @@ def validate_task_to_prompt_name(
 
         if task_name not in task_types and task_name not in prompt_types:
             try:
-                mteb.get_task(task_name=task_name)
+                from mteb.get_tasks import get_task
+
+                get_task(task_name=task_name)
             except KeyError:
                 msg = f"Task name {task_name} is not valid. {valid_keys_msg}"
                 logger.warning(msg)
@@ -224,7 +225,9 @@ def get_instruction(
         if prompt:
             return prompt
 
-        abstask = mteb.get_task(task_name=task_metadata.name)
+        from mteb.get_tasks import get_task
+
+        abstask = get_task(task_name=task_metadata.name)
         return abstask.abstask_prompt
 
     def format_instruction(
diff --git a/mteb/results/task_result.py b/mteb/results/task_result.py
index f5c2d1ac13..4b358dd80a 100644
--- a/mteb/results/task_result.py
+++ b/mteb/results/task_result.py
@@ -15,8 +15,6 @@
 from pydantic import BaseModel, field_validator
 from typing_extensions import deprecated
 
-import mteb
-from mteb import TaskMetadata
 from mteb._helpful_enum import HelpfulStrEnum
 from mteb._hf_integration.eval_result_model import (
     HFEvalResult,
@@ -26,6 +24,7 @@
 )
 from mteb.abstasks import AbsTaskClassification
 from mteb.abstasks.abstask import AbsTask
+from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.languages import LanguageScripts
 from mteb.models.model_meta import ScoringFunction
 from mteb.types import (
@@ -909,7 +908,9 @@ def get_hf_eval_results(self) -> list[EvalResult]:
         return results
 
     def _to_hf_benchmark_result(self, user: str | None = None) -> HFEvalResults:
-        task_metadata = mteb.get_task(self.task_name).metadata
+        from mteb.get_tasks import get_task
+
+        task_metadata = get_task(self.task_name).metadata
         dataset_id = task_metadata.dataset["path"]
         dataset_revision = task_metadata.dataset["revision"]
         eval_results = []
diff --git a/tests/test_result_cache_load_from_cache.py b/tests/test_result_cache_load_from_cache.py
index d758350e97..b32e4414da 100644
--- a/tests/test_result_cache_load_from_cache.py
+++ b/tests/test_result_cache_load_from_cache.py
@@ -98,7 +98,7 @@ def test_full_rebuild_process(self, tmp_path):
         with (
             patch.object(cache, "download_from_remote") as mock_download,
             patch.object(cache, "load_results") as mock_load_results,
-            patch("mteb.get_model_metas") as mock_get_model_metas,
+            patch("mteb.cache.get_model_metas") as mock_get_model_metas,
         ):
             # Mock model metas - None names should be filtered
             meta1 = MagicMock()
@@ -138,7 +138,7 @@ def test_rebuild_error_propagation(self, tmp_path):
         with (
             patch.object(cache, "download_from_remote"),
             patch.object(cache, "load_results") as mock_load_results,
-            patch("mteb.get_model_metas") as mock_get_model_metas,
+            patch("mteb.cache.get_model_metas") as mock_get_model_metas,
         ):
             meta = MagicMock()
             meta.name = "model1"
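
Every hunk above applies the same pattern: the module-level `import mteb` is removed and each helper is imported from the submodule that defines it, either at the top of the file or, presumably where a top-of-file import would create an import cycle with the `mteb` package, deferred into the function body. A minimal sketch of that deferred-import pattern, reusing only the `mteb.get_tasks.get_task` call and the `metadata.dataset["path"]` access already present in the patch; the `dataset_path_for` wrapper itself is illustrative and not part of this change:

from __future__ import annotations


def dataset_path_for(task_name: str) -> str:
    """Resolve an MTEB task name to its dataset path."""
    # Local import: the submodule is loaded on the first call rather than at
    # module import time, so this module no longer needs the top-level `mteb`
    # package while the package itself is still being initialised.
    from mteb.get_tasks import get_task

    return get_task(task_name=task_name).metadata.dataset["path"]

The hunks that keep the import at module level (for example `get_model_meta` in `_create_table.py` and `get_benchmark` in `cache.py`) presumably sit outside any cycle, so an ordinary top-of-file import suffices there.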