diff --git a/README.md b/README.md index e35ad3bdbc..a7eb03e4f2 100644 --- a/README.md +++ b/README.md @@ -378,6 +378,7 @@ df = results_to_dataframe(results) | Documentation | | | ------------------------------ | ---------------------- | | πŸ“‹ [Tasks] |Β Overview of available tasks | +| πŸ“ [Benchmarks] | Overview of available benchmarks | | πŸ“ˆ [Leaderboard] | The interactive leaderboard of the benchmark | | πŸ€– [Adding a model] | Information related to how to submit a model to the leaderboard | | πŸ‘©β€πŸ”¬ [Reproducible workflows] | Information related to how to reproduce and create reproducible workflows with MTEB | @@ -387,6 +388,7 @@ df = results_to_dataframe(results) | 🌐 [MMTEB] | An open-source effort to extend MTEB to cover a broad set of languages | Β  [Tasks]: docs/tasks.md +[Benchmarks]: docs/benchmarks.md [Contributing]: CONTRIBUTING.md [Adding a model]: docs/adding_a_model.md [Adding a dataset]: docs/adding_a_dataset.md diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 9eb471d187..a5abe50215 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -1,5 +1,5 @@ ## Available benchmarks -The following tables give you an overview of the benchmarks in MTEB. +The following table gives you an overview of the benchmarks in MTEB.
diff --git a/mteb/cli.py b/mteb/cli.py index 24e99bd241..b891d381f4 100644 --- a/mteb/cli.py +++ b/mteb/cli.py @@ -30,6 +30,14 @@ mteb available_tasks --task_types Clustering # list tasks of type Clustering ``` +## Listing Available Benchmarks + +To list the available benchmarks within MTEB, use the `mteb available_benchmarks` command. For example: + +```bash +mteb available_benchmarks # list all available benchmarks +``` + ## Creating Model Metadata @@ -144,6 +152,12 @@ def run(args: argparse.Namespace) -> None: _save_model_metadata(model, Path(args.output_folder)) +def available_benchmarks(args: argparse.Namespace) -> None: + benchmarks = mteb.get_benchmarks() + eval = mteb.MTEB(tasks=benchmarks) + eval.mteb_benchmarks() + + def available_tasks(args: argparse.Namespace) -> None: tasks = mteb.get_tasks( categories=args.categories, @@ -198,6 +212,15 @@ def add_available_tasks_parser(subparsers) -> None: parser.set_defaults(func=available_tasks) +def add_available_benchmarks_parser(subparsers) -> None: + parser = subparsers.add_parser( + "available_benchmarks", help="List the available benchmarks within MTEB" + ) + add_task_selection_args(parser) + + parser.set_defaults(func=available_benchmarks) + + def add_run_parser(subparsers) -> None: parser = subparsers.add_parser("run", help="Run a model on a set of tasks") @@ -321,6 +344,7 @@ def main(): ) add_run_parser(subparsers) add_available_tasks_parser(subparsers) + add_available_benchmarks_parser(subparsers) add_create_meta_parser(subparsers) args = parser.parse_args() diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index ab25169364..70f3e21ca8 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -168,6 +168,12 @@ def _display_tasks(self, task_list, name=None): console.print(f"{prefix}{name}{category}{multilingual}") console.print("\n") + def mteb_benchmarks(self): + """Get all benchmarks available in the MTEB.""" + for benchmark in self._tasks: + name = benchmark.name + 
self._display_tasks(benchmark.tasks, name=name) + @classmethod def mteb_tasks(cls): """Get all tasks available in the MTEB.""" diff --git a/tests/test_cli.py b/tests/test_cli.py index fdcd1b014a..1d0400e985 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -22,6 +22,15 @@ def test_available_tasks(): ), "Sample task Banking77Classification task not found in available tasks" + +def test_available_benchmarks(): + command = f"{sys.executable} -m mteb available_benchmarks" + result = subprocess.run(command, shell=True, capture_output=True, text=True) + assert result.returncode == 0, "Command failed" + assert ( + "MTEB(eng)" in result.stdout + ), "Sample benchmark MTEB(eng) task not found in available benchmarks" + + run_task_fixures = [ ( "average_word_embeddings_komninos",