From 365bb0a5ffee97209479317a6ff6948cb42e0e2c Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Sat, 7 Dec 2024 09:58:36 -0800 Subject: [PATCH 1/3] fix: Add training dataset to model meta Addresses #1556 --- mteb/model_meta.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mteb/model_meta.py b/mteb/model_meta.py index 83653ec3d1..03b7d5644c 100644 --- a/mteb/model_meta.py +++ b/mteb/model_meta.py @@ -72,8 +72,9 @@ class ModelMeta(BaseModel): in the Latin script. use_instructions: Whether the model uses instructions E.g. for prompt-based models. This also include models that require a specific format for input such as "query: {document}" or "passage: {document}". - zero_shot_benchmarks: A list of benchmarks on which the model has been evaluated in a zero-shot setting. By default we assume that all models - are evaluated non-zero-shot unless specified otherwise. + training_datasets: A dictionary of datasets that the model was trained on. Names should be names as their appear in `mteb` for example + {"ArguAna": ["test"]} if the model is trained on the ArguAna test set. This field is used to determine if a model generalizes zero-shot to + a benchmark as well as mark dataset contaminations. adapted_from: Name of the model from which this model is adapted from. For quantizations, fine-tunes, long doc extensions, etc. superseded_by: Name of the model that supersedes this model, e.g. nvidia/NV-Embed-v2 supersedes v1. 
""" @@ -97,7 +98,7 @@ class ModelMeta(BaseModel): reference: STR_URL | None = None similarity_fn_name: DISTANCE_METRICS | None = None use_instructions: bool | None = None - zero_shot_benchmarks: list[str] | None = None + training_datasets: dict[str, list[str]] | None = None adapted_from: str | None = None superseded_by: str | None = None From bb921500ab22af17dad7aff3f08d7dcc77b22b43 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Sat, 7 Dec 2024 10:08:37 -0800 Subject: [PATCH 2/3] Added docs --- README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/README.md b/README.md index 3c659bbde5..d105a7aeb8 100644 --- a/README.md +++ b/README.md @@ -379,6 +379,28 @@ results = mteb.load_results(models=models, tasks=tasks) df = results_to_dataframe(results) ``` + + + +
+ Annotate Contamination in the training data of a model + +### Annotate Contamination + +Have you found contamination in the training data of a model? Please let us know, either by opening an issue or ideally by submitting a PR +annotating the training datasets of the model: + +```py +model_w_contamination = ModelMeta( + name = "model-with-contamination" + ... + training_datasets = {"ArguAna": # name of dataset within MTEB + ["test"]} # the splits that have been trained on + ... +) +``` + +
From 72cbcb5fa05e8c1a93a122af1d89c3d264d3ee7b Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Sat, 7 Dec 2024 10:08:55 -0800 Subject: [PATCH 3/3] format --- mteb/model_meta.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mteb/model_meta.py b/mteb/model_meta.py index 03b7d5644c..9ac590b5d6 100644 --- a/mteb/model_meta.py +++ b/mteb/model_meta.py @@ -72,7 +72,7 @@ class ModelMeta(BaseModel): in the Latin script. use_instructions: Whether the model uses instructions E.g. for prompt-based models. This also include models that require a specific format for input such as "query: {document}" or "passage: {document}". - training_datasets: A dictionary of datasets that the model was trained on. Names should be names as their appear in `mteb` for example + training_datasets: A dictionary of datasets that the model was trained on. Names should be the names as they appear in `mteb`, for example {"ArguAna": ["test"]} if the model is trained on the ArguAna test set. This field is used to determine if a model generalizes zero-shot to a benchmark as well as mark dataset contaminations. adapted_from: Name of the model from which this model is adapted from. For quantizations, fine-tunes, long doc extensions, etc. @@ -98,7 +98,7 @@ class ModelMeta(BaseModel): reference: STR_URL | None = None similarity_fn_name: DISTANCE_METRICS | None = None use_instructions: bool | None = None - training_datasets: dict[str, list[str]] | None = None + training_datasets: dict[str, list[str]] | None = None adapted_from: str | None = None superseded_by: str | None = None