diff --git a/README.md b/README.md
index 3c659bbde5..d105a7aeb8 100644
--- a/README.md
+++ b/README.md
@@ -379,6 +379,28 @@ results = mteb.load_results(models=models, tasks=tasks)
df = results_to_dataframe(results)
```
+
+
+
+
+ Annotate Contamination in the training data of a model
+
+### Annotate Contamination
+
+Have you found contamination in the training data of a model? Please let us know, either by opening an issue or ideally by submitting a PR
+annotating the training datasets of the model:
+
+```py
+model_w_contamination = ModelMeta(
+    name = "model-with-contamination",
+ ...
+    training_datasets = {"ArguAna": # name of dataset within MTEB
+ ["test"]} # the splits that have been trained on
+ ...
+)
+```
+
+
diff --git a/mteb/model_meta.py b/mteb/model_meta.py
index 83653ec3d1..9ac590b5d6 100644
--- a/mteb/model_meta.py
+++ b/mteb/model_meta.py
@@ -72,8 +72,9 @@ class ModelMeta(BaseModel):
in the Latin script.
use_instructions: Whether the model uses instructions E.g. for prompt-based models. This also include models that require a specific format for
input such as "query: {document}" or "passage: {document}".
- zero_shot_benchmarks: A list of benchmarks on which the model has been evaluated in a zero-shot setting. By default we assume that all models
- are evaluated non-zero-shot unless specified otherwise.
+        training_datasets: A dictionary of datasets that the model was trained on. Names should be the same as they appear in `mteb`, for example
+ {"ArguAna": ["test"]} if the model is trained on the ArguAna test set. This field is used to determine if a model generalizes zero-shot to
+ a benchmark as well as mark dataset contaminations.
adapted_from: Name of the model from which this model is adapted from. For quantizations, fine-tunes, long doc extensions, etc.
superseded_by: Name of the model that supersedes this model, e.g. nvidia/NV-Embed-v2 supersedes v1.
"""
@@ -97,7 +98,7 @@ class ModelMeta(BaseModel):
reference: STR_URL | None = None
similarity_fn_name: DISTANCE_METRICS | None = None
use_instructions: bool | None = None
- zero_shot_benchmarks: list[str] | None = None
+ training_datasets: dict[str, list[str]] | None = None
adapted_from: str | None = None
superseded_by: str | None = None