diff --git a/mteb/model_meta.py b/mteb/model_meta.py
index 4a8146b3de..0191eb58c5 100644
--- a/mteb/model_meta.py
+++ b/mteb/model_meta.py
@@ -74,6 +74,8 @@ class ModelMeta(BaseModel):
             input such as "query: {document}" or "passage: {document}".
         zero_shot_benchmarks: A list of benchmarks on which the model has been evaluated in a zero-shot setting. By default we assume that all models
             are evaluated non-zero-shot unless specified otherwise.
+        adapted_from: Name of the model from which this model is adapted. For quantizations, fine-tunes, long doc extensions, etc.
+        supersedes: Name of the model that this model supersedes, e.g. nvidia/NV-Embed-v2 supersedes v1.
     """
 
     model_config = ConfigDict(extra="forbid")
@@ -96,6 +98,8 @@ class ModelMeta(BaseModel):
     similarity_fn_name: DISTANCE_METRICS | None = None
     use_instructions: bool | None = None
     zero_shot_benchmarks: list[str] | None = None
+    adapted_from: str | None = None
+    supersedes: str | None = None
 
     def to_dict(self):
         dict_repr = self.model_dump()
diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py
index 7a3116e667..78458369d1 100644
--- a/mteb/models/sentence_transformers_models.py
+++ b/mteb/models/sentence_transformers_models.py
@@ -70,7 +70,7 @@
     memory_usage=None,
     embed_dim=384,
     license="apache-2.0",
-    max_tokens=512,
+    max_tokens=256,
     reference="https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
     similarity_fn_name="cosine",
     framework=["Sentence Transformers", "PyTorch"],
@@ -127,3 +127,41 @@
     framework=["Sentence Transformers", "PyTorch"],
     use_instructions=False,
 )
+
+multi_qa_MiniLM_L6_cos_v1 = ModelMeta(
+    name="sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
+    languages=["eng-Latn"],
+    open_weights=True,
+    revision="b207367332321f8e44f96e224ef15bc607f4dbf0",  # can be any
+    release_date="2021-08-30",
+    n_parameters=22_700_000,
+    memory_usage=None,
+    embed_dim=384,
+    license="apache-2.0",
+    max_tokens=512,
+    reference="https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
+    similarity_fn_name="cosine",
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    supersedes=None,
+    adapted_from=None,
+)
+
+all_mpnet_base_v2 = ModelMeta(
+    name="sentence-transformers/all-mpnet-base-v2",
+    languages=["eng-Latn"],
+    open_weights=True,
+    revision="9a3225965996d404b775526de6dbfe85d3368642",  # can be any
+    release_date="2021-08-30",
+    n_parameters=109_000_000,
+    memory_usage=None,
+    embed_dim=768,
+    license="apache-2.0",
+    max_tokens=384,
+    reference="https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
+    similarity_fn_name="cosine",
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    supersedes="sentence-transformers/all-mpnet-base-v1",
+    adapted_from=None,
+)