diff --git a/mteb/model_meta.py b/mteb/model_meta.py
index 7d6b6276a6..22e5d85ba6 100644
--- a/mteb/model_meta.py
+++ b/mteb/model_meta.py
@@ -89,6 +89,7 @@ class ModelMeta(BaseModel):
             a benchmark as well as mark dataset contaminations.
         adapted_from: Name of the model from which this model is adapted. For quantizations, fine-tunes, long doc extensions, etc.
         superseded_by: Name of the model that supersedes this model, e.g., nvidia/NV-Embed-v2 supersedes v1.
+        is_cross_encoder: Whether the model can act as a cross-encoder or not.
         modalities: A list of strings representing the modalities the model supports. Default is ["text"].
     """

@@ -114,6 +115,7 @@ class ModelMeta(BaseModel):
     training_datasets: dict[str, list[str]] | None
     adapted_from: str | None = None
     superseded_by: str | None = None
+    is_cross_encoder: bool | None = None
     modalities: list[MODALITIES] = ["text"]

     def to_dict(self):
diff --git a/mteb/models/rerankers_custom.py b/mteb/models/rerankers_custom.py
index 7c966cdf78..0061ffc1c3 100644
--- a/mteb/models/rerankers_custom.py
+++ b/mteb/models/rerankers_custom.py
@@ -216,6 +216,7 @@ def loader_inner(**kwargs: Any) -> Encoder:
     use_instructions=None,
     training_datasets=None,
     framework=["Sentence Transformers", "PyTorch"],
+    is_cross_encoder=True,
 )

 # languages unclear: https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual/discussions/28
@@ -242,6 +243,7 @@ def loader_inner(**kwargs: Any) -> Encoder:
     use_instructions=None,
     training_datasets=None,
     framework=["Sentence Transformers", "PyTorch"],
+    is_cross_encoder=True,
 )

 bge_reranker_v2_m3 = ModelMeta(
@@ -300,4 +302,5 @@ def loader_inner(**kwargs: Any) -> Encoder:
     use_instructions=None,
     training_datasets=bge_m3_training_data,
     framework=["Sentence Transformers", "PyTorch"],
+    is_cross_encoder=True,
 )
diff --git a/mteb/models/rerankers_monot5_based.py b/mteb/models/rerankers_monot5_based.py
index afc31e5a2d..f94508c548 100644
--- a/mteb/models/rerankers_monot5_based.py
+++ b/mteb/models/rerankers_monot5_based.py
@@ -307,6 +307,7 @@ def get_prediction_tokens(self, *args, **kwargs):
     use_instructions=None,
     training_datasets=None,
     framework=["PyTorch"],
+    is_cross_encoder=True,
 )

 monot5_base = ModelMeta(
@@ -332,6 +333,7 @@ def get_prediction_tokens(self, *args, **kwargs):
     use_instructions=None,
     training_datasets=None,
     framework=["PyTorch"],
+    is_cross_encoder=True,
 )

 monot5_large = ModelMeta(
@@ -357,6 +359,7 @@ def get_prediction_tokens(self, *args, **kwargs):
     use_instructions=None,
     training_datasets=None,
     framework=["PyTorch"],
+    is_cross_encoder=True,
 )

 monot5_3b = ModelMeta(
@@ -382,6 +385,7 @@ def get_prediction_tokens(self, *args, **kwargs):
     use_instructions=None,
     training_datasets=None,
     framework=["PyTorch"],
+    is_cross_encoder=True,
 )

 flant5_base = ModelMeta(
@@ -408,16 +412,17 @@ def get_prediction_tokens(self, *args, **kwargs):
         "quasc": ["train"],
         "qed": ["train"],
     },
-    n_parameters=None,
+    n_parameters=248_000_000,
     memory_usage_mb=944,
     max_tokens=None,
-    embed_dim=None,
-    license=None,
+    embed_dim=768,
+    license="apache-2.0",
     public_training_code=None,
     public_training_data=None,
     similarity_fn_name=None,
-    use_instructions=None,
+    use_instructions=True,
     framework=["PyTorch"],
+    is_cross_encoder=True,
 )

 flant5_large = ModelMeta(
@@ -444,16 +449,17 @@ def get_prediction_tokens(self, *args, **kwargs):
         "quasc": ["train"],
         "qed": ["train"],
     },
-    n_parameters=None,
+    n_parameters=783_000_000,
+    max_tokens=1024,
     memory_usage_mb=2987,
-    max_tokens=None,
     embed_dim=None,
-    license=None,
+    license="apache-2.0",
     public_training_code=None,
     public_training_data=None,
     similarity_fn_name=None,
     use_instructions=None,
     framework=["PyTorch"],
+    is_cross_encoder=True,
 )

 flant5_xl = ModelMeta(
@@ -480,16 +486,17 @@ def get_prediction_tokens(self, *args, **kwargs):
         "quasc": ["train"],
         "qed": ["train"],
     },
-    n_parameters=None,
+    n_parameters=2_850_000_000,
     memory_usage_mb=10871,
     max_tokens=None,
-    embed_dim=None,
-    license=None,
+    embed_dim=2048,
+    license="apache-2.0",
     public_training_code=None,
     public_training_data=None,
     similarity_fn_name=None,
     use_instructions=None,
     framework=["PyTorch"],
+    is_cross_encoder=True,
 )

 flant5_xxl = ModelMeta(
@@ -516,16 +523,17 @@ def get_prediction_tokens(self, *args, **kwargs):
         "quasc": ["train"],
         "qed": ["train"],
     },
-    n_parameters=None,
+    n_parameters=11_300_000_000,
     memory_usage_mb=42980,
     max_tokens=None,
-    embed_dim=None,
-    license=None,
+    embed_dim=4096,
+    license="apache-2.0",
     public_training_code=None,
     public_training_data=None,
     similarity_fn_name=None,
     use_instructions=None,
     framework=["PyTorch"],
+    is_cross_encoder=True,
 )


@@ -541,17 +549,18 @@ def get_prediction_tokens(self, *args, **kwargs):
     open_weights=True,
     revision="01c7f73d771dfac7d292323805ebc428287df4f9",
     release_date="2023-07-18",
-    n_parameters=None,
+    n_parameters=6_740_000_000,
     memory_usage_mb=None,
     max_tokens=None,
     embed_dim=None,
-    license=None,
+    license=None,  # llama2
     public_training_code=None,
     public_training_data=None,
     similarity_fn_name=None,
     use_instructions=None,
     training_datasets=None,
     framework=["PyTorch"],
+    is_cross_encoder=True,
 )

 llama2_7b_chat = ModelMeta(
@@ -577,6 +586,7 @@ def get_prediction_tokens(self, *args, **kwargs):
     use_instructions=None,
     training_datasets=None,
     framework=["PyTorch"],
+    is_cross_encoder=True,
 )

 mistral_7b = ModelMeta(
@@ -602,6 +612,7 @@ def get_prediction_tokens(self, *args, **kwargs):
     use_instructions=None,
     training_datasets=None,
     framework=["PyTorch"],
+    is_cross_encoder=True,
 )

 followir_7b = ModelMeta(
@@ -617,16 +628,17 @@ def get_prediction_tokens(self, *args, **kwargs):
     revision="4d25d437e38b510c01852070c0731e8f6e1875d1",
     release_date="2024-04-29",
     training_datasets={"jhu-clsp/FollowIR-train": ["train"]},
-    n_parameters=None,
+    n_parameters=7_240_000_000,
     memory_usage_mb=13813,
     max_tokens=None,
     embed_dim=None,
-    license=None,
+    license="apache-2.0",
     public_training_code=None,
     public_training_data=None,
     similarity_fn_name=None,
     use_instructions=None,
     framework=["PyTorch"],
+    is_cross_encoder=True,
 )


@@ -751,12 +763,13 @@ def get_prediction_tokens(self, *args, **kwargs):
     memory_usage_mb=None,
     max_tokens=None,
     embed_dim=None,
-    license=None,
+    license="mit",
     public_training_code=None,
     public_training_data=None,
     similarity_fn_name=None,
     use_instructions=None,
     framework=["PyTorch"],
+    is_cross_encoder=True,
 )

 mt5_13b_mmarco_100k = ModelMeta(
@@ -782,4 +795,5 @@ def get_prediction_tokens(self, *args, **kwargs):
     use_instructions=None,
     training_datasets=None,
     framework=["PyTorch"],
+    is_cross_encoder=True,
 )
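
A minimal usage sketch of the new field (not part of the diff), assuming the existing mteb.get_model_meta helper; the model name below is only an illustrative example of one of the rerankers updated here:

    import mteb

    # is_cross_encoder is None for models where the flag has not been set,
    # and True for the rerankers updated in this diff.
    meta = mteb.get_model_meta("castorini/monot5-base-msmarco-10k")
    if meta.is_cross_encoder:
        print(f"{meta.name} scores query-document pairs jointly as a cross-encoder")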