diff --git a/mteb/models/nomic_models.py b/mteb/models/nomic_models.py
index 248449e5e4..aa6989941f 100644
--- a/mteb/models/nomic_models.py
+++ b/mteb/models/nomic_models.py
@@ -7,6 +7,8 @@
 import numpy as np
 import torch
 import torch.nn.functional as F
+import transformers
+from packaging.version import Version
 
 import mteb
 from mteb.encoder_interface import PromptType
@@ -16,6 +18,8 @@
 
 logger = logging.getLogger(__name__)
 
+MODERN_BERT_TRANSFORMERS_MIN_VERSION = "4.48.0"
+
 
 class NomicWrapper(SentenceTransformerWrapper):
     """following the hf model card documentation."""
@@ -28,6 +32,14 @@ def __init__(
         **kwargs: Any,
     ):
         self.model_name = model_name
+        if model_name == "nomic-ai/modernbert-embed-base" and (
+            Version(transformers.__version__).release
+            < Version(MODERN_BERT_TRANSFORMERS_MIN_VERSION).release
+        ):
+            raise RuntimeError(
+                f"Current transformers version {transformers.__version__} is lower than the required version"
+                f" {MODERN_BERT_TRANSFORMERS_MIN_VERSION}"
+            )
         super().__init__(model_name, revision, model_prompts, **kwargs)
 
     def to(self, device: torch.device) -> None:
@@ -92,7 +104,7 @@ def encode(  # type: ignore
 }
 
 nomic_embed_v1_5 = ModelMeta(
-    loader=partial(  # type: ignore
+    loader=partial(
         NomicWrapper,
         trust_remote_code=True,
         model_name="nomic-ai/nomic-embed-text-v1.5",
@@ -118,7 +130,7 @@ def encode(  # type: ignore
 )
 
 nomic_embed_v1 = ModelMeta(
-    loader=partial(  # type: ignore
+    loader=partial(
         NomicWrapper,
         trust_remote_code=True,
         model_name="nomic-ai/nomic-embed-text-v1",
@@ -144,7 +156,7 @@ def encode(  # type: ignore
 )
 
 nomic_embed_v1_ablated = ModelMeta(
-    loader=partial(  # type: ignore
+    loader=partial(
         NomicWrapper,
         trust_remote_code=True,
         model_name="nomic-ai/nomic-embed-text-v1-ablated",
@@ -171,7 +183,7 @@
 
 
 nomic_embed_v1_unsupervised = ModelMeta(
-    loader=partial(  # type: ignore
+    loader=partial(
         NomicWrapper,
         trust_remote_code=True,
         model_name="nomic-ai/nomic-embed-text-v1-unsupervised",
@@ -195,3 +207,31 @@ def encode(  # type: ignore
     adapted_from=None,
     superseded_by=None,
 )
+
+nomic_modern_bert_embed = ModelMeta(
+    loader=partial(
+        NomicWrapper,
+        model_name="nomic-ai/modernbert-embed-base",
+        revision="5960f1566fb7cb1adf1eb6e816639cf4646d9b12",
+        model_prompts=model_prompts,
+        model_kwargs={
+            "torch_dtype": torch.float16,
+        },
+    ),
+    name="nomic-ai/modernbert-embed-base",
+    languages=["eng-Latn"],
+    open_weights=True,
+    revision="5960f1566fb7cb1adf1eb6e816639cf4646d9b12",
+    release_date="2024-12-29",
+    n_parameters=149_000_000,
+    memory_usage=None,
+    max_tokens=8192,
+    embed_dim=768,
+    license="apache-2.0",
+    reference="https://huggingface.co/nomic-ai/modernbert-embed-base",
+    similarity_fn_name="cosine",
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+    adapted_from=None,
+    superseded_by=None,
+)