Skip to content
48 changes: 44 additions & 4 deletions mteb/models/nomic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import numpy as np
import torch
import torch.nn.functional as F
import transformers
from packaging.version import Version

import mteb
from mteb.encoder_interface import PromptType
Expand All @@ -16,6 +18,8 @@

logger = logging.getLogger(__name__)

MODERN_BERT_TRANSFORMERS_MIN_VERSION = "4.48.0"


class NomicWrapper(SentenceTransformerWrapper):
"""following the hf model card documentation."""
Expand All @@ -28,6 +32,14 @@ def __init__(
**kwargs: Any,
):
self.model_name = model_name
if model_name == "nomic-ai/modernbert-embed-base" and (
Version(transformers.__version__).release
< Version(MODERN_BERT_TRANSFORMERS_MIN_VERSION).release
):
raise RuntimeError(
f"Current transformers version is {transformers.__version__} is lower than the required version"
f" {MODERN_BERT_TRANSFORMERS_MIN_VERSION}"
)
super().__init__(model_name, revision, model_prompts, **kwargs)

def to(self, device: torch.device) -> None:
Expand Down Expand Up @@ -92,7 +104,7 @@ def encode( # type: ignore
}

nomic_embed_v1_5 = ModelMeta(
loader=partial( # type: ignore
loader=partial(
NomicWrapper,
trust_remote_code=True,
model_name="nomic-ai/nomic-embed-text-v1.5",
Expand All @@ -118,7 +130,7 @@ def encode( # type: ignore
)

nomic_embed_v1 = ModelMeta(
loader=partial( # type: ignore
loader=partial(
NomicWrapper,
trust_remote_code=True,
model_name="nomic-ai/nomic-embed-text-v1",
Expand All @@ -144,7 +156,7 @@ def encode( # type: ignore
)

nomic_embed_v1_ablated = ModelMeta(
loader=partial( # type: ignore
loader=partial(
NomicWrapper,
trust_remote_code=True,
model_name="nomic-ai/nomic-embed-text-v1-ablated",
Expand All @@ -171,7 +183,7 @@ def encode( # type: ignore


nomic_embed_v1_unsupervised = ModelMeta(
loader=partial( # type: ignore
loader=partial(
NomicWrapper,
trust_remote_code=True,
model_name="nomic-ai/nomic-embed-text-v1-unsupervised",
Expand All @@ -195,3 +207,31 @@ def encode( # type: ignore
adapted_from=None,
superseded_by=None,
)

# Registration metadata for the ModernBERT-based Nomic embedding model.
# NOTE(review): loading this model requires transformers >= 4.48.0 — the
# check is enforced at construction time inside NomicWrapper.__init__.
nomic_modern_bert_embed = ModelMeta(
    name="nomic-ai/modernbert-embed-base",
    revision="5960f1566fb7cb1adf1eb6e816639cf4646d9b12",
    release_date="2024-12-29",
    languages=["eng-Latn"],
    loader=partial(
        NomicWrapper,
        model_name="nomic-ai/modernbert-embed-base",
        # Pin the same revision as the metadata above so loader and meta agree.
        revision="5960f1566fb7cb1adf1eb6e816639cf4646d9b12",
        model_prompts=model_prompts,
        # Loaded in half precision; presumably to reduce memory footprint —
        # confirm fp16 inference quality against the upstream model card.
        model_kwargs={"torch_dtype": torch.float16},
    ),
    open_weights=True,
    n_parameters=149_000_000,
    memory_usage=None,
    max_tokens=8192,
    embed_dim=768,
    license="apache-2.0",
    reference="https://huggingface.co/nomic-ai/modernbert-embed-base",
    similarity_fn_name="cosine",
    framework=["Sentence Transformers", "PyTorch"],
    use_instructions=True,
    adapted_from=None,
    superseded_by=None,
)
Loading