Merged
3 changes: 2 additions & 1 deletion docs/adding_a_model.md
@@ -13,6 +13,7 @@ The MTEB Leaderboard is available [here](https://huggingface.co/spaces/mteb/lead
revision="5617a9f61b028005a4858fdac845db406aefb181",
release_date="2024-06-28",
n_parameters=568_000_000,
memory_usage_mb=2167,
embed_dim=4096,
license="mit",
max_tokens=8194,
@@ -25,7 +26,7 @@ The MTEB Leaderboard is available [here](https://huggingface.co/spaces/mteb/lead
training_datasets={"your_dataset": ["train"]},
)
```
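To fill in `memory_usage_mb`, a minimal sketch (hypothetical: it assumes the `ModelMeta` instance above is bound to a variable named `model_meta`):
```python
# Hypothetical variable: `model_meta` is the ModelMeta instance defined above.
# The method prefers safetensors metadata; without it, it assumes FP32
# (4 bytes per parameter).
print(model_meta.calculate_memory_usage_mb())  # 568_000_000 params * 4 B ≈ 2167 MB
```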
By default, the model will run using the [`sentence_transformers_loader`](../mteb/models/sentence_transformer_wrapper.py) loader function. If you need to use a custom implementation, you can specify the `loader` parameter in the `ModelMeta` class. For example:
To calculate `memory_usage_mb` you can run `model_meta.calculate_memory_usage_mb()`. By default, the model will run using the [`sentence_transformers_loader`](../mteb/models/sentence_transformer_wrapper.py) loader function. If you need to use a custom implementation, you can specify the `loader` parameter in the `ModelMeta` class. For example:
```python
from mteb.models.wrapper import Wrapper
from mteb.encoder_interface import PromptType
48 changes: 47 additions & 1 deletion mteb/model_meta.py
@@ -4,6 +4,12 @@
from functools import partial
from typing import TYPE_CHECKING, Any, Callable, Literal

from huggingface_hub import get_safetensors_metadata
from huggingface_hub.errors import (
GatedRepoError,
NotASafetensorsRepoError,
SafetensorsParsingError,
)
from pydantic import BaseModel, ConfigDict

from mteb.abstasks.AbsTask import AbsTask
@@ -58,8 +64,9 @@ class ModelMeta(BaseModel):
Attributes:
loader: the function that loads the model. If None it will just default to loading the model using the sentence transformer library.
name: The name of the model, ideally the name on huggingface.
n_parameters: The number of parameters in the model, e.g. 7_000_000 for a 7M parameter model. Can be None if the the number of parameters is not known (e.g. for proprietary models) or
n_parameters: The number of parameters in the model, e.g. 7_000_000 for a 7M parameter model. Can be None if the number of parameters is not known (e.g. for proprietary models) or
if the loader returns a SentenceTransformer model from which it can be derived.
memory_usage_mb: The memory usage of the model in MB. Can be None if the memory usage is not known (e.g. for proprietary models). To calculate it use the `calculate_memory_usage_mb` method.
max_tokens: The maximum number of tokens the model can handle. Can be None if the maximum number of tokens is not known (e.g. for proprietary
models).
embed_dim: The dimension of the embeddings produced by the model. Currently all models are assumed to produce fixed-size embeddings.
@@ -92,6 +99,7 @@ class ModelMeta(BaseModel):
languages: list[ISO_LANGUAGE_SCRIPT] | None
loader: Callable[..., Encoder] | None = None
n_parameters: int | None
memory_usage_mb: float | None
max_tokens: float | None
embed_dim: int | None
license: str | None
@@ -149,3 +157,41 @@ def is_zero_shot_on(self, tasks: list[AbsTask]) -> bool | None:
model_datasets = {ds_name for ds_name, splits in self.training_datasets.items()}
intersection = model_datasets & benchmark_datasets
return len(intersection) == 0

def calculate_memory_usage_mb(self) -> int | None:
"""Calculates the memory usage (in FP32) of the model in MB."""
if "API" in self.framework:
return None

MB = 1024**2
try:
safetensors_metadata = get_safetensors_metadata(self.name)
if len(safetensors_metadata.parameter_count) > 0:  # metadata must be non-empty
dtype_size_map = {
"F64": 8, # 64-bit float
"F32": 4, # 32-bit float (FP32)
"F16": 2, # 16-bit float (FP16)
"BF16": 2, # BFloat16
"I64": 8, # 64-bit integer
"I32": 4, # 32-bit integer
"I16": 2, # 16-bit integer
"I8": 1, # 8-bit integer
"U8": 1, # Unsigned 8-bit integer
"BOOL": 1, # Boolean (assuming 1 byte per value)
}
total_memory_bytes = sum(
parameters * dtype_size_map.get(dtype, 4)
for dtype, parameters in safetensors_metadata.parameter_count.items()
)
return round(total_memory_bytes / MB) # Convert to MB

except (NotASafetensorsRepoError, SafetensorsParsingError, GatedRepoError):
pass
if self.n_parameters is None:
return None
# Model memory in bytes. For FP32 each parameter is 4 bytes.
model_memory_bytes = self.n_parameters * 4

# Convert to MB
model_memory_mb = model_memory_bytes / MB
return round(model_memory_mb)
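
A minimal usage sketch of the calculation (assumptions: `mteb.get_model_meta` resolves a registered `ModelMeta` by name, and the model name shown is hypothetical):
```python
import mteb

# Hypothetical model name; any registered ModelMeta works here.
meta = mteb.get_model_meta("Snowflake/snowflake-arctic-embed-xs")

# Uses safetensors metadata when the repo publishes it; otherwise falls back
# to n_parameters * 4 bytes (FP32), and returns None for API-only models.
print(meta.calculate_memory_usage_mb())  # 22_600_000 params * 4 B ≈ 86 MB
```
Note that the FP32 fallback will overestimate quantized checkpoints that lack safetensors metadata.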
1 change: 1 addition & 0 deletions mteb/models/align_models.py
@@ -144,6 +144,7 @@ def get_fused_embeddings(
release_date="2023-02-24",
modalities=["image", "text"],
n_parameters=176_000_000,
memory_usage_mb=671,
max_tokens=64,
embed_dim=768,
license=None,
8 changes: 8 additions & 0 deletions mteb/models/arctic_models.py
@@ -94,6 +94,7 @@
open_weights=True,
framework=["Sentence Transformers", "PyTorch"],
n_parameters=22_600_000,
memory_usage_mb=86,
max_tokens=512,
embed_dim=384,
license="apache-2.0",
@@ -148,6 +149,7 @@
open_weights=True,
framework=["Sentence Transformers", "PyTorch"],
n_parameters=32_200_000,
memory_usage_mb=127,
max_tokens=512,
embed_dim=384,
license="apache-2.0",
@@ -201,6 +203,7 @@
open_weights=True,
framework=["Sentence Transformers", "PyTorch"],
n_parameters=109_000_000,
memory_usage_mb=415,
max_tokens=512,
embed_dim=768,
license="apache-2.0",
@@ -254,6 +257,7 @@
open_weights=True,
framework=["Sentence Transformers", "PyTorch"],
n_parameters=137_000_000,
memory_usage_mb=522,
max_tokens=2048,
embed_dim=768,
license="apache-2.0",
@@ -307,6 +311,7 @@
open_weights=True,
framework=["Sentence Transformers", "PyTorch"],
n_parameters=335_000_000,
memory_usage_mb=1274,
max_tokens=512,
embed_dim=1024,
license="apache-2.0",
@@ -362,6 +367,7 @@
open_weights=True,
framework=["Sentence Transformers", "PyTorch"],
n_parameters=109_000_000,
memory_usage_mb=415,
max_tokens=512,
embed_dim=768,
license="apache-2.0",
@@ -389,6 +395,7 @@
open_weights=True,
framework=["Sentence Transformers", "PyTorch"],
n_parameters=305_000_000,
memory_usage_mb=1165,
max_tokens=8192,
embed_dim=768,
license="apache-2.0",
@@ -438,6 +445,7 @@
open_weights=True,
framework=["Sentence Transformers", "PyTorch"],
n_parameters=568_000_000,
memory_usage_mb=2166,
max_tokens=8192,
embed_dim=1024,
license="apache-2.0",
7 changes: 5 additions & 2 deletions mteb/models/bedrock_models.py
@@ -13,10 +13,9 @@
from mteb.model_meta import ModelMeta
from mteb.models.cohere_models import model_prompts as cohere_model_prompts
from mteb.models.cohere_models import supported_languages as cohere_supported_languages
from mteb.models.wrapper import Wrapper
from mteb.requires_package import requires_package

from .wrapper import Wrapper

logger = logging.getLogger(__name__)


@@ -174,6 +173,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray:
embed_dim=1536,
open_weights=False,
n_parameters=None,
memory_usage_mb=None,
public_training_code=None,
public_training_data=None, # assumed
training_datasets=None,
@@ -199,6 +199,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray:
embed_dim=1024,
open_weights=False,
n_parameters=None,
memory_usage_mb=None,
public_training_code=None,
public_training_data=None, # assumed
training_datasets=None,
@@ -226,6 +227,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray:
revision="1",
release_date="2023-11-02",
n_parameters=None,
memory_usage_mb=None,
public_training_code=None,
public_training_data=None, # assumed
training_datasets=None,
@@ -252,6 +254,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray:
revision="1",
release_date="2023-11-02",
n_parameters=None,
memory_usage_mb=None,
public_training_code=None,
public_training_data=None, # assumed
training_datasets=None,
18 changes: 16 additions & 2 deletions mteb/models/bge_models.py
@@ -3,8 +3,7 @@
from functools import partial

from mteb.model_meta import ModelMeta, sentence_transformers_loader

from .e5_instruct import E5_MISTRAL_TRAINING_DATA
from mteb.models.e5_instruct import E5_MISTRAL_TRAINING_DATA

model_prompts = {"query": "Represent this sentence for searching relevant passages: "}
model_prompts_zh = {"query": "为这个句子生成表示以用于检索相关文章:"}
@@ -326,6 +325,7 @@
revision="5c38ec7c405ec4b44b94cc5a9bb96e735b38267a",
release_date="2023-09-12", # initial commit of hf model.
n_parameters=33_400_000,
memory_usage_mb=127,
embed_dim=512,
license="mit",
max_tokens=512,
@@ -351,6 +351,7 @@
revision="a5beb1e3e68b9ab74eb54cfd186867f64f240e1a",
release_date="2023-09-11", # initial commit of hf model.
n_parameters=109_000_000,
memory_usage_mb=390,
embed_dim=768,
license="mit",
max_tokens=512,
@@ -376,6 +377,7 @@
revision="d4aa6901d3a41ba39fb536a557fa166f842b0e09",
release_date="2023-09-12", # initial commit of hf model.
n_parameters=335_000_000,
memory_usage_mb=1242,
embed_dim=1024,
license="mit",
max_tokens=512,
@@ -401,6 +403,7 @@
revision="1d2363c5de6ce9ba9c890c8e23a4c72dce540ca8",
release_date="2023-08-05", # initial commit of hf model.
n_parameters=33_400_000,
memory_usage_mb=127,
embed_dim=512,
license="mit",
max_tokens=512,
@@ -427,6 +430,7 @@
revision="0e5f83d4895db7955e4cb9ed37ab73f7ded339b6",
release_date="2023-08-05", # initial commit of hf model.
n_parameters=109_000_000,
memory_usage_mb=390,
embed_dim=768,
license="mit",
max_tokens=512,
@@ -453,6 +457,7 @@
revision="b5d9f5c027e87b6f0b6fa4b614f8f9cdc45ce0e8",
release_date="2023-08-02", # initial commit of hf model.
n_parameters=335_000_000,
memory_usage_mb=1242,
embed_dim=1024,
license="mit",
max_tokens=512,
@@ -479,6 +484,7 @@
revision="4778d71a06863076696b03fd2777eb118712cad8",
release_date="2023-08-05", # initial commit of hf model.
n_parameters=33_400_000,
memory_usage_mb=127,
embed_dim=512,
license="mit",
max_tokens=512,
@@ -505,6 +511,7 @@
revision="b737bf5dcc6ee8bdc530531266b4804a5d77b5d8",
release_date="2023-08-05", # initial commit of hf model.
n_parameters=109_000_000,
memory_usage_mb=390,
embed_dim=768,
license="mit",
max_tokens=512,
@@ -531,6 +538,7 @@
revision="abe7d9d814b775ca171121fb03f394dc42974275",
release_date="2023-08-05", # initial commit of hf model.
n_parameters=335_000_000,
memory_usage_mb=1242,
embed_dim=1024,
license="mit",
max_tokens=512,
@@ -558,6 +566,7 @@
revision="7999e1d3359715c523056ef9478215996d62a620",
release_date="2023-09-12", # initial commit of hf model.
n_parameters=33_400_000,
memory_usage_mb=91,
embed_dim=512,
license="mit",
max_tokens=512,
@@ -583,6 +592,7 @@
revision="f03589ceff5aac7111bd60cfc7d497ca17ecac65",
release_date="2023-09-11", # initial commit of hf model.
n_parameters=109_000_000,
memory_usage_mb=416,
embed_dim=768,
license="mit",
max_tokens=512,
@@ -608,6 +618,7 @@
revision="79e7739b6ab944e86d6171e44d24c997fc1e0116",
release_date="2023-09-12", # initial commit of hf model.
n_parameters=335_000_000,
memory_usage_mb=1278,
embed_dim=1024,
license="mit",
max_tokens=512,
@@ -632,6 +643,7 @@
revision="5617a9f61b028005a4858fdac845db406aefb181",
release_date="2024-06-28",
n_parameters=568_000_000,
memory_usage_mb=2167,
embed_dim=4096,
license="mit",
max_tokens=8194,
@@ -665,6 +677,7 @@
revision="992e13d8984fde2c31ef8a3cb2c038aeec513b8a",
release_date="2024-07-25", # initial commit of hf model.
n_parameters=9.24 * 1e9,
memory_usage_mb=35254,
embed_dim=3584, # from old C-MTEB leaderboard
license="gemma",
max_tokens=8192, # from old C-MTEB leaderboard
@@ -747,6 +760,7 @@
revision="971c7e1445cc86656ca0bd85ed770b8675a40bb5",
release_date="2024-07-25", # initial commit of hf model.
n_parameters=7.11 * 1e9,
memory_usage_mb=27125,
embed_dim=4096,
license="apache-2",
max_tokens=32768,
2 changes: 2 additions & 0 deletions mteb/models/blip2_models.py
@@ -232,6 +232,7 @@ def get_fused_embeddings(
release_date="2024-03-22",
modalities=["image", "text"],
n_parameters=3_740_000_000,
memory_usage_mb=14285,
max_tokens=None,
embed_dim=768,
license="mit",
@@ -256,6 +257,7 @@ def get_fused_embeddings(
release_date="2024-03-31",
modalities=["image", "text"],
n_parameters=7_750_000_000,
memory_usage_mb=29577,
max_tokens=None,
embed_dim=768,
license="mit",