diff --git a/docs/adding_a_model.md b/docs/adding_a_model.md
index 088199e264..2ebf965761 100644
--- a/docs/adding_a_model.md
+++ b/docs/adding_a_model.md
@@ -13,6 +13,7 @@ The MTEB Leaderboard is available [here](https://huggingface.co/spaces/mteb/lead
         revision="5617a9f61b028005a4858fdac845db406aefb181",
         release_date="2024-06-28",
         n_parameters=568_000_000,
+        memory_usage_mb=2167,
         embed_dim=4096,
         license="mit",
         max_tokens=8194,
@@ -25,7 +26,7 @@ The MTEB Leaderboard is available [here](https://huggingface.co/spaces/mteb/lead
         training_datasets={"your_dataset": ["train"]},
     )
     ```
-    By default, the model will run using the [`sentence_transformers_loader`](../mteb/models/sentence_transformer_wrapper.py) loader function. If you need to use a custom implementation, you can specify the `loader` parameter in the `ModelMeta` class. For example:
+    To calculate `memory_usage_mb` you can run `model_meta.calculate_memory_usage_mb()`. By default, the model will run using the [`sentence_transformers_loader`](../mteb/models/sentence_transformer_wrapper.py) loader function. If you need to use a custom implementation, you can specify the `loader` parameter in the `ModelMeta` class. For example:
     ```python
     from mteb.models.wrapper import Wrapper
     from mteb.encoder_interface import PromptType
diff --git a/mteb/model_meta.py b/mteb/model_meta.py
index 6486c849ea..c493eb6544 100644
--- a/mteb/model_meta.py
+++ b/mteb/model_meta.py
@@ -4,6 +4,12 @@
 from functools import partial
 from typing import TYPE_CHECKING, Any, Callable, Literal
 
+from huggingface_hub import get_safetensors_metadata
+from huggingface_hub.errors import (
+    GatedRepoError,
+    NotASafetensorsRepoError,
+    SafetensorsParsingError,
+)
 from pydantic import BaseModel, ConfigDict
 
 from mteb.abstasks.AbsTask import AbsTask
@@ -58,8 +64,9 @@ class ModelMeta(BaseModel):
 
     Attributes:
         loader: the function that loads the model. If None it will just default to loading the model using the sentence transformer library.
         name: The name of the model, ideally the name on huggingface.
-        n_parameters: The number of parameters in the model, e.g. 7_000_000 for a 7M parameter model. Can be None if the the number of parameters is not known (e.g. for proprietary models) or
+        n_parameters: The number of parameters in the model, e.g. 7_000_000 for a 7M parameter model. Can be None if the number of parameters is not known (e.g. for proprietary models) or
             if the loader returns a SentenceTransformer model from which it can be derived.
+        memory_usage_mb: The memory usage of the model in MB. Can be None if the memory usage is not known (e.g. for proprietary models). To calculate it use the `calculate_memory_usage_mb` method.
         max_tokens: The maximum number of tokens the model can handle. Can be None if the maximum number of tokens is not known (e.g. for proprietary models).
         embed_dim: The dimension of the embeddings produced by the model. Currently all models are assumed to produce fixed-size embeddings.
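To make the memory estimate concrete before the hunks that apply it everywhere below, here is a minimal sketch of the two paths `calculate_memory_usage_mb` takes (an editorial illustration, not part of the patch; it assumes `huggingface_hub` is installed and uses `BAAI/bge-m3` purely as an example repo id):

```python
from huggingface_hub import get_safetensors_metadata

# Fallback path: no safetensors metadata available, so assume FP32
# (4 bytes per parameter). 568_000_000 * 4 / 1024**2 ~= 2166.75,
# which rounds to the memory_usage_mb=2167 in the docs example above.
n_parameters = 568_000_000
print(round(n_parameters * 4 / 1024**2))  # -> 2167

# Preferred path: dtype-aware sizing from the hub's safetensors metadata.
# parameter_count maps a dtype name to a parameter count, e.g. {"F32": ...}.
metadata = get_safetensors_metadata("BAAI/bge-m3")
dtype_size = {"F64": 8, "F32": 4, "F16": 2, "BF16": 2, "I8": 1}
total_bytes = sum(
    count * dtype_size.get(dtype, 4)  # default to 4 bytes for unknown dtypes
    for dtype, count in metadata.parameter_count.items()
)
print(round(total_bytes / 1024**2))  # memory usage in MB
```

The two paths can disagree by a megabyte or so: the fallback rounds an estimate derived from the stated `n_parameters`, while the safetensors path uses the exact per-dtype counts, which is why the same 568M-parameter size shows up in this patch as both 2166 and 2167.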
@@ -92,6 +99,7 @@ class ModelMeta(BaseModel):
     languages: list[ISO_LANGUAGE_SCRIPT] | None
     loader: Callable[..., Encoder] | None = None
     n_parameters: int | None
+    memory_usage_mb: float | None
     max_tokens: float | None
     embed_dim: int | None
     license: str | None
@@ -149,3 +157,41 @@ def is_zero_shot_on(self, tasks: list[AbsTask]) -> bool | None:
         model_datasets = {ds_name for ds_name, splits in self.training_datasets.items()}
         intersection = model_datasets & benchmark_datasets
         return len(intersection) == 0
+
+    def calculate_memory_usage_mb(self) -> int | None:
+        """Calculates the memory usage of the model in MB. Prefers safetensors metadata; otherwise falls back to an FP32 estimate (4 bytes per parameter)."""
+        if "API" in self.framework:
+            return None
+
+        MB = 1024**2
+        try:
+            safetensors_metadata = get_safetensors_metadata(self.name)
+            if len(safetensors_metadata.parameter_count) > 0:
+                dtype_size_map = {
+                    "F64": 8,  # 64-bit float
+                    "F32": 4,  # 32-bit float (FP32)
+                    "F16": 2,  # 16-bit float (FP16)
+                    "BF16": 2,  # BFloat16
+                    "I64": 8,  # 64-bit integer
+                    "I32": 4,  # 32-bit integer
+                    "I16": 2,  # 16-bit integer
+                    "I8": 1,  # 8-bit integer
+                    "U8": 1,  # Unsigned 8-bit integer
+                    "BOOL": 1,  # Boolean (assuming 1 byte per value)
+                }
+                total_memory_bytes = sum(
+                    parameters * dtype_size_map.get(dtype, 4)
+                    for dtype, parameters in safetensors_metadata.parameter_count.items()
+                )
+                return round(total_memory_bytes / MB)  # Convert to MB
+
+        except (NotASafetensorsRepoError, SafetensorsParsingError, GatedRepoError):
+            pass
+        if self.n_parameters is None:
+            return None
+        # Model memory in bytes. For FP32 each parameter is 4 bytes.
+        model_memory_bytes = self.n_parameters * 4
+
+        # Convert to MB
+        model_memory_mb = model_memory_bytes / MB
+        return round(model_memory_mb)
diff --git a/mteb/models/align_models.py b/mteb/models/align_models.py
index 95fb6fda25..be8ff8e56d 100644
--- a/mteb/models/align_models.py
+++ b/mteb/models/align_models.py
@@ -144,6 +144,7 @@ def get_fused_embeddings(
     release_date="2023-02-24",
     modalities=["image", "text"],
     n_parameters=176_000_000,
+    memory_usage_mb=671,
     max_tokens=64,
     embed_dim=768,
     license=None,
diff --git a/mteb/models/arctic_models.py b/mteb/models/arctic_models.py
index 22051536ea..9009656dac 100644
--- a/mteb/models/arctic_models.py
+++ b/mteb/models/arctic_models.py
@@ -94,6 +94,7 @@
     open_weights=True,
     framework=["Sentence Transformers", "PyTorch"],
     n_parameters=22_600_000,
+    memory_usage_mb=86,
     max_tokens=512,
     embed_dim=384,
     license="apache-2.0",
@@ -148,6 +149,7 @@
     open_weights=True,
     framework=["Sentence Transformers", "PyTorch"],
    n_parameters=32_200_000,
+    memory_usage_mb=127,
     max_tokens=512,
     embed_dim=384,
     license="apache-2.0",
@@ -201,6 +203,7 @@
     open_weights=True,
     framework=["Sentence Transformers", "PyTorch"],
     n_parameters=109_000_000,
+    memory_usage_mb=415,
     max_tokens=512,
     embed_dim=768,
     license="apache-2.0",
@@ -254,6 +257,7 @@
     open_weights=True,
     framework=["Sentence Transformers", "PyTorch"],
     n_parameters=137_000_000,
+    memory_usage_mb=522,
     max_tokens=2048,
     embed_dim=768,
     license="apache-2.0",
@@ -307,6 +311,7 @@
     open_weights=True,
     framework=["Sentence Transformers", "PyTorch"],
     n_parameters=335_000_000,
+    memory_usage_mb=1274,
     max_tokens=512,
     embed_dim=1024,
     license="apache-2.0",
@@ -362,6 +367,7 @@
     open_weights=True,
     framework=["Sentence Transformers", "PyTorch"],
     n_parameters=109_000_000,
+    memory_usage_mb=415,
     max_tokens=512,
     embed_dim=768,
     license="apache-2.0",
@@ -389,6 +395,7 @@
     open_weights=True,
     framework=["Sentence Transformers", "PyTorch"],
     n_parameters=305_000_000,
+    memory_usage_mb=1165,
     max_tokens=8192,
     embed_dim=768,
license="apache-2.0", @@ -438,6 +445,7 @@ open_weights=True, framework=["Sentence Transformers", "PyTorch"], n_parameters=568_000_000, + memory_usage_mb=2166, max_tokens=8192, embed_dim=1024, license="apache-2.0", diff --git a/mteb/models/bedrock_models.py b/mteb/models/bedrock_models.py index 4616209df1..a97535960c 100644 --- a/mteb/models/bedrock_models.py +++ b/mteb/models/bedrock_models.py @@ -13,10 +13,9 @@ from mteb.model_meta import ModelMeta from mteb.models.cohere_models import model_prompts as cohere_model_prompts from mteb.models.cohere_models import supported_languages as cohere_supported_languages +from mteb.models.wrapper import Wrapper from mteb.requires_package import requires_package -from .wrapper import Wrapper - logger = logging.getLogger(__name__) @@ -174,6 +173,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray: embed_dim=1536, open_weights=False, n_parameters=None, + memory_usage_mb=None, public_training_code=None, public_training_data=None, # assumed training_datasets=None, @@ -199,6 +199,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray: embed_dim=1024, open_weights=False, n_parameters=None, + memory_usage_mb=None, public_training_code=None, public_training_data=None, # assumed training_datasets=None, @@ -226,6 +227,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray: revision="1", release_date="2023-11-02", n_parameters=None, + memory_usage_mb=None, public_training_code=None, public_training_data=None, # assumed training_datasets=None, @@ -252,6 +254,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray: revision="1", release_date="2023-11-02", n_parameters=None, + memory_usage_mb=None, public_training_code=None, public_training_data=None, # assumed training_datasets=None, diff --git a/mteb/models/bge_models.py b/mteb/models/bge_models.py index 9f350696da..94eef2c2d9 100644 --- a/mteb/models/bge_models.py +++ b/mteb/models/bge_models.py @@ -3,8 +3,7 @@ from functools import partial from mteb.model_meta import ModelMeta, sentence_transformers_loader - -from .e5_instruct import E5_MISTRAL_TRAINING_DATA +from mteb.models.e5_instruct import E5_MISTRAL_TRAINING_DATA model_prompts = {"query": "Represent this sentence for searching relevant passages: "} model_prompts_zh = {"query": "为这个句子生成表示以用于检索相关文章:"} @@ -326,6 +325,7 @@ revision="5c38ec7c405ec4b44b94cc5a9bb96e735b38267a", release_date="2023-09-12", # initial commit of hf model. n_parameters=33_400_000, + memory_usage_mb=127, embed_dim=512, license="mit", max_tokens=512, @@ -351,6 +351,7 @@ revision="a5beb1e3e68b9ab74eb54cfd186867f64f240e1a", release_date="2023-09-11", # initial commit of hf model. n_parameters=109_000_000, + memory_usage_mb=390, embed_dim=768, license="mit", max_tokens=512, @@ -376,6 +377,7 @@ revision="d4aa6901d3a41ba39fb536a557fa166f842b0e09", release_date="2023-09-12", # initial commit of hf model. n_parameters=335_000_000, + memory_usage_mb=1242, embed_dim=1024, license="mit", max_tokens=512, @@ -401,6 +403,7 @@ revision="1d2363c5de6ce9ba9c890c8e23a4c72dce540ca8", release_date="2023-08-05", # initial commit of hf model. n_parameters=33_400_000, + memory_usage_mb=127, embed_dim=512, license="mit", max_tokens=512, @@ -427,6 +430,7 @@ revision="0e5f83d4895db7955e4cb9ed37ab73f7ded339b6", release_date="2023-08-05", # initial commit of hf model. n_parameters=109_000_000, + memory_usage_mb=390, embed_dim=768, license="mit", max_tokens=512, @@ -453,6 +457,7 @@ revision="b5d9f5c027e87b6f0b6fa4b614f8f9cdc45ce0e8", release_date="2023-08-02", # initial commit of hf model. 
n_parameters=335_000_000, + memory_usage_mb=1242, embed_dim=1024, license="mit", max_tokens=512, @@ -479,6 +484,7 @@ revision="4778d71a06863076696b03fd2777eb118712cad8", release_date="2023-08-05", # initial commit of hf model. n_parameters=33_400_000, + memory_usage_mb=127, embed_dim=512, license="mit", max_tokens=512, @@ -505,6 +511,7 @@ revision="b737bf5dcc6ee8bdc530531266b4804a5d77b5d8", release_date="2023-08-05", # initial commit of hf model. n_parameters=109_000_000, + memory_usage_mb=390, embed_dim=768, license="mit", max_tokens=512, @@ -531,6 +538,7 @@ revision="abe7d9d814b775ca171121fb03f394dc42974275", release_date="2023-08-05", # initial commit of hf model. n_parameters=335_000_000, + memory_usage_mb=1242, embed_dim=1024, license="mit", max_tokens=512, @@ -558,6 +566,7 @@ revision="7999e1d3359715c523056ef9478215996d62a620", release_date="2023-09-12", # initial commit of hf model. n_parameters=33_400_000, + memory_usage_mb=91, embed_dim=512, license="mit", max_tokens=512, @@ -583,6 +592,7 @@ revision="f03589ceff5aac7111bd60cfc7d497ca17ecac65", release_date="2023-09-11", # initial commit of hf model. n_parameters=109_000_000, + memory_usage_mb=416, embed_dim=768, license="mit", max_tokens=512, @@ -608,6 +618,7 @@ revision="79e7739b6ab944e86d6171e44d24c997fc1e0116", release_date="2023-09-12", # initial commit of hf model. n_parameters=335_000_000, + memory_usage_mb=1278, embed_dim=1024, license="mit", max_tokens=512, @@ -632,6 +643,7 @@ revision="5617a9f61b028005a4858fdac845db406aefb181", release_date="2024-06-28", n_parameters=568_000_000, + memory_usage_mb=2167, embed_dim=4096, license="mit", max_tokens=8194, @@ -665,6 +677,7 @@ revision="992e13d8984fde2c31ef8a3cb2c038aeec513b8a", release_date="2024-07-25", # initial commit of hf model. n_parameters=9.24 * 1e9, + memory_usage_mb=35254, embed_dim=3584, # from old C-MTEB leaderboard license="gemma", max_tokens=8192, # from old C-MTEB leaderboard @@ -747,6 +760,7 @@ revision="971c7e1445cc86656ca0bd85ed770b8675a40bb5", release_date="2024-07-25", # initial commit of hf model. 
n_parameters=7.11 * 1e9, + memory_usage_mb=27125, embed_dim=4096, license="apache-2", max_tokens=32768, diff --git a/mteb/models/blip2_models.py b/mteb/models/blip2_models.py index e396cf39f0..1c6e75b0c2 100644 --- a/mteb/models/blip2_models.py +++ b/mteb/models/blip2_models.py @@ -232,6 +232,7 @@ def get_fused_embeddings( release_date="2024-03-22", modalities=["image", "text"], n_parameters=3_740_000_000, + memory_usage_mb=14285, max_tokens=None, embed_dim=768, license="mit", @@ -256,6 +257,7 @@ def get_fused_embeddings( release_date="2024-03-31", modalities=["image", "text"], n_parameters=7_750_000_000, + memory_usage_mb=29577, max_tokens=None, embed_dim=768, license="mit", diff --git a/mteb/models/blip_models.py b/mteb/models/blip_models.py index 43822465dc..9fadf15e1a 100644 --- a/mteb/models/blip_models.py +++ b/mteb/models/blip_models.py @@ -165,6 +165,7 @@ def get_fused_embeddings( release_date="2023-12-07", modalities=["image", "text"], n_parameters=470_000_000, + memory_usage_mb=1792, max_tokens=512, embed_dim=768, license="bsd-3-clause", @@ -193,6 +194,7 @@ def get_fused_embeddings( release_date="2023-08-01", modalities=["image", "text"], n_parameters=247_000_000, + memory_usage_mb=942, max_tokens=512, embed_dim=768, license="bsd-3-clause", @@ -222,6 +224,7 @@ def get_fused_embeddings( release_date="2023-12-07", modalities=["image", "text"], n_parameters=247_000_000, + memory_usage_mb=1467, max_tokens=512, embed_dim=768, license="bsd-3-clause", @@ -249,6 +252,7 @@ def get_fused_embeddings( release_date="2023-01-22", modalities=["image", "text"], n_parameters=247_000_000, + memory_usage_mb=942, max_tokens=512, embed_dim=768, license="bsd-3-clause", @@ -276,6 +280,7 @@ def get_fused_embeddings( release_date="2023-08-01", modalities=["image", "text"], n_parameters=247_000_000, + memory_usage_mb=942, max_tokens=512, embed_dim=768, license="bsd-3-clause", @@ -303,6 +308,7 @@ def get_fused_embeddings( release_date="2023-08-01", modalities=["image", "text"], n_parameters=470_000_000, + memory_usage_mb=1793, max_tokens=512, embed_dim=768, license="bsd-3-clause", @@ -331,6 +337,7 @@ def get_fused_embeddings( release_date="2023-08-01", modalities=["image", "text"], n_parameters=247_000_000, + memory_usage_mb=942, max_tokens=512, embed_dim=768, license="bsd-3-clause", @@ -359,6 +366,7 @@ def get_fused_embeddings( release_date="2023-08-01", modalities=["image", "text"], n_parameters=470_000_000, + memory_usage_mb=1793, max_tokens=512, embed_dim=768, license="bsd-3-clause", diff --git a/mteb/models/bm25.py b/mteb/models/bm25.py index 6e3d3747d9..15e90e0f43 100644 --- a/mteb/models/bm25.py +++ b/mteb/models/bm25.py @@ -5,8 +5,7 @@ from mteb.evaluation.evaluators.RetrievalEvaluator import DRESModel from mteb.model_meta import ModelMeta - -from .wrapper import Wrapper +from mteb.models.wrapper import Wrapper logger = logging.getLogger(__name__) @@ -131,6 +130,7 @@ def encode(self, texts: list[str], **kwargs): revision="0_1_10", release_date="2024-07-10", ## release of version 0.1.10 n_parameters=None, + memory_usage_mb=None, embed_dim=None, license=None, max_tokens=None, diff --git a/mteb/models/cde_models.py b/mteb/models/cde_models.py index 78870ef129..33e852d884 100644 --- a/mteb/models/cde_models.py +++ b/mteb/models/cde_models.py @@ -3,8 +3,7 @@ import logging from mteb.model_meta import ModelMeta - -from .bge_models import bge_full_data +from mteb.models.bge_models import bge_full_data logger = logging.getLogger(__name__) @@ -16,7 +15,8 @@ open_weights=True, 
revision="8d5736163718a8b65cd787b75ed61020d18bad3c", release_date="2024-09-24", - n_parameters=int(281 * 1e6), # Though the second-stage model is only 140M + n_parameters=int(281 * 1e6), + memory_usage_mb=1072, # Though the second-stage model is only 140M max_tokens=512, embed_dim=768, license="mit", @@ -38,7 +38,8 @@ open_weights=True, revision="a7e5882ad52c27ea2831fc8258f24379c25cb459", release_date="2025-01-13", - n_parameters=int(306 * 1e6), # Though the second-stage model is only 140M + n_parameters=int(306 * 1e6), + memory_usage_mb=1166, # Though the second-stage model is only 140M max_tokens=512, embed_dim=768, license="mit", diff --git a/mteb/models/clip_models.py b/mteb/models/clip_models.py index faee0e7c9d..a8c3da96c8 100644 --- a/mteb/models/clip_models.py +++ b/mteb/models/clip_models.py @@ -148,6 +148,7 @@ def get_fused_embeddings( release_date="2021-02-26", modalities=["image", "text"], n_parameters=428_000_000, + memory_usage_mb=1631, max_tokens=77, embed_dim=768, license=None, @@ -172,6 +173,7 @@ def get_fused_embeddings( release_date="2021-02-26", modalities=["image", "text"], n_parameters=151_000_000, + memory_usage_mb=576, max_tokens=77, embed_dim=512, license=None, @@ -196,6 +198,7 @@ def get_fused_embeddings( release_date="2021-02-26", modalities=["image", "text"], n_parameters=151_000_000, + memory_usage_mb=576, max_tokens=77, embed_dim=512, license=None, diff --git a/mteb/models/cohere_models.py b/mteb/models/cohere_models.py index 60ff63ee81..2a40f47218 100644 --- a/mteb/models/cohere_models.py +++ b/mteb/models/cohere_models.py @@ -9,8 +9,7 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta - -from .wrapper import Wrapper +from mteb.models.wrapper import Wrapper supported_languages = [ "afr-Latn", @@ -227,6 +226,7 @@ def encode( revision="1", release_date="2023-11-02", n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=512, reference="https://cohere.com/blog/introducing-embed-v3", @@ -252,6 +252,7 @@ def encode( revision="1", release_date="2023-11-02", n_parameters=None, + memory_usage_mb=None, max_tokens=512, embed_dim=1024, license=None, @@ -276,6 +277,7 @@ def encode( reference="https://cohere.com/blog/introducing-embed-v3", release_date="2023-11-02", n_parameters=None, + memory_usage_mb=None, max_tokens=512, embed_dim=384, license=None, @@ -300,6 +302,7 @@ def encode( revision="1", release_date="2023-11-02", n_parameters=None, + memory_usage_mb=None, max_tokens=512, embed_dim=384, license=None, diff --git a/mteb/models/cohere_v.py b/mteb/models/cohere_v.py index c84d5ff640..578e435955 100644 --- a/mteb/models/cohere_v.py +++ b/mteb/models/cohere_v.py @@ -187,6 +187,7 @@ def get_fused_embeddings( revision="1", release_date="2024-10-24", n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=1024, license=None, @@ -208,6 +209,7 @@ def get_fused_embeddings( revision="1", release_date="2024-10-24", n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=1024, license=None, diff --git a/mteb/models/colbert_models.py b/mteb/models/colbert_models.py index 04d053a6c9..628272a045 100644 --- a/mteb/models/colbert_models.py +++ b/mteb/models/colbert_models.py @@ -10,8 +10,7 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta - -from .wrapper import Wrapper +from mteb.models.wrapper import Wrapper logger = logging.getLogger(__name__) @@ -156,6 +155,7 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: public_training_data=None, 
release_date="2024-09-21", n_parameters=110 * 1e6, + memory_usage_mb=418, max_tokens=180, # Reduced for Benchmarking - see ColBERT paper embed_dim=None, # Bag of Embeddings (128) for each token license="mit", @@ -211,6 +211,7 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: public_training_data=None, release_date="2024-08-16", n_parameters=559 * 1e6, + memory_usage_mb=1067, max_tokens=8192, embed_dim=None, # Bag of Embeddings (128) for each token license="cc-by-nc-4.0", diff --git a/mteb/models/dino_models.py b/mteb/models/dino_models.py index c2cd4db5fe..31cd442f25 100644 --- a/mteb/models/dino_models.py +++ b/mteb/models/dino_models.py @@ -137,6 +137,7 @@ def get_fused_embeddings( release_date="2023-07-18", modalities=["image"], n_parameters=22_100_000, + memory_usage_mb=84, max_tokens=None, embed_dim=384, license="apache-2.0", @@ -161,6 +162,7 @@ def get_fused_embeddings( release_date="2023-07-18", modalities=["image"], n_parameters=86_600_000, + memory_usage_mb=330, max_tokens=None, embed_dim=768, license="apache-2.0", @@ -185,6 +187,7 @@ def get_fused_embeddings( release_date="2023-07-18", modalities=["image"], n_parameters=304_000_000, + memory_usage_mb=1161, max_tokens=None, embed_dim=1024, license="apache-2.0", @@ -209,6 +212,7 @@ def get_fused_embeddings( release_date="2023-07-18", modalities=["image"], n_parameters=1_140_000_000, + memory_usage_mb=4335, max_tokens=None, embed_dim=1536, license="apache-2.0", diff --git a/mteb/models/e5_instruct.py b/mteb/models/e5_instruct.py index 0266f3d5e9..87f75fdd16 100644 --- a/mteb/models/e5_instruct.py +++ b/mteb/models/e5_instruct.py @@ -5,9 +5,12 @@ import torch from mteb.model_meta import ModelMeta - -from .e5_models import E5_PAPER_RELEASE_DATE, E5_TRAINING_DATA, XLMR_LANGUAGES -from .instruct_wrapper import instruct_wrapper +from mteb.models.e5_models import ( + E5_PAPER_RELEASE_DATE, + E5_TRAINING_DATA, + XLMR_LANGUAGES, +) +from mteb.models.instruct_wrapper import instruct_wrapper MISTRAL_LANGUAGES = ["eng_Latn", "fra_Latn", "deu_Latn", "ita_Latn", "spa_Latn"] @@ -45,6 +48,7 @@ use_instructions=True, reference="https://huggingface.co/intfloat/multilingual-e5-large-instruct", n_parameters=560_000_000, + memory_usage_mb=1068, embed_dim=1024, license="mit", max_tokens=514, @@ -76,6 +80,7 @@ use_instructions=True, reference="https://huggingface.co/intfloat/e5-mistral-7b-instruct", n_parameters=7_111_000_000, + memory_usage_mb=13563, embed_dim=4096, license="mit", max_tokens=32768, @@ -102,6 +107,7 @@ release_date="2024-08-30", languages=["eng_Latn"], n_parameters=7110660096, + memory_usage_mb=13563, max_tokens=32768.0, embed_dim=4096, license="mit", diff --git a/mteb/models/e5_models.py b/mteb/models/e5_models.py index ee54355956..1814eacc89 100644 --- a/mteb/models/e5_models.py +++ b/mteb/models/e5_models.py @@ -152,6 +152,7 @@ revision="fd1525a9fd15316a2d503bf26ab031a61d056e98", release_date=E5_PAPER_RELEASE_DATE, n_parameters=118_000_000, + memory_usage_mb=449, embed_dim=384, license="mit", max_tokens=512, @@ -176,6 +177,7 @@ revision="d13f1b27baf31030b7fd040960d60d909913633f", release_date=E5_PAPER_RELEASE_DATE, n_parameters=278_000_000, + memory_usage_mb=1061, embed_dim=768, license="mit", max_tokens=514, @@ -201,6 +203,7 @@ revision="ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb", release_date=E5_PAPER_RELEASE_DATE, n_parameters=560_000_000, + memory_usage_mb=2136, embed_dim=1024, license="mit", max_tokens=514, @@ -225,6 +228,7 @@ revision="dca8b1a9dae0d4575df2bf423a5edb485a431236", 
release_date=E5_PAPER_RELEASE_DATE, n_parameters=33_000_000, + memory_usage_mb=127, embed_dim=384, license="mit", max_tokens=512, @@ -250,6 +254,7 @@ revision="e272f3049e853b47cb5ca3952268c6662abda68f", release_date=E5_PAPER_RELEASE_DATE, n_parameters=33_000_000, + memory_usage_mb=127, embed_dim=384, license="mit", max_tokens=512, @@ -275,6 +280,7 @@ revision="1c644c92ad3ba1efdad3f1451a637716616a20e8", release_date=E5_PAPER_RELEASE_DATE, n_parameters=109_000_000, + memory_usage_mb=418, embed_dim=768, license="mit", max_tokens=512, @@ -302,6 +308,7 @@ revision="b322e09026e4ea05f42beadf4d661fb4e101d311", release_date=E5_PAPER_RELEASE_DATE, n_parameters=335_000_000, + memory_usage_mb=1278, embed_dim=1024, license="mit", max_tokens=514, @@ -329,6 +336,7 @@ revision="4dc6d853a804b9c8886ede6dda8a073b7dc08a81", release_date="2022-12-26", n_parameters=335_000_000, + memory_usage_mb=1278, embed_dim=1024, license="apache-2.0", max_tokens=512, @@ -356,6 +364,7 @@ revision="b533fe4636f4a2507c08ddab40644d20b0006d6a", release_date="2022-12-26", n_parameters=109_000_000, + memory_usage_mb=418, embed_dim=768, license="apache-2.0", max_tokens=512, diff --git a/mteb/models/e5_v.py b/mteb/models/e5_v.py index 909cfcbab7..14383b4413 100644 --- a/mteb/models/e5_v.py +++ b/mteb/models/e5_v.py @@ -199,6 +199,7 @@ def get_fused_embeddings( release_date="2024-07-17", modalities=["image", "text"], n_parameters=8_360_000_000, + memory_usage_mb=15936, max_tokens=8192, embed_dim=4096, license=None, diff --git a/mteb/models/evaclip_models.py b/mteb/models/evaclip_models.py index fdd25771d4..545a06027f 100644 --- a/mteb/models/evaclip_models.py +++ b/mteb/models/evaclip_models.py @@ -185,6 +185,7 @@ def get_fused_embeddings( release_date="2023-04-26", modalities=["image", "text"], n_parameters=149_000_000, + memory_usage_mb=568, max_tokens=77, embed_dim=512, license="mit", @@ -209,6 +210,7 @@ def get_fused_embeddings( release_date="2023-04-26", modalities=["image", "text"], n_parameters=428_000_000, + memory_usage_mb=1633, max_tokens=77, embed_dim=768, license="mit", @@ -233,6 +235,7 @@ def get_fused_embeddings( release_date="2023-04-26", modalities=["image", "text"], n_parameters=4_700_000_000, + memory_usage_mb=17929, max_tokens=77, embed_dim=1024, license="mit", @@ -258,6 +261,7 @@ def get_fused_embeddings( release_date="2023-04-26", modalities=["image", "text"], n_parameters=5_000_000_000, + memory_usage_mb=19073, max_tokens=77, embed_dim=1024, license="mit", diff --git a/mteb/models/gme_models.py b/mteb/models/gme_models.py index 84b1bbc0dd..42c0e48e14 100644 --- a/mteb/models/gme_models.py +++ b/mteb/models/gme_models.py @@ -14,6 +14,7 @@ revision="cfeb66885b598de483cc04eb08c7d9da534d7afe", release_date="2024-12-21", n_parameters=int(2.21 * 1e9), + memory_usage_mb=8427, max_tokens=32768, embed_dim=1536, license="mit", @@ -42,6 +43,7 @@ revision="d42eca5a540526cfa982a349724b24b25c12a95e", release_date="2024-12-21", n_parameters=int(8.29 * 1e9), + memory_usage_mb=31629, max_tokens=32768, embed_dim=3584, license="mit", diff --git a/mteb/models/gme_v_models.py b/mteb/models/gme_v_models.py index f7184725c8..b12bd75eb3 100644 --- a/mteb/models/gme_v_models.py +++ b/mteb/models/gme_v_models.py @@ -440,6 +440,7 @@ def fetch_image( release_date="2024-12-24", modalities=["image", "text"], n_parameters=2_210_000_000, + memory_usage_mb=8427, embed_dim=1536, license="apache-2.0", max_tokens=32768, @@ -464,6 +465,7 @@ def fetch_image( release_date="2024-12-24", modalities=["image", "text"], n_parameters=8_290_000_000, +
memory_usage_mb=31629, embed_dim=3584, license="apache-2.0", max_tokens=32768, diff --git a/mteb/models/google_models.py b/mteb/models/google_models.py index f863e289db..a424ecf3d0 100644 --- a/mteb/models/google_models.py +++ b/mteb/models/google_models.py @@ -8,8 +8,7 @@ from mteb.encoder_interface import Encoder, PromptType from mteb.model_meta import ModelMeta - -from .wrapper import Wrapper +from mteb.models.wrapper import Wrapper MULTILINGUAL_EVALUATED_LANGUAGES = [ "arb_Arab", @@ -145,6 +144,7 @@ def encode( revision="1", # revision is intended for implementation release_date="2024-05-14", n_parameters=None, + memory_usage_mb=None, max_tokens=2048, embed_dim=768, license=None, @@ -169,6 +169,7 @@ def encode( revision="1", # revision is intended for implementation release_date="2024-11-18", n_parameters=None, + memory_usage_mb=None, max_tokens=2048, embed_dim=768, license=None, @@ -193,6 +194,7 @@ def encode( revision="1", # revision is intended for implementation release_date="2024-05-14", n_parameters=None, + memory_usage_mb=None, max_tokens=2048, embed_dim=768, license=None, diff --git a/mteb/models/gritlm_models.py b/mteb/models/gritlm_models.py index 8126f3675a..02e48db4e1 100644 --- a/mteb/models/gritlm_models.py +++ b/mteb/models/gritlm_models.py @@ -4,9 +4,8 @@ from functools import partial from mteb.model_meta import ModelMeta - -from .e5_instruct import E5_MISTRAL_TRAINING_DATA -from .instruct_wrapper import instruct_wrapper +from mteb.models.e5_instruct import E5_MISTRAL_TRAINING_DATA +from mteb.models.instruct_wrapper import instruct_wrapper logger = logging.getLogger(__name__) @@ -36,6 +35,7 @@ def gritlm_instruction(instruction: str = "", prompt_type=None) -> str: revision="13f00a0e36500c80ce12870ea513846a066004af", release_date="2024-02-15", n_parameters=7_240_000_000, + memory_usage_mb=13813, embed_dim=4096, license="apache-2.0", max_tokens=4096, @@ -63,6 +63,7 @@ def gritlm_instruction(instruction: str = "", prompt_type=None) -> str: revision="7f089b13e3345510281733ca1e6ff871b5b4bc76", release_date="2024-02-15", n_parameters=57_920_000_000, + memory_usage_mb=89079, embed_dim=4096, license="apache-2.0", max_tokens=4096, diff --git a/mteb/models/gte_models.py b/mteb/models/gte_models.py index af01ccff9b..6ebf2548f0 100644 --- a/mteb/models/gte_models.py +++ b/mteb/models/gte_models.py @@ -39,6 +39,7 @@ def instruction_template( revision="e26182b2122f4435e8b3ebecbf363990f409b45b", release_date="2024-06-15", # initial commit of hf model. n_parameters=7_613_000_000, + memory_usage_mb=29040, embed_dim=3584, license="apache-2.0", reference="https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct", @@ -69,6 +70,7 @@ def instruction_template( revision="07d27e5226328010336563bc1b564a5e3436a298", release_date="2024-04-20", # initial commit of hf model. n_parameters=7_720_000_000, + memory_usage_mb=29449, embed_dim=4096, license="apache-2.0", max_tokens=32768, @@ -99,6 +101,7 @@ def instruction_template( revision="c6c1b92f4a3e1b92b326ad29dd3c8433457df8dd", release_date="2024-07-29", # initial commit of hf model. n_parameters=1_780_000_000, + memory_usage_mb=6776, embed_dim=8960, license="apache-2.0", max_tokens=131072, @@ -123,6 +126,7 @@ def instruction_template( revision="af7bd46fbb00b3a6963c8dd7f1786ddfbfbe973a", release_date="2023-11-08", # initial commit of hf model. 
n_parameters=30.3 * 1e6, + memory_usage_mb=58, embed_dim=1024, license="mit", max_tokens=512, @@ -147,6 +151,7 @@ def instruction_template( revision="71ab7947d6fac5b64aa299e6e40e6c2b2e85976c", release_date="2023-11-08", # initial commit of hf model. n_parameters=102 * 1e6, + memory_usage_mb=195, embed_dim=1024, license="mit", max_tokens=512, @@ -171,6 +176,7 @@ def instruction_template( revision="64c364e579de308104a9b2c170ca009502f4f545", release_date="2023-11-08", # initial commit of hf model. n_parameters=326 * 1e6, + memory_usage_mb=621, embed_dim=1024, license="mit", max_tokens=512, @@ -292,6 +298,7 @@ def instruction_template( revision="ca1791e0bcc104f6db161f27de1340241b13c5a4", release_date="2024-07-20", # initial commit of hf model. n_parameters=305 * 1e6, + memory_usage_mb=582, embed_dim=1024, license="apache-2", max_tokens=8192, @@ -316,6 +323,7 @@ def instruction_template( revision="7ca8b4ca700621b67618669f5378fe5f5820b8e4", release_date="2025-01-21", # initial commit of hf model. n_parameters=149 * 1e6, + memory_usage_mb=284, embed_dim=768, license="apache-2", max_tokens=8192, diff --git a/mteb/models/ibm_granite_models.py b/mteb/models/ibm_granite_models.py index f7c748c2d8..f0ab0f0c6e 100644 --- a/mteb/models/ibm_granite_models.py +++ b/mteb/models/ibm_granite_models.py @@ -96,6 +96,7 @@ revision="47db56afe692f731540413c67dd818ff492277e7", release_date="2024-12-18", n_parameters=107_000_000, + memory_usage_mb=204, embed_dim=384, license="apache-2.0", max_tokens=512, @@ -122,6 +123,7 @@ revision="84e3546b88b0cb69f8078608a1df558020bcbf1f", release_date="2024-12-18", n_parameters=278_000_000, + memory_usage_mb=530, embed_dim=768, license="apache-2.0", max_tokens=512, @@ -148,6 +150,7 @@ revision="eddbb57470f896b5f8e2bfcb823d8f0e2d2024a5", release_date="2024-12-18", n_parameters=30_000_000, + memory_usage_mb=58, embed_dim=384, license="apache-2.0", max_tokens=512, @@ -174,6 +177,7 @@ revision="e48d3a5b47eaa18e3fe07d4676e187fd80f32730", release_date="2024-12-18", n_parameters=125_000_000, + memory_usage_mb=238, embed_dim=768, license="apache-2.0", max_tokens=512, diff --git a/mteb/models/inf_models.py b/mteb/models/inf_models.py index a28261e980..fbb14c93c0 100644 --- a/mteb/models/inf_models.py +++ b/mteb/models/inf_models.py @@ -17,6 +17,7 @@ revision="cb70ca7c31dfa866b2eff2dad229c144d8ddfd91", release_date="2024-12-24", # initial commit of hf model. 
n_parameters=7_069_121_024, + memory_usage_mb=13483, embed_dim=3584, license="apache-2.0", max_tokens=32768, diff --git a/mteb/models/instruct_wrapper.py b/mteb/models/instruct_wrapper.py index cc6e814629..2cbf9eccbd 100644 --- a/mteb/models/instruct_wrapper.py +++ b/mteb/models/instruct_wrapper.py @@ -9,8 +9,7 @@ from sentence_transformers import SentenceTransformer from mteb.encoder_interface import PromptType - -from .wrapper import Wrapper +from mteb.models.wrapper import Wrapper logger = logging.getLogger(__name__) diff --git a/mteb/models/jasper_models.py b/mteb/models/jasper_models.py index d0ff4ab681..1cf0b53a54 100644 --- a/mteb/models/jasper_models.py +++ b/mteb/models/jasper_models.py @@ -12,9 +12,8 @@ import mteb from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta - -from .nvidia_models import nvidia_training_datasets -from .wrapper import Wrapper +from mteb.models.nvidia_models import nvidia_training_datasets +from mteb.models.wrapper import Wrapper logger = logging.getLogger(__name__) @@ -81,6 +80,7 @@ def encode( revision="d6330ce98f8a0d741e781df845904c9484f00efa", release_date="2024-12-11", # first commit n_parameters=1_999_000_000, + memory_usage_mb=3802, max_tokens=131072, embed_dim=8960, license="apache-2.0", diff --git a/mteb/models/jina_clip.py b/mteb/models/jina_clip.py index 551c82c101..94d498802f 100644 --- a/mteb/models/jina_clip.py +++ b/mteb/models/jina_clip.py @@ -159,6 +159,7 @@ def encode( # type: ignore release_date="2024-05-30", modalities=["image", "text"], n_parameters=223_000_000, + memory_usage_mb=849, max_tokens=8192, embed_dim=768, license="apache-2.0", diff --git a/mteb/models/jina_models.py b/mteb/models/jina_models.py index 87f77bdd64..01b10b1318 100644 --- a/mteb/models/jina_models.py +++ b/mteb/models/jina_models.py @@ -9,10 +9,9 @@ import torch from sentence_transformers import __version__ as st_version +from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta - -from ..encoder_interface import PromptType -from .sentence_transformer_wrapper import SentenceTransformerWrapper +from mteb.models.sentence_transformer_wrapper import SentenceTransformerWrapper logger = logging.getLogger(__name__) @@ -215,6 +214,7 @@ def encode( revision="215a6e121fa0183376388ac6b1ae230326bfeaed", release_date="2024-09-18", # official release date n_parameters=int(572 * 1e6), + memory_usage_mb=1092, max_tokens=8194, embed_dim=4096, license="cc-by-nc-4.0", @@ -258,6 +258,7 @@ def encode( revision="6e85f575bc273f1fd840a658067d0157933c83f0", release_date="2023-09-27", n_parameters=137_000_000, + memory_usage_mb=262, embed_dim=768, license="apache-2.0", max_tokens=8192, @@ -285,6 +286,7 @@ def encode( revision="796cff318cdd4e5fbe8b7303a1ef8cbec36996ef", release_date="2023-09-27", n_parameters=32_700_000, + memory_usage_mb=62, embed_dim=512, license="apache-2.0", max_tokens=8192, @@ -312,6 +314,7 @@ def encode( revision="aa0645035294a8c0607ce5bb700aba982cdff32c", release_date="2023-07-07", n_parameters=110_000_000, + memory_usage_mb=420, embed_dim=768, license="apache-2.0", max_tokens=512, @@ -339,6 +342,7 @@ def encode( revision="c1fed70aa4823a640f1a7150a276e4d3b08dce08", release_date="2023-07-07", n_parameters=35_000_000, + memory_usage_mb=134, embed_dim=512, license="apache-2.0", max_tokens=512, diff --git a/mteb/models/lens_models.py b/mteb/models/lens_models.py index 380724e53e..c83bf2a3d0 100644 --- a/mteb/models/lens_models.py +++ b/mteb/models/lens_models.py @@ -1,8 +1,7 @@ from __future__ import annotations 
from mteb.model_meta import ModelMeta - -from .bge_models import bge_full_data +from mteb.models.bge_models import bge_full_data lens_d4000 = ModelMeta( loader=None, # TODO: implement this in the future @@ -12,6 +11,7 @@ revision="e473b33364e6c48a324796fd1411d3b93670c6fe", release_date="2025-01-17", n_parameters=int(7.11 * 1e9), + memory_usage_mb=27125, embed_dim=4000, license="apache-2.0", reference="https://huggingface.co/yibinlei/LENS-d4000", @@ -32,6 +32,7 @@ revision="a0b87bd91cb27b6f2f0b0fe22c28026da1d464ef", release_date="2025-01-17", n_parameters=int(7.11 * 1e9), + memory_usage_mb=27125, embed_dim=8000, license="apache-2.0", reference="https://huggingface.co/yibinlei/LENS-d8000", diff --git a/mteb/models/linq_models.py b/mteb/models/linq_models.py index ead10ebf71..ec9e0ee9fe 100644 --- a/mteb/models/linq_models.py +++ b/mteb/models/linq_models.py @@ -32,6 +32,7 @@ def instruction_template( revision="0c1a0b0589177079acc552433cad51d7c9132379", release_date="2024-05-29", # initial commit of hf model. n_parameters=7_110_000_000, + memory_usage_mb=13563, embed_dim=4096, license="cc-by-nc-4.0", max_tokens=32768, diff --git a/mteb/models/llm2vec_models.py b/mteb/models/llm2vec_models.py index 4ea017640c..eb2a8537c0 100644 --- a/mteb/models/llm2vec_models.py +++ b/mteb/models/llm2vec_models.py @@ -8,8 +8,7 @@ from mteb.encoder_interface import Encoder, PromptType from mteb.model_meta import ModelMeta - -from .wrapper import Wrapper +from mteb.models.wrapper import Wrapper logger = logging.getLogger(__name__) @@ -122,6 +121,7 @@ def loader_inner(**kwargs: Any) -> Encoder: # TODO: Not sure what to put here as a model is made of two peft repos, each with a different revision release_date="2024-04-09", n_parameters=7_505_000_000, + memory_usage_mb=28629, max_tokens=8192, embed_dim=4096, license="mit", @@ -148,6 +148,7 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="1cb7b735326d13a8541db8f57f35da5373f5e9c6", release_date="2024-04-09", n_parameters=7_505_000_000, + memory_usage_mb=28629, max_tokens=8192, embed_dim=4096, license="mit", @@ -174,6 +175,7 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="0ae69bdd5816105778b971c3138e8f8a18eaa3ae", release_date="2024-04-09", n_parameters=7_111_000_000, + memory_usage_mb=27126, max_tokens=32768, embed_dim=4096, license="mit", @@ -200,6 +202,7 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="2c055a5d77126c0d3dc6cd8ffa30e2908f4f45f8", release_date="2024-04-09", n_parameters=7_111_000_000, + memory_usage_mb=27126, max_tokens=32768, embed_dim=4096, license="mit", @@ -226,6 +229,7 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="2c055a5d77126c0d3dc6cd8ffa30e2908f4f45f8", release_date="2024-04-09", n_parameters=7_111_000_000, + memory_usage_mb=27126, max_tokens=32768, embed_dim=4096, license="mit", @@ -252,6 +256,7 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="a76944871d169ebe7c97eb921764cd063afed785", release_date="2024-04-09", n_parameters=7_111_000_000, + memory_usage_mb=27126, max_tokens=32768, embed_dim=4096, license="mit", @@ -278,6 +283,7 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="a5943d406c6b016fef3f07906aac183cf1a0b47d", release_date="2024-04-09", n_parameters=7_111_000_000, + memory_usage_mb=27126, max_tokens=32768, embed_dim=4096, license="mit", @@ -304,6 +310,7 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="a5943d406c6b016fef3f07906aac183cf1a0b47d", release_date="2024-04-09", n_parameters=7_111_000_000, + memory_usage_mb=27126, max_tokens=32768, embed_dim=4096, license="mit", 
diff --git a/mteb/models/misc_models.py b/mteb/models/misc_models.py index da1f591c0d..ba4f9cb901 100644 --- a/mteb/models/misc_models.py +++ b/mteb/models/misc_models.py @@ -5,10 +5,9 @@ import torch from mteb.model_meta import ModelMeta, sentence_transformers_loader +from mteb.models.bge_models import bge_m3_training_data, bge_training_data from mteb.models.e5_models import E5_TRAINING_DATA - -from .bge_models import bge_m3_training_data, bge_training_data -from .sentence_transformers_models import sent_trf_training_dataset +from mteb.models.sentence_transformers_models import sent_trf_training_dataset Haon_Chen__speed_embedding_7b_instruct = ModelMeta( name="Haon-Chen/speed-embedding-7b-instruct", @@ -17,6 +16,7 @@ languages=["eng_Latn"], loader=None, n_parameters=7110660096, + memory_usage_mb=13563, max_tokens=32768.0, embed_dim=None, license="mit", @@ -38,6 +38,7 @@ languages=[], loader=None, n_parameters=278043648, + memory_usage_mb=1061, max_tokens=514.0, embed_dim=768, license=None, @@ -59,6 +60,7 @@ languages=None, loader=None, n_parameters=494032768, + memory_usage_mb=1885, max_tokens=131072.0, embed_dim=896, license="mit", @@ -80,6 +82,7 @@ languages=None, loader=None, n_parameters=494032768, + memory_usage_mb=1885, max_tokens=131072.0, embed_dim=896, license="mit", @@ -101,6 +104,7 @@ languages=["eng_Latn"], loader=None, n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=768, license="apache-2.0", @@ -166,6 +170,7 @@ languages=[], loader=None, n_parameters=2506172416, + memory_usage_mb=9560, max_tokens=8192.0, embed_dim=2048, license=None, @@ -187,6 +192,7 @@ languages=["eng_Latn"], loader=None, n_parameters=7241732096, + memory_usage_mb=27625, max_tokens=32768.0, embed_dim=None, license="apache-2.0", @@ -214,6 +220,7 @@ trust_remote_code=True, ), n_parameters=278043648, + memory_usage_mb=1061, max_tokens=514.0, embed_dim=768, license="apache-2.0", @@ -240,6 +247,7 @@ trust_remote_code=True, ), n_parameters=559890432, + memory_usage_mb=2136, max_tokens=514.0, embed_dim=1024, license="apache-2.0", @@ -266,6 +274,7 @@ trust_remote_code=True, ), n_parameters=117653760, + memory_usage_mb=449, max_tokens=512.0, embed_dim=384, license="apache-2.0", @@ -287,6 +296,7 @@ languages=None, loader=None, n_parameters=17389824, + memory_usage_mb=66, max_tokens=512.0, embed_dim=384, license="mit", @@ -309,6 +319,7 @@ languages=None, loader=None, n_parameters=22713216, + memory_usage_mb=87, max_tokens=512.0, embed_dim=384, license="mit", @@ -331,6 +342,7 @@ languages=None, loader=None, n_parameters=15615360, + memory_usage_mb=60, max_tokens=512.0, embed_dim=384, license="mit", @@ -353,6 +365,7 @@ languages=None, loader=None, n_parameters=15615360, + memory_usage_mb=60, max_tokens=512.0, embed_dim=384, license="mit", @@ -375,6 +388,7 @@ languages=None, loader=None, n_parameters=17389824, + memory_usage_mb=66, max_tokens=512.0, embed_dim=384, license="mit", @@ -397,6 +411,7 @@ languages=None, loader=None, n_parameters=17389824, + memory_usage_mb=66, max_tokens=512.0, embed_dim=384, license="mit", @@ -418,6 +433,7 @@ languages=None, loader=None, n_parameters=19164288, + memory_usage_mb=73, max_tokens=512.0, embed_dim=384, license="mit", @@ -439,6 +455,7 @@ languages=["fra_Latn"], loader=None, n_parameters=559890432, + memory_usage_mb=2136, max_tokens=514.0, embed_dim=1024, license="mit", @@ -460,6 +477,7 @@ languages=["ara_Arab"], loader=None, n_parameters=135193344, + memory_usage_mb=516, max_tokens=512.0, embed_dim=768, license="apache-2.0", @@ -481,6 +499,7 @@ 
languages=["ara_Arab"], loader=None, n_parameters=117653760, + memory_usage_mb=449, max_tokens=512.0, embed_dim=384, license="apache-2.0", @@ -504,6 +523,7 @@ languages=["ara_Arab"], loader=None, n_parameters=278043648, + memory_usage_mb=1061, max_tokens=514.0, embed_dim=768, license="apache-2.0", @@ -527,6 +547,7 @@ languages=["ara_Arab"], loader=None, n_parameters=470926848, + memory_usage_mb=1796, max_tokens=512.0, embed_dim=768, license="apache-2.0", @@ -550,6 +571,7 @@ languages=["ara_Arab"], loader=None, n_parameters=109486464, + memory_usage_mb=418, max_tokens=514.0, embed_dim=768, license="apache-2.0", @@ -573,6 +595,7 @@ languages=["ara_Arab"], loader=None, n_parameters=162841344, + memory_usage_mb=621, max_tokens=512.0, embed_dim=768, license="apache-2.0", @@ -594,6 +617,7 @@ languages=None, loader=None, n_parameters=None, + memory_usage_mb=None, max_tokens=512.0, embed_dim=1024, license="apache-2.0", @@ -615,6 +639,7 @@ languages=None, loader=None, n_parameters=None, + memory_usage_mb=None, max_tokens=514.0, embed_dim=768, license=None, @@ -636,6 +661,7 @@ languages=None, loader=None, n_parameters=567754752, + memory_usage_mb=2166, max_tokens=8194.0, embed_dim=1024, license=None, @@ -657,6 +683,7 @@ languages=None, loader=None, n_parameters=1279887360, + memory_usage_mb=2441, max_tokens=2048.0, embed_dim=2048, license=None, @@ -678,6 +705,7 @@ languages=None, loader=None, n_parameters=1279887360, + memory_usage_mb=2441, max_tokens=2048.0, embed_dim=2048, license=None, @@ -699,6 +727,7 @@ languages=["fra_Latn", "eng_Latn"], loader=None, n_parameters=1279887360, + memory_usage_mb=2441, max_tokens=2048.0, embed_dim=2048, license="mit", @@ -721,6 +750,7 @@ languages=["eng_Latn"], loader=None, n_parameters=109482752, + memory_usage_mb=209, max_tokens=512.0, embed_dim=768, license="mit", @@ -742,6 +772,7 @@ languages=["eng_Latn"], loader=None, n_parameters=335142400, + memory_usage_mb=639, max_tokens=512.0, embed_dim=1024, license="mit", @@ -763,6 +794,7 @@ languages=["eng_Latn"], loader=None, n_parameters=33360512, + memory_usage_mb=64, max_tokens=512.0, embed_dim=384, license="mit", @@ -784,6 +816,7 @@ languages=["pol_Latn"], loader=None, n_parameters=103705344, + memory_usage_mb=396, max_tokens=512.0, embed_dim=768, license="gpl-3.0", @@ -805,6 +838,7 @@ languages=["pol_Latn"], loader=None, n_parameters=None, + memory_usage_mb=None, max_tokens=514.0, embed_dim=768, license="lgpl", @@ -826,6 +860,7 @@ languages=["pol_Latn"], loader=None, n_parameters=278043648, + memory_usage_mb=1061, max_tokens=514.0, embed_dim=768, license="apache-2.0", @@ -847,6 +882,7 @@ languages=["eng_Latn"], loader=None, n_parameters=None, + memory_usage_mb=None, max_tokens=4096.0, embed_dim=None, license="mit", @@ -868,6 +904,7 @@ languages=["pol_Latn"], loader=None, n_parameters=559890432, + memory_usage_mb=2136, max_tokens=514.0, embed_dim=1024, license="apache-2.0", @@ -889,6 +926,7 @@ languages=["pol_Latn"], loader=None, n_parameters=117653760, + memory_usage_mb=449, max_tokens=512.0, embed_dim=384, license="apache-2.0", @@ -910,6 +948,7 @@ languages=["pol_Latn"], loader=None, n_parameters=124442880, + memory_usage_mb=475, max_tokens=514.0, embed_dim=768, license="apache-2.0", @@ -931,6 +970,7 @@ languages=["pol_Latn"], loader=None, n_parameters=434961408, + memory_usage_mb=1659, max_tokens=514.0, embed_dim=1024, license="apache-2.0", @@ -998,6 +1038,7 @@ ], loader=None, n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license="bigscience-bloom-rail-1.0", @@ -1065,6 +1106,7 @@ 
], loader=None, n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license="bigscience-bloom-rail-1.0", @@ -1132,6 +1174,7 @@ ], loader=None, n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license="bigscience-bloom-rail-1.0", @@ -1199,6 +1242,7 @@ ], loader=None, n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license="bigscience-bloom-rail-1.0", @@ -1220,6 +1264,7 @@ languages=["eng_Latn"], loader=None, n_parameters=109482240, + memory_usage_mb=418, max_tokens=512.0, embed_dim=768, license="mit", @@ -1241,6 +1286,7 @@ languages=["eng_Latn"], loader=None, n_parameters=22713216, + memory_usage_mb=87, max_tokens=512.0, embed_dim=384, license="mit", @@ -1262,6 +1308,7 @@ languages=["eng_Latn"], loader=None, n_parameters=335141888, + memory_usage_mb=1278, max_tokens=512.0, embed_dim=1024, license="mit", @@ -1283,6 +1330,7 @@ languages=["eng_Latn"], loader=None, n_parameters=33360000, + memory_usage_mb=127, max_tokens=512.0, embed_dim=384, license="mit", @@ -1304,6 +1352,7 @@ languages=None, loader=None, n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=4096, license=None, @@ -1325,6 +1374,7 @@ languages=["deu_Latn"], loader=None, n_parameters=335736320, + memory_usage_mb=1281, max_tokens=512.0, embed_dim=1024, license=None, @@ -1347,6 +1397,7 @@ languages=["eng_Latn"], loader=None, n_parameters=33360000, + memory_usage_mb=127, max_tokens=512.0, embed_dim=384, license="apache-2.0", @@ -1377,6 +1428,7 @@ languages=["eng_Latn"], loader=None, n_parameters=33360000, + memory_usage_mb=127, max_tokens=512.0, embed_dim=384, license="mit", @@ -1398,6 +1450,7 @@ languages=["eng_Latn"], loader=None, n_parameters=22713216, + memory_usage_mb=87, max_tokens=512.0, embed_dim=384, license="apache-2.0", @@ -1419,6 +1472,7 @@ languages=None, loader=None, n_parameters=None, + memory_usage_mb=1061, max_tokens=514.0, embed_dim=768, license=None, @@ -1440,6 +1494,7 @@ languages=["rus_Cyrl"], loader=None, n_parameters=359026688, + memory_usage_mb=1370, max_tokens=8194.0, embed_dim=1024, license="apache-2.0", @@ -1472,6 +1527,7 @@ languages=["eng_Latn"], loader=None, n_parameters=None, + memory_usage_mb=None, max_tokens=512.0, embed_dim=None, license="mit", @@ -1493,6 +1549,7 @@ languages=None, loader=None, n_parameters=98688000, + memory_usage_mb=158, max_tokens=512.0, embed_dim=1024, license=None, @@ -1514,6 +1571,7 @@ languages=["ara_Arab", "eng_Latn"], loader=None, n_parameters=559890432, + memory_usage_mb=2136, max_tokens=514.0, embed_dim=1024, license="apache-2.0", @@ -1545,6 +1603,7 @@ release_date="2024-09-04", languages=["zho_Hans", "eng_Latn"], n_parameters=2724880896, + memory_usage_mb=5197, max_tokens=512.0, embed_dim=2304, license=None, @@ -1576,6 +1635,7 @@ ], loader=None, n_parameters=117654272, + memory_usage_mb=449, max_tokens=512.0, embed_dim=384, license="apache-2.0", @@ -1598,6 +1658,7 @@ languages=["ara_Arab", "eng_Latn"], loader=None, n_parameters=135193344, + memory_usage_mb=516, max_tokens=512.0, embed_dim=768, license="apache-2.0", @@ -1619,7 +1680,8 @@ release_date="2022-03-25", languages=["zho_Hans"], loader=None, - n_parameters=None, # Not visible on repo + n_parameters=None, + memory_usage_mb=None, # Not visible on repo max_tokens=512, embed_dim=128, license="apache-2", @@ -1645,6 +1707,7 @@ languages=["zho_Hans"], loader=None, n_parameters=74.2 * 1e6, + memory_usage_mb=283, max_tokens=1024, embed_dim=768, license="apache-2", @@ -1665,6 +1728,7 @@ languages=["zho_Hans"], loader=None, 
n_parameters=326 * 1e6, + memory_usage_mb=1244, max_tokens=512, embed_dim=1024, license="not specified", @@ -1686,6 +1750,7 @@ languages=["zho_Hans"], loader=None, n_parameters=326 * 1e6, + memory_usage_mb=1242, max_tokens=512, embed_dim=768, license="not specified", @@ -1707,6 +1772,7 @@ languages=["zho_Hans"], loader=None, n_parameters=326 * 1e6, + memory_usage_mb=1244, max_tokens=512, embed_dim=1024, license="not specified", @@ -1728,6 +1794,7 @@ languages=["zho_Hans"], loader=None, n_parameters=326 * 1e6, + memory_usage_mb=1242, max_tokens=512, embed_dim=768, license="cc-by-nc-4.0", @@ -1748,6 +1815,7 @@ release_date="2023-10-10", languages=["eng_Latn"], n_parameters=335 * 1e6, + memory_usage_mb=1278, max_tokens=512, embed_dim=1024, license="mit", @@ -1767,6 +1835,7 @@ release_date="2024-04-30", languages=["eng_Latn"], n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license="proprietary", diff --git a/mteb/models/moco_models.py b/mteb/models/moco_models.py index 1c896331bc..cb2ee875da 100644 --- a/mteb/models/moco_models.py +++ b/mteb/models/moco_models.py @@ -153,6 +153,7 @@ def get_fused_embeddings( release_date="2024-06-03", modalities=["image"], n_parameters=86_600_000, + memory_usage_mb=330, max_tokens=None, embed_dim=768, license="cc-by-nc-4.0", @@ -177,6 +178,7 @@ def get_fused_embeddings( release_date="2024-06-03", modalities=["image"], n_parameters=304_000_000, + memory_usage_mb=1161, max_tokens=None, embed_dim=1024, license="cc-by-nc-4.0", diff --git a/mteb/models/model2vec_models.py b/mteb/models/model2vec_models.py index 33da211c7a..ee79f1cafa 100644 --- a/mteb/models/model2vec_models.py +++ b/mteb/models/model2vec_models.py @@ -8,9 +8,8 @@ import numpy as np from mteb.model_meta import ModelMeta - -from .bge_models import bge_training_data -from .wrapper import Wrapper +from mteb.models.bge_models import bge_training_data +from mteb.models.wrapper import Wrapper logger = logging.getLogger(__name__) @@ -65,6 +64,7 @@ def encode( revision="5f4f5ca159b7321a8b39739bba0794fa0debddf4", release_date="2024-09-21", n_parameters=int(103 * 1e6), + memory_usage_mb=391, max_tokens=np.inf, # Theoretically infinite embed_dim=256, license="mit", @@ -91,6 +91,7 @@ def encode( revision="38ebd7f10f71e67fa8db898290f92b82e9cfff2b", release_date="2024-09-21", n_parameters=int(102 * 1e6), + memory_usage_mb=391, max_tokens=np.inf, embed_dim=256, license="mit", @@ -116,6 +117,7 @@ def encode( revision="02460ae401a22b09d2c6652e23371398329551e2", release_date="2024-09-21", n_parameters=int(7.56 * 1e6), + memory_usage_mb=29, max_tokens=np.inf, embed_dim=256, license="mit", @@ -141,6 +143,7 @@ def encode( revision="2cf4ec4e1f51aeca6c55cf9b93097d00711a6305", release_date="2024-09-21", n_parameters=int(128 * 1e6), + memory_usage_mb=489, max_tokens=np.inf, embed_dim=256, license="mit", @@ -166,6 +169,7 @@ def encode( revision="86db093558fbced2072b929eb1690bce5272bd4b", release_date="2024-10-29", n_parameters=2 * 1e6, + memory_usage_mb=7, max_tokens=np.inf, embed_dim=64, license="mit", @@ -191,6 +195,7 @@ def encode( revision="81b1802ada41afcd0987a37dc15e569c9fa76f04", release_date="2024-10-29", n_parameters=3.78 * 1e6, + memory_usage_mb=14, max_tokens=np.inf, embed_dim=128, license="mit", @@ -216,6 +221,7 @@ def encode( revision="dcbec7aa2d52fc76754ac6291803feedd8c619ce", release_date="2024-10-29", n_parameters=7.56 * 1e6, + memory_usage_mb=29, max_tokens=np.inf, embed_dim=256, license="mit", diff --git a/mteb/models/moka_models.py b/mteb/models/moka_models.py index 
c0d22b7552..2c62dcea9b 100644 --- a/mteb/models/moka_models.py +++ b/mteb/models/moka_models.py @@ -88,6 +88,7 @@ revision="764b537a0e50e5c7d64db883f2d2e051cbe3c64c", release_date="2023-06-06", # first commit n_parameters=102 * 1e6, + memory_usage_mb=390, embed_dim=768, # They don't give a specific license but commercial use is not allowed license="unspecified-noncommercial", @@ -109,7 +110,8 @@ open_weights=True, revision="44c696631b2a8c200220aaaad5f987f096e986df", release_date="2023-06-02", # first commit - n_parameters=None, # Can't be seen on HF page + n_parameters=None, + memory_usage_mb=None, # Can't be seen on HF page embed_dim=512, # They don't give a specific license but commercial use is not allowed license="unspecified-noncommercial", @@ -131,7 +133,8 @@ open_weights=True, revision="12900375086c37ba5d83d1e417b21dc7d1d1f388", release_date="2023-06-21", # first commit - n_parameters=None, # Can't be seen on HF page + n_parameters=None, + memory_usage_mb=None, # Can't be seen on HF page embed_dim=768, # They don't give a specific license but commercial use is not allowed license="unspecified-noncommercial", diff --git a/mteb/models/mxbai_models.py b/mteb/models/mxbai_models.py index 921db17871..133bbbed7f 100644 --- a/mteb/models/mxbai_models.py +++ b/mteb/models/mxbai_models.py @@ -19,6 +19,7 @@ revision="990580e27d329c7408b3741ecff85876e128e203", release_date="2024-03-07", # initial commit of hf model. n_parameters=335_000_000, + memory_usage_mb=639, max_tokens=512, embed_dim=1024, license="apache-2.0", diff --git a/mteb/models/no_instruct_sentence_models.py b/mteb/models/no_instruct_sentence_models.py index 9ff5cf901f..963cbdebed 100644 --- a/mteb/models/no_instruct_sentence_models.py +++ b/mteb/models/no_instruct_sentence_models.py @@ -9,10 +9,9 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta +from mteb.models.utils import batched from mteb.models.wrapper import Wrapper -from .utils import batched - class NoInstructWrapper(Wrapper): def __init__( @@ -90,6 +89,7 @@ def encode( # type: ignore revision="b38747000553d8268915c95a55fc87e707c9aadd", release_date="2024-05-01", # first commit n_parameters=33_400_000, + memory_usage_mb=127, max_tokens=512, embed_dim=384, license="mit", diff --git a/mteb/models/nomic_models.py b/mteb/models/nomic_models.py index 13c8e59033..f02157aa09 100644 --- a/mteb/models/nomic_models.py +++ b/mteb/models/nomic_models.py @@ -13,8 +13,7 @@ import mteb from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta - -from .sentence_transformer_wrapper import SentenceTransformerWrapper +from mteb.models.sentence_transformer_wrapper import SentenceTransformerWrapper logger = logging.getLogger(__name__) @@ -195,6 +194,7 @@ def encode( # type: ignore revision="b0753ae76394dd36bcfb912a46018088bca48be0", release_date="2024-02-10", # first commit n_parameters=137_000_000, + memory_usage_mb=522, max_tokens=8192, embed_dim=768, license="apache-2.0", @@ -223,6 +223,7 @@ def encode( # type: ignore revision="0759316f275aa0cb93a5b830973843ca66babcf5", release_date="2024-01-31", # first commit n_parameters=None, + memory_usage_mb=522, max_tokens=8192, embed_dim=768, license="apache-2.0", @@ -251,6 +252,7 @@ def encode( # type: ignore revision="7d948905c5d5d3874fa55a925d68e49dbf411e5f", release_date="2024-01-15", # first commit n_parameters=None, + memory_usage_mb=None, max_tokens=8192, embed_dim=768, license="apache-2.0", @@ -279,6 +281,7 @@ def encode( # type: ignore 
revision="b53d557b15ae63852847c222d336c1609eced93c", release_date="2024-01-15", # first commit n_parameters=None, + memory_usage_mb=None, max_tokens=8192, embed_dim=768, license="apache-2.0", @@ -309,6 +312,7 @@ def encode( # type: ignore revision="5960f1566fb7cb1adf1eb6e816639cf4646d9b12", release_date="2024-12-29", n_parameters=149_000_000, + memory_usage_mb=568, max_tokens=8192, embed_dim=768, license="apache-2.0", diff --git a/mteb/models/nomic_models_vision.py b/mteb/models/nomic_models_vision.py index 4eb00316ae..661bb7aa1f 100644 --- a/mteb/models/nomic_models_vision.py +++ b/mteb/models/nomic_models_vision.py @@ -172,6 +172,7 @@ def get_fused_embeddings( release_date="2024-06-08", modalities=["image", "text"], n_parameters=92_900_000, + memory_usage_mb=355, max_tokens=2048, embed_dim=768, license="apache-2.0", diff --git a/mteb/models/nvidia_models.py b/mteb/models/nvidia_models.py index 3a6ae0a566..1cfa01cdef 100644 --- a/mteb/models/nvidia_models.py +++ b/mteb/models/nvidia_models.py @@ -89,6 +89,7 @@ def instruction_template( revision="7604d305b621f14095a1aa23d351674c2859553a", release_date="2024-09-09", # initial commit of hf model. n_parameters=7_850_000_000, + memory_usage_mb=14975, embed_dim=4096, license="cc-by-nc-4.0", max_tokens=32768, @@ -119,6 +120,7 @@ def instruction_template( revision="570834afd5fef5bf3a3c2311a2b6e0a66f6f4f2c", release_date="2024-09-13", # initial commit of hf model. n_parameters=7_850_000_000, + memory_usage_mb=29945, embed_dim=4096, license="cc-by-nc-4.0", max_tokens=32768, diff --git a/mteb/models/openai_models.py b/mteb/models/openai_models.py index 079e7c9361..813822e169 100644 --- a/mteb/models/openai_models.py +++ b/mteb/models/openai_models.py @@ -8,10 +8,9 @@ import tqdm from mteb.model_meta import ModelMeta +from mteb.models.wrapper import Wrapper from mteb.requires_package import requires_package -from .wrapper import Wrapper - logger = logging.getLogger(__name__) @@ -130,6 +129,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray: embed_dim=1536, open_weights=False, n_parameters=None, + memory_usage_mb=None, license=None, reference="https://openai.com/index/new-embedding-models-and-api-updates/", similarity_fn_name="cosine", @@ -157,6 +157,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray: framework=["API"], use_instructions=False, n_parameters=None, + memory_usage_mb=None, public_training_code=None, public_training_data=None, # assumed training_datasets=None, @@ -181,6 +182,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray: framework=["API"], use_instructions=False, n_parameters=None, + memory_usage_mb=None, public_training_code=None, public_training_data=None, # assumed training_datasets=None, diff --git a/mteb/models/openclip_models.py b/mteb/models/openclip_models.py index 26e89e6cb6..3079ff6933 100644 --- a/mteb/models/openclip_models.py +++ b/mteb/models/openclip_models.py @@ -161,6 +161,7 @@ def get_fused_embeddings( release_date="2023-04-26", modalities=["image", "text"], n_parameters=428_000_000, + memory_usage_mb=1633, max_tokens=77, embed_dim=768, license="mit", @@ -187,6 +188,7 @@ def get_fused_embeddings( release_date="2023-04-26", modalities=["image", "text"], n_parameters=151_000_000, + memory_usage_mb=576, max_tokens=77, embed_dim=512, license="mit", @@ -213,6 +215,7 @@ def get_fused_embeddings( release_date="2023-04-26", modalities=["image", "text"], n_parameters=150_000_000, + memory_usage_mb=572, max_tokens=77, embed_dim=512, license="mit", @@ -239,6 +242,7 @@ def get_fused_embeddings( 
release_date="2023-01-23", modalities=["image", "text"], n_parameters=2_540_000_000, + memory_usage_mb=9689, max_tokens=77, embed_dim=1280, license="mit", @@ -265,6 +269,7 @@ def get_fused_embeddings( release_date="2023-03-06", modalities=["image", "text"], n_parameters=1_367_000_000, + memory_usage_mb=5215, max_tokens=77, embed_dim=1024, license="mit", @@ -291,6 +296,7 @@ def get_fused_embeddings( release_date="2022-09-15", modalities=["image", "text"], n_parameters=986_000_000, + memory_usage_mb=3762, max_tokens=77, embed_dim=1024, license="mit", @@ -317,6 +323,7 @@ def get_fused_embeddings( release_date="2022-09-15", modalities=["image", "text"], n_parameters=428_000_000, + memory_usage_mb=1631, max_tokens=77, embed_dim=768, license="mit", @@ -343,6 +350,7 @@ def get_fused_embeddings( release_date="2022-09-15", modalities=["image", "text"], n_parameters=151_000_000, + memory_usage_mb=577, max_tokens=77, embed_dim=512, license="mit", diff --git a/mteb/models/overview.py b/mteb/models/overview.py index a43694141d..ec8fc8f0bd 100644 --- a/mteb/models/overview.py +++ b/mteb/models/overview.py @@ -258,6 +258,7 @@ def model_meta_from_hf_hub(model_name: str) -> ModelMeta: training_datasets=card_data.get("datasets", None), similarity_fn_name=None, n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, open_weights=True, @@ -273,6 +274,7 @@ def model_meta_from_hf_hub(model_name: str) -> ModelMeta: languages=None, release_date=None, n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license=None, @@ -306,6 +308,7 @@ def model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMe framework=["Sentence Transformers"], similarity_fn_name=model.similarity_fn_name, n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license=None, @@ -325,6 +328,7 @@ def model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMe languages=None, release_date=None, n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license=None, diff --git a/mteb/models/piccolo_models.py b/mteb/models/piccolo_models.py index d51487b8ba..4c24e9ba86 100644 --- a/mteb/models/piccolo_models.py +++ b/mteb/models/piccolo_models.py @@ -10,7 +10,8 @@ open_weights=True, revision="47c0a63b8f667c3482e05b2fd45577bb19252196", release_date="2023-09-04", # first commit - n_parameters=None, # can't see on model card + n_parameters=None, + memory_usage_mb=None, # can't see on model card embed_dim=768, license="mit", max_tokens=512, @@ -32,7 +33,8 @@ # "Due to certain internal company considerations" revision="05948c1d889355936bdf9db7d30df57dd78d25a3", release_date="2024-04-22", # first commit - n_parameters=None, # we don't know because they removed the model + n_parameters=None, + memory_usage_mb=None, # we don't know because they removed the model embed_dim=1024, license="not specified", max_tokens=512, diff --git a/mteb/models/promptriever_models.py b/mteb/models/promptriever_models.py index 79ed1269e0..cbed2e89c8 100644 --- a/mteb/models/promptriever_models.py +++ b/mteb/models/promptriever_models.py @@ -8,9 +8,8 @@ from mteb.encoder_interface import Encoder from mteb.model_meta import ModelMeta - -from .repllama_models import RepLLaMAWrapper -from .wrapper import Wrapper +from mteb.models.repllama_models import RepLLaMAWrapper +from mteb.models.wrapper import Wrapper logger = logging.getLogger(__name__) @@ -55,7 +54,8 @@ def loader_inner(**kwargs: Any) -> Encoder: open_weights=True, 
revision="01c7f73d771dfac7d292323805ebc428287df4f9-30b14e3813c0fa45facfd01a594580c3fe5ecf23", # base-peft revision release_date="2024-09-15", - n_parameters=7_000_000, + n_parameters=7_000_000_000, + memory_usage_mb=27, max_tokens=4096, embed_dim=4096, license="apache-2.0", @@ -85,7 +85,8 @@ def loader_inner(**kwargs: Any) -> Encoder: "mMARCO-NL": ["train"], # translation not trained on }, release_date="2024-09-15", - n_parameters=8_000_000, + n_parameters=8_000_000_000, + memory_usage_mb=31, max_tokens=8192, embed_dim=4096, license="apache-2.0", @@ -110,7 +111,8 @@ def loader_inner(**kwargs: Any) -> Encoder: open_weights=True, revision="5206a32e0bd3067aef1ce90f5528ade7d866253f-8b677258615625122c2eb7329292b8c402612c21", # base-peft revision release_date="2024-09-15", - n_parameters=8_000_000, + n_parameters=8_000_000_000, + memory_usage_mb=31, max_tokens=8192, embed_dim=4096, training_datasets={ @@ -139,7 +141,8 @@ def loader_inner(**kwargs: Any) -> Encoder: open_weights=True, revision="7231864981174d9bee8c7687c24c8344414eae6b-876d63e49b6115ecb6839893a56298fadee7e8f5", # base-peft revision release_date="2024-09-15", - n_parameters=7_000_000, + n_parameters=7_000_000_000, + memory_usage_mb=27, training_datasets={ "samaya-ai/msmarco-w-instructions": ["train"], "mMARCO-NL": ["train"], # translation not trained on diff --git a/mteb/models/qtack_models.py b/mteb/models/qtack_models.py index f7f83f728a..4cfd43461a 100644 --- a/mteb/models/qtack_models.py +++ b/mteb/models/qtack_models.py @@ -37,6 +37,7 @@ revision="7fbe6f9b4cc42615e0747299f837ad7769025492", release_date="2025-01-28", n_parameters=66.3 * 1e6, + memory_usage_mb=253, embed_dim=768, license="apache-2.0", max_tokens=512, diff --git a/mteb/models/repllama_models.py b/mteb/models/repllama_models.py index b3c206651e..8631af927b 100644 --- a/mteb/models/repllama_models.py +++ b/mteb/models/repllama_models.py @@ -11,8 +11,7 @@ from mteb.encoder_interface import Encoder, PromptType from mteb.model_meta import ModelMeta - -from .wrapper import Wrapper +from mteb.models.wrapper import Wrapper logger = logging.getLogger(__name__) @@ -145,6 +144,7 @@ def loader_inner(**kwargs: Any) -> Encoder: "mMARCO-NL": ["train"], # translation not trained on }, n_parameters=7_000_000, + memory_usage_mb=27, max_tokens=4096, embed_dim=4096, license="apache-2.0", @@ -172,6 +172,7 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="01c7f73d771dfac7d292323805ebc428287df4f9-ad5c1d0938a1e02954bcafb4d811ba2f34052e71", # base-peft revision release_date="2024-09-15", n_parameters=7_000_000, + memory_usage_mb=27, max_tokens=4096, embed_dim=4096, license="apache-2.0", diff --git a/mteb/models/rerankers_custom.py b/mteb/models/rerankers_custom.py index 0e2c8d8f73..7c966cdf78 100644 --- a/mteb/models/rerankers_custom.py +++ b/mteb/models/rerankers_custom.py @@ -206,6 +206,7 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="0a97706f3827389da43b83348d5d18c9d53876fa", release_date="2020-05-28", n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license=None, @@ -231,6 +232,7 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="126747772a932960028d9f4dc93bd5d9c4869be4", release_date="2024-09-26", n_parameters=None, + memory_usage_mb=531, max_tokens=None, embed_dim=None, license=None, @@ -288,6 +290,7 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="953dc6f6f85a1b2dbfca4c34a2796e7dde08d41e", release_date="2024-06-24", n_parameters=None, + memory_usage_mb=2166, max_tokens=None, embed_dim=None, license=None, diff --git 
a/mteb/models/rerankers_monot5_based.py b/mteb/models/rerankers_monot5_based.py index c53b364000..afc31e5a2d 100644 --- a/mteb/models/rerankers_monot5_based.py +++ b/mteb/models/rerankers_monot5_based.py @@ -297,6 +297,7 @@ def get_prediction_tokens(self, *args, **kwargs): revision="77f8e3f7b1eb1afe353aa21a7c3a2fc8feca702e", release_date="2022-03-28", n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license=None, @@ -321,6 +322,7 @@ def get_prediction_tokens(self, *args, **kwargs): revision="f15657ab3d2a5dd0b9a30c8c0b6a0a73c9cb5884", release_date="2022-03-28", n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license=None, @@ -345,6 +347,7 @@ def get_prediction_tokens(self, *args, **kwargs): revision="48cfad1d8dd587670393f27ee8ec41fde63e3d98", release_date="2022-03-28", n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license=None, @@ -369,6 +372,7 @@ def get_prediction_tokens(self, *args, **kwargs): revision="bc0c419a438c81f592f878ce32430a1823f5db6c", release_date="2022-03-28", n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license=None, @@ -405,6 +409,7 @@ def get_prediction_tokens(self, *args, **kwargs): "qed": ["train"], }, n_parameters=None, + memory_usage_mb=944, max_tokens=None, embed_dim=None, license=None, @@ -440,6 +445,7 @@ def get_prediction_tokens(self, *args, **kwargs): "qed": ["train"], }, n_parameters=None, + memory_usage_mb=2987, max_tokens=None, embed_dim=None, license=None, @@ -475,6 +481,7 @@ def get_prediction_tokens(self, *args, **kwargs): "qed": ["train"], }, n_parameters=None, + memory_usage_mb=10871, max_tokens=None, embed_dim=None, license=None, @@ -510,6 +517,7 @@ def get_prediction_tokens(self, *args, **kwargs): "qed": ["train"], }, n_parameters=None, + memory_usage_mb=42980, max_tokens=None, embed_dim=None, license=None, @@ -534,6 +542,7 @@ def get_prediction_tokens(self, *args, **kwargs): revision="01c7f73d771dfac7d292323805ebc428287df4f9", release_date="2023-07-18", n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license=None, @@ -558,6 +567,7 @@ def get_prediction_tokens(self, *args, **kwargs): revision="f5db02db724555f92da89c216ac04704f23d4590", release_date="2023-07-18", n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license=None, @@ -582,6 +592,7 @@ def get_prediction_tokens(self, *args, **kwargs): revision="3ad372fc79158a2148299e3318516c786aeded6c", release_date="2023-12-11", n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license=None, @@ -607,6 +618,7 @@ def get_prediction_tokens(self, *args, **kwargs): release_date="2024-04-29", training_datasets={"jhu-clsp/FollowIR-train": ["train"]}, n_parameters=None, + memory_usage_mb=13813, max_tokens=None, embed_dim=None, license=None, @@ -736,6 +748,7 @@ def get_prediction_tokens(self, *args, **kwargs): release_date="2022-01-05", training_datasets={"msmarco": ["train"]}, n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license=None, @@ -759,6 +772,7 @@ def get_prediction_tokens(self, *args, **kwargs): revision="e1a4317e102a525ea9e16745ad21394a4f1bffbc", release_date="2022-11-04", n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license=None, diff --git a/mteb/models/ru_sentence_models.py b/mteb/models/ru_sentence_models.py index 94cdf04e0b..0aba5e03f2 100644 --- a/mteb/models/ru_sentence_models.py +++ b/mteb/models/ru_sentence_models.py @@ -5,8 +5,7 @@ from functools 
import partial from mteb.model_meta import ModelMeta, sentence_transformers_loader - -from .bge_models import bge_m3_training_data +from mteb.models.bge_models import bge_m3_training_data rubert_tiny = ModelMeta( name="cointegrated/rubert-tiny", @@ -15,6 +14,7 @@ revision="5441c5ea8026d4f6d7505ec004845409f1259fb1", release_date="2021-05-24", n_parameters=11_900_000, + memory_usage_mb=45, embed_dim=312, license="mit", max_tokens=512, @@ -38,6 +38,7 @@ revision="dad72b8f77c5eef6995dd3e4691b758ba56b90c3", release_date="2021-10-28", n_parameters=29_400_000, + memory_usage_mb=112, embed_dim=312, license="mit", max_tokens=2048, @@ -62,6 +63,7 @@ revision="af977d5dfa46a3635e29bf0ef383f2df2a08d47a", release_date="2020-11-20", n_parameters=427_000_000, + memory_usage_mb=1629, embed_dim=1024, license="mit", max_tokens=512, # best guess @@ -81,6 +83,7 @@ revision="05300876c2b83f46d3ddd422a7f17e45cf633bb0", release_date="2021-05-18", n_parameters=427_000_000, + memory_usage_mb=1629, embed_dim=1024, license="Not specified", max_tokens=512, # best guess @@ -109,6 +112,7 @@ revision="436a489a2087d61aa670b3496a9915f84e46c861", release_date="2024-06-10", n_parameters=427_000_000, + memory_usage_mb=473, embed_dim=768, license="apache-2.0", max_tokens=512, @@ -164,6 +168,7 @@ revision="0cc6cfe48e260fb0474c753087a69369e88709ae", release_date="2024-07-05", n_parameters=359_026_688, + memory_usage_mb=1370, embed_dim=1024, license="apache-2.0", max_tokens=8194, @@ -206,6 +211,7 @@ revision="bdd30b0e19757e6940c92c7aff19e8fc0a60dff4", release_date="2023-02-07", n_parameters=124_000_000, + memory_usage_mb=473, embed_dim=768, license="apache-2.0", max_tokens=512, @@ -226,6 +232,7 @@ revision="4036cab694767a299f2b9e6492909664d9414229", release_date="2020-03-04", n_parameters=1280_000_000, + memory_usage_mb=4883, embed_dim=768, license="Not specified", max_tokens=512, @@ -245,6 +252,7 @@ revision="e348066b4a7279b97138038299bddc6580a9169a", release_date="2022-06-28", n_parameters=107_000_000, + memory_usage_mb=408, embed_dim=768, license="Not specified", max_tokens=512, @@ -264,6 +272,7 @@ revision="78b5122d6365337dd4114281b0d08cd1edbb3bc8", release_date="2020-03-04", n_parameters=107_000_000, + memory_usage_mb=408, embed_dim=768, license="Not specified", max_tokens=512, @@ -286,6 +295,7 @@ revision="cf0714e606d4af551e14ad69a7929cd6b0da7f7e", release_date="2021-06-10", n_parameters=129_000_000, + memory_usage_mb=492, embed_dim=768, license="Not specified", max_tokens=512, @@ -306,6 +316,7 @@ revision="8ce0cf757446ce9bb2d5f5a4ac8103c7a1049054", release_date="2024-06-21", n_parameters=29_200_000, + memory_usage_mb=111, embed_dim=312, license="mit", max_tokens=2048, @@ -315,7 +326,7 @@ use_instructions=False, public_training_code=None, public_training_data=None, - training_datasets=None, # source model in unknown + training_datasets=None, # Not MTEB: {"IlyaGusev/gazeta": ["train"], "zloelias/lenta-ru": ["train"]}, adapted_from="cointegrated/rubert-tiny2", ) @@ -327,6 +338,7 @@ revision="1940b046c6b5e125df11722b899130329d0a46da", release_date="2024-06-27", n_parameters=129_000_000, + memory_usage_mb=490, embed_dim=768, license="mit", max_tokens=512, @@ -361,6 +373,7 @@ release_date="2024-07-29", use_instructions=True, n_parameters=404_000_000, + memory_usage_mb=1540, max_tokens=514, embed_dim=1024, license="mit", diff --git a/mteb/models/salesforce_models.py b/mteb/models/salesforce_models.py index f534836e46..b25c9b86a8 100644 --- a/mteb/models/salesforce_models.py +++ b/mteb/models/salesforce_models.py @@ -4,10 +4,9 @@ 
from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta +from mteb.models.e5_instruct import E5_MISTRAL_TRAINING_DATA from mteb.models.instruct_wrapper import instruct_wrapper -from .e5_instruct import E5_MISTRAL_TRAINING_DATA - def instruction_template( instruction: str, prompt_type: PromptType | None = None @@ -49,6 +48,7 @@ def instruction_template( revision="91762139d94ed4371a9fa31db5551272e0b83818", release_date="2024-06-14", # initial commit of hf model. n_parameters=7_110_000_000, + memory_usage_mb=13563, embed_dim=4096, license="cc-by-nc-4.0", max_tokens=32768, @@ -79,6 +79,7 @@ def instruction_template( revision="938c560d1c236aa563b2dbdf084f28ab28bccb11", release_date="2024-01-24", # initial commit of hf model. n_parameters=7_110_000_000, + memory_usage_mb=13563, embed_dim=4096, license="cc-by-nc-4.0", max_tokens=32768, diff --git a/mteb/models/sentence_transformer_wrapper.py b/mteb/models/sentence_transformer_wrapper.py index 4a7dbd8ffa..872a3fffe9 100644 --- a/mteb/models/sentence_transformer_wrapper.py +++ b/mteb/models/sentence_transformer_wrapper.py @@ -9,8 +9,7 @@ from sentence_transformers import CrossEncoder, SentenceTransformer from mteb.encoder_interface import PromptType - -from .wrapper import Wrapper +from mteb.models.wrapper import Wrapper logger = logging.getLogger(__name__) diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py index c74aaf1d49..9ebf126078 100644 --- a/mteb/models/sentence_transformers_models.py +++ b/mteb/models/sentence_transformers_models.py @@ -103,6 +103,7 @@ revision="8b3219a92973c328a8e22fadcfa821b5dc75636a", release_date="2021-08-30", n_parameters=22_700_000, + memory_usage_mb=87, embed_dim=384, license="apache-2.0", max_tokens=256, @@ -124,6 +125,7 @@ revision="364dd28d28dcd3359b537f3cf1f5348ba679da62", release_date="2021-08-30", n_parameters=33_400_000, + memory_usage_mb=127, embed_dim=384, license="apache-2.0", max_tokens=256, @@ -145,6 +147,7 @@ revision="bf3bf13ab40c3157080a7ab344c831b9ad18b5eb", release_date="2019-11-01", # release date of paper n_parameters=118_000_000, + memory_usage_mb=449, embed_dim=768, license="apache-2.0", max_tokens=512, @@ -166,6 +169,7 @@ revision="79f2382ceacceacdf38563d7c5d16b9ff8d725d6", release_date="2019-11-01", # release date of paper n_parameters=278_000_000, + memory_usage_mb=1061, embed_dim=768, license="apache-2.0", max_tokens=512, @@ -198,6 +202,7 @@ revision="e34fab64a3011d2176c99545a93d5cbddc9a91b7", release_date="2019-11-01", # release date of paper n_parameters=471_000_000, + memory_usage_mb=1796, embed_dim=768, license="apache-2.0", max_tokens=512, @@ -220,6 +225,7 @@ revision="b207367332321f8e44f96e224ef15bc607f4dbf0", release_date="2021-08-30", n_parameters=22_700_000, + memory_usage_mb=87, embed_dim=384, license="apache-2.0", max_tokens=512, @@ -241,6 +247,7 @@ revision="9a3225965996d404b775526de6dbfe85d3368642", release_date="2021-08-30", n_parameters=109_000_000, + memory_usage_mb=418, embed_dim=768, license="apache-2.0", max_tokens=384, @@ -262,6 +269,7 @@ revision="98f70f14cdf12d7ea217ed2fd4e808b0195f1e7e", release_date="2024-11-10", n_parameters=272_000_000, + memory_usage_mb=1037, embed_dim=1024, license="apache-2.0", max_tokens=2048, diff --git a/mteb/models/siglip_models.py b/mteb/models/siglip_models.py index b7543afc68..cabb3b7794 100644 --- a/mteb/models/siglip_models.py +++ b/mteb/models/siglip_models.py @@ -170,6 +170,7 @@ def get_fused_embeddings( release_date="2024-01-08", modalities=["image", 
"text"], n_parameters=877_000_000, + memory_usage_mb=3347, max_tokens=16, embed_dim=1152, license="apache-2.0", @@ -194,6 +195,7 @@ def get_fused_embeddings( release_date="2024-01-08", modalities=["image", "text"], n_parameters=878_000_000, + memory_usage_mb=3349, max_tokens=64, embed_dim=1152, license="apache-2.0", @@ -218,6 +220,7 @@ def get_fused_embeddings( release_date="2024-01-08", modalities=["image", "text"], n_parameters=1_130_000_000, + memory_usage_mb=4306, max_tokens=64, embed_dim=1152, license="apache-2.0", @@ -242,6 +245,7 @@ def get_fused_embeddings( release_date="2024-01-08", modalities=["image", "text"], n_parameters=371_000_000, + memory_usage_mb=1414, max_tokens=64, embed_dim=768, license="apache-2.0", @@ -266,6 +270,7 @@ def get_fused_embeddings( release_date="2024-01-08", modalities=["image", "text"], n_parameters=203_000_000, + memory_usage_mb=775, max_tokens=64, embed_dim=768, license="apache-2.0", @@ -290,6 +295,7 @@ def get_fused_embeddings( release_date="2024-01-08", modalities=["image", "text"], n_parameters=204_000_000, + memory_usage_mb=777, max_tokens=64, embed_dim=768, license="apache-2.0", @@ -314,6 +320,7 @@ def get_fused_embeddings( release_date="2024-01-08", modalities=["image", "text"], n_parameters=203_000_000, + memory_usage_mb=776, max_tokens=64, embed_dim=768, license="apache-2.0", @@ -338,6 +345,7 @@ def get_fused_embeddings( release_date="2024-01-08", modalities=["image", "text"], n_parameters=203_000_000, + memory_usage_mb=775, max_tokens=64, embed_dim=768, license="apache-2.0", @@ -362,6 +370,7 @@ def get_fused_embeddings( release_date="2024-01-08", modalities=["image", "text"], n_parameters=652_000_000, + memory_usage_mb=2488, max_tokens=64, embed_dim=1024, license="apache-2.0", @@ -386,6 +395,7 @@ def get_fused_embeddings( release_date="2024-01-08", modalities=["image", "text"], n_parameters=652_000_000, + memory_usage_mb=2489, max_tokens=64, embed_dim=1024, license="apache-2.0", diff --git a/mteb/models/stella_models.py b/mteb/models/stella_models.py index 9cc45a6e02..6b8ec969b1 100644 --- a/mteb/models/stella_models.py +++ b/mteb/models/stella_models.py @@ -22,6 +22,7 @@ revision="1bb50bc7bb726810eac2140e62155b88b0df198f", release_date="2024-07-12", n_parameters=435_000_000, + memory_usage_mb=1660, max_tokens=8192, embed_dim=4096, license="mit", @@ -49,6 +50,7 @@ revision="d03be74b361d4eb24f42a2fe5bd2e29917df4604", release_date="2024-07-12", n_parameters=1_540_000_000, + memory_usage_mb=5887, max_tokens=131072, embed_dim=8960, license="mit", @@ -66,7 +68,8 @@ open_weights=True, revision="d5d39eb8cd11c80a63df53314e59997074469f09", release_date="2024-02-17", - n_parameters=None, # can't see on model card + n_parameters=None, + memory_usage_mb=None, # can't see on model card embed_dim=1792, license="not specified", max_tokens=512, @@ -91,7 +94,8 @@ open_weights=True, revision="82254892a0fba125aa2abf3a4800d2dd12821343", release_date="2024-02-17", - n_parameters=None, # can't see on model card + n_parameters=None, + memory_usage_mb=None, # can't see on model card embed_dim=1792, license="mit", max_tokens=512, @@ -118,6 +122,7 @@ revision="17bb1c32a93a8fc5f6fc9e91d5ea86da99983cfe", release_date="2024-02-27", n_parameters=326 * 1e6, + memory_usage_mb=1242, embed_dim=1792, license="mit", max_tokens=512, @@ -139,6 +144,7 @@ revision="b1075144f440ab4409c05622c1179130ebd57d03", release_date="2024-06-04", n_parameters=326 * 1e6, + memory_usage_mb=1242, embed_dim=1792, license="mit", max_tokens=512, diff --git a/mteb/models/text2vec_models.py 
b/mteb/models/text2vec_models.py index 86a9bcca4f..b0d82327bc 100644 --- a/mteb/models/text2vec_models.py +++ b/mteb/models/text2vec_models.py @@ -29,6 +29,7 @@ # - shibing624/nli-zh-all/text2vec-base-chinese-sentence-dataset # (Could have overlaps I'm not aware of) }, + memory_usage_mb=390, ) text2vec_base_chinese_paraphrase = ModelMeta( @@ -38,6 +39,7 @@ revision="e90c150a9c7fb55a67712a766d6820c55fb83cdd", release_date="2023-06-19", n_parameters=118 * 1e6, + memory_usage_mb=450, embed_dim=768, license="apache-2.0", max_tokens=512, @@ -80,6 +82,7 @@ # So probably best not to. loader=None, n_parameters=118 * 1e6, + memory_usage_mb=449, embed_dim=384, license="apache-2.0", max_tokens=256, diff --git a/mteb/models/uae_models.py b/mteb/models/uae_models.py index d6825be5ea..6edc84c9e5 100644 --- a/mteb/models/uae_models.py +++ b/mteb/models/uae_models.py @@ -10,8 +10,7 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta - -from .sentence_transformer_wrapper import SentenceTransformerWrapper +from mteb.models.sentence_transformer_wrapper import SentenceTransformerWrapper logger = logging.getLogger(__name__) @@ -68,6 +67,7 @@ def encode( revision="369c368f70f16a613f19f5598d4f12d9f44235d4", release_date="2023-12-04", # initial commit of hf model. n_parameters=335 * 1e6, + memory_usage_mb=1278, max_tokens=512, embed_dim=1024, license="mit", diff --git a/mteb/models/vista_models.py b/mteb/models/vista_models.py index 1344ec87cd..4448bc0006 100644 --- a/mteb/models/vista_models.py +++ b/mteb/models/vista_models.py @@ -249,6 +249,7 @@ def calculate_probs(self, text_embeddings, image_embeddings): release_date="2024-06-06", modalities=["image", "text"], n_parameters=196_000_000, + memory_usage_mb=748, max_tokens=77, embed_dim=768, license=None, @@ -275,6 +276,7 @@ def calculate_probs(self, text_embeddings, image_embeddings): release_date="2024-06-06", modalities=["image", "text"], n_parameters=None, + memory_usage_mb=None, max_tokens=77, embed_dim=1024, license=None, diff --git a/mteb/models/vlm2vec_models.py b/mteb/models/vlm2vec_models.py index 7ca458c6a0..fbf7bf9f0a 100644 --- a/mteb/models/vlm2vec_models.py +++ b/mteb/models/vlm2vec_models.py @@ -377,6 +377,7 @@ def get_fused_embeddings( release_date="2024-10-08", modalities=["image", "text"], n_parameters=None, + memory_usage_mb=None, max_tokens=131072, embed_dim=3072, license="apache-2.0", @@ -401,6 +402,7 @@ def get_fused_embeddings( release_date="2024-10-08", modalities=["image", "text"], n_parameters=4_150_000_000, + memory_usage_mb=7909, max_tokens=131072, embed_dim=3072, license="apache-2.0", diff --git a/mteb/models/voyage_models.py b/mteb/models/voyage_models.py index 3849ff0147..43de269d64 100644 --- a/mteb/models/voyage_models.py +++ b/mteb/models/voyage_models.py @@ -8,10 +8,9 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta +from mteb.models.wrapper import Wrapper from mteb.requires_package import requires_package -from .wrapper import Wrapper - VOYAGE_TRAINING_DATA = { # Self-reported (message from VoyageAI member) # synthetic data @@ -156,6 +155,7 @@ def _batched_encode( embed_dim=1024, open_weights=False, n_parameters=None, + memory_usage_mb=None, license=None, reference="https://blog.voyageai.com/2024/05/05/voyage-large-2-instruct-instruction-tuned-and-rank-1-on-mteb/", similarity_fn_name="cosine", @@ -180,6 +180,7 @@ def _batched_encode( embed_dim=1024, open_weights=False, n_parameters=None, + memory_usage_mb=None, license=None, 
reference="https://blog.voyageai.com/2024/06/03/domain-specific-embeddings-finance-edition-voyage-finance-2/", similarity_fn_name="cosine", @@ -204,6 +205,7 @@ def _batched_encode( embed_dim=1024, open_weights=False, n_parameters=None, + memory_usage_mb=None, license=None, reference="https://blog.voyageai.com/2024/04/15/domain-specific-embeddings-and-retrieval-legal-edition-voyage-law-2/", similarity_fn_name="cosine", @@ -228,6 +230,7 @@ def _batched_encode( embed_dim=1536, open_weights=False, n_parameters=None, + memory_usage_mb=None, license=None, reference="https://blog.voyageai.com/2024/01/23/voyage-code-2-elevate-your-code-retrieval/", similarity_fn_name="cosine", @@ -252,6 +255,7 @@ def _batched_encode( embed_dim=1536, open_weights=False, n_parameters=None, + memory_usage_mb=None, license=None, reference="https://blog.voyageai.com/2023/10/29/voyage-embeddings/", similarity_fn_name="cosine", @@ -276,6 +280,7 @@ def _batched_encode( embed_dim=1024, open_weights=False, n_parameters=None, + memory_usage_mb=None, license=None, reference="https://blog.voyageai.com/2023/10/29/voyage-embeddings/", similarity_fn_name="cosine", @@ -299,6 +304,7 @@ def _batched_encode( embed_dim=1024, open_weights=False, n_parameters=None, + memory_usage_mb=None, license=None, reference="https://blog.voyageai.com/2024/06/10/voyage-multilingual-2-multilingual-embedding-model/", similarity_fn_name="cosine", @@ -323,6 +329,7 @@ def _batched_encode( embed_dim=1024, open_weights=False, n_parameters=None, + memory_usage_mb=None, license=None, reference="https://blog.voyageai.com/2024/09/18/voyage-3/", similarity_fn_name="cosine", @@ -347,6 +354,7 @@ def _batched_encode( embed_dim=512, open_weights=False, n_parameters=None, + memory_usage_mb=None, license=None, reference="https://blog.voyageai.com/2024/09/18/voyage-3/", similarity_fn_name="cosine", @@ -371,6 +379,7 @@ def _batched_encode( embed_dim=2048, open_weights=False, n_parameters=int(6918 * 1e6), + memory_usage_mb=None, license=None, reference="https://huggingface.co/voyageai/voyage-3-m-exp", similarity_fn_name="cosine", diff --git a/mteb/models/voyage_v.py b/mteb/models/voyage_v.py index 6968fec03c..025165cd7d 100644 --- a/mteb/models/voyage_v.py +++ b/mteb/models/voyage_v.py @@ -247,6 +247,7 @@ def get_fused_embeddings( revision="1", release_date="2024-11-10", n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=1024, license=None, diff --git a/scripts/calculate_memory_usage.py b/scripts/calculate_memory_usage.py new file mode 100644 index 0000000000..db901c99d1 --- /dev/null +++ b/scripts/calculate_memory_usage.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python3 +"""This script scans all Python files in the "models" directory, imports each module, +identifies variables that are ModelMeta instances, computes their memory_usage_mb via +the calculate_memory_usage_mb method, and then updates the source code in place by +inserting or replacing the "memory_usage_mb" keyword argument in the ModelMeta constructor. +""" + +from __future__ import annotations + +import glob +import importlib.util +import os +import re +from typing import Any + +from tqdm import tqdm + +# IMPORTANT: Adjust the import below to point to the module where ModelMeta is defined. 
+# In this repository ModelMeta lives in mteb/model_meta.py. +from mteb.model_meta import ModelMeta + + +def find_matching_paren(text: str, open_index: int) -> int | None: + """Given text and the index of an opening parenthesis, return the index of the + matching closing parenthesis. + """ + count: int = 0 + for i in range(open_index, len(text)): + if text[i] == "(": + count += 1 + elif text[i] == ")": + count -= 1 + if count == 0: + return i + return None + + +def find_modelmeta_call_range(text: str, var_name: str) -> tuple[int, int] | None: + """Given the source text and a variable name, find the range (start, end) + of the ModelMeta constructor call that assigns to that variable. + This function uses a regex to locate the assignment and then uses a + parenthesis matcher to capture the entire call. + """ + pattern: str = rf"^{re.escape(var_name)}\s*=\s*ModelMeta\s*\(" + match = re.search(pattern, text, re.MULTILINE) + if not match: + return None + start: int = match.start() + # Locate the '(' after "ModelMeta" + open_paren_index: int = text.find("(", match.end() - 1) + if open_paren_index == -1: + return None + end_paren_index: int | None = find_matching_paren(text, open_paren_index) + if end_paren_index is None: + return None + # Return the range covering the entire ModelMeta( ... ) call. + return start, end_paren_index + 1 + + +def update_memory_usage_in_call(call_text: str, memory_usage: float | None) -> str: + """Update (or insert) the memory_usage_mb keyword argument in a ModelMeta(...) call. + If memory_usage_mb exists, its value is updated. + Otherwise, it is inserted right after the n_parameters argument. + """ + mem_usage_str: str = str(memory_usage) if memory_usage is not None else "None" + + if "memory_usage_mb" in call_text: + # Update existing memory_usage_mb using a lambda to avoid backreference issues. + updated_call: str = re.sub( + r"(memory_usage_mb\s*=\s*)([^,\)\n]+)", + lambda m: m.group(1) + mem_usage_str, + call_text, + ) + return updated_call + else: + # Try to locate the n_parameters argument to insert after it. + match = re.search(r"(n_parameters\s*=\s*[^,]+,\s*)", call_text) + if match: + insertion_point: int = match.end() + new_param: str = f"memory_usage_mb={mem_usage_str}, " + updated_call: str = ( + call_text[:insertion_point] + new_param + call_text[insertion_point:] + ) + return updated_call + else: + # Fallback: if n_parameters is not found, insert before the closing parenthesis. + stripped: str = call_text.rstrip() + if not stripped.endswith(")"): + return call_text + return call_text[:-1] + f", memory_usage_mb={mem_usage_str}" + call_text[-1] + + +def update_file(file_path: str) -> None: + """For a given Python file, import the module, iterate over its attributes to find + ModelMeta instances, compute memory_usage_mb for each, and update the source + code accordingly.
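+    Calls that already contain a memory_usage_mb argument are left untouched.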
+ """ + with open(file_path, encoding="utf-8") as f: + content: str = f.read() + + # Import the module from the file + module_name: str = os.path.splitext(os.path.basename(file_path))[0] + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None or spec.loader is None: + print(f"Could not load module from {file_path}") + return + module = importlib.util.module_from_spec(spec) + try: + spec.loader.exec_module(module) + except Exception as e: + print(f"Error importing {file_path}: {e}") + return + + # List of modifications to apply: each is a tuple (start, end, replacement_text) + modifications: list[tuple[int, int, str]] = [] + for attr_name in tqdm(dir(module), desc=f"Processing {file_path}"): + if attr_name.startswith("__"): + continue + obj: Any = getattr(module, attr_name) + if isinstance(obj, ModelMeta): + # Compute memory_usage_mb via the instance method. + mem_usage: float | None = obj.calculate_memory_usage_mb() + # Find the corresponding ModelMeta(...) call in the source file. + call_range: tuple[int, int] | None = find_modelmeta_call_range( + content, attr_name + ) + if call_range is None: + print(f"Could not find definition for {attr_name} in {file_path}") + continue + start, end = call_range + original_call_text: str = content[start:end] + if "memory_usage_mb" in original_call_text: + continue + updated_call_text: str = update_memory_usage_in_call( + original_call_text, mem_usage + ) + if original_call_text != updated_call_text: + modifications.append((start, end, updated_call_text)) + print( + f"Updating {attr_name} in {file_path}: setting memory_usage_mb={mem_usage}" + ) + + # Apply modifications in reverse order to avoid shifting indices. + if modifications: + modifications.sort(key=lambda mod: mod[0], reverse=True) + for start, end, replacement in modifications: + content = content[:start] + replacement + content[end:] + # Write the updated content back to the file. + with open(file_path, "w", encoding="utf-8") as f: + f.write(content) + else: + print(f"No modifications needed for {file_path}") + + +def main() -> None: + """Main function: scans the "models" directory for .py files and updates each.""" + models_dir: str = ( + "../mteb/models" # Change this if your models are in a different folder. 
+ ) + py_files: list[str] = glob.glob(os.path.join(models_dir, "*.py")) + if not py_files: + print(f"No Python files found in {models_dir}") + return + + for file_path in py_files: + update_file(file_path) + + +if __name__ == "__main__": + main() diff --git a/tests/test_models/test_model_meta.py b/tests/test_models/test_model_meta.py new file mode 100644 index 0000000000..89e85dae7e --- /dev/null +++ b/tests/test_models/test_model_meta.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +import pytest + +import mteb + + +@pytest.mark.parametrize( + ("model_name", "expected_memory"), + [ + ("intfloat/e5-mistral-7b-instruct", 13563), # multiple safetensors + ("infgrad/jasper_en_vision_language_v1", 3802), # bf16 + ("intfloat/multilingual-e5-small", 449), # safetensors + ("BAAI/bge-m3", 2167), # pytorch_model.bin + ], +) +def test_model_memory_usage(model_name: str, expected_memory: int | None): + meta = mteb.get_model_meta(model_name) + assert meta.memory_usage_mb is not None + used_memory = round(meta.memory_usage_mb) + assert used_memory == expected_memory + + +def test_model_memory_usage_api_model(): + meta = mteb.get_model_meta("openai/text-embedding-3-large") + assert meta.memory_usage_mb is None diff --git a/tests/test_tasks/test_mteb_rerank.py b/tests/test_tasks/test_mteb_rerank.py index dc65dae905..f588d7e18b 100644 --- a/tests/test_tasks/test_mteb_rerank.py +++ b/tests/test_tasks/test_mteb_rerank.py @@ -373,6 +373,7 @@ def test_reranker_same_ndcg1(): revision=ce_revision, release_date="2021-04-15", n_parameters=None, + memory_usage_mb=None, max_tokens=None, embed_dim=None, license=None,
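As a quick illustration of how the rewriting helpers in `scripts/calculate_memory_usage.py` compose, here is a minimal sketch. It assumes the script can be imported as a module (e.g. run from within `scripts/` with `mteb` installed); the `my_model` source text is a hypothetical input, not a file from the repository:

```python
# Minimal sketch: rewrite one hypothetical ModelMeta(...) call in source text.
from calculate_memory_usage import (  # assumes the CWD is scripts/
    find_modelmeta_call_range,
    update_memory_usage_in_call,
)

source = '''my_model = ModelMeta(
    name="org/my-model",
    n_parameters=118_000_000,
    embed_dim=768,
)
'''

span = find_modelmeta_call_range(source, "my_model")
assert span is not None
start, end = span
# Inserts memory_usage_mb=450 right after the n_parameters argument.
updated = source[:start] + update_memory_usage_in_call(source[start:end], 450) + source[end:]
print(updated)
```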
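The new test can also be mirrored interactively to sanity-check a single entry, assuming the model is registered in mteb's model registry:

```python
import mteb

meta = mteb.get_model_meta("intfloat/multilingual-e5-small")
assert meta.memory_usage_mb is not None
print(round(meta.memory_usage_mb))  # 449, matching the safetensors-derived expectation in the test
```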