From f2195d9c57b71124c782ebf94e4f64c69b031caa Mon Sep 17 00:00:00 2001 From: ayush1298 Date: Mon, 24 Mar 2025 22:23:49 +0530 Subject: [PATCH 1/2] Add model specific dependencies in pyproject.toml --- mteb/models/blip2_models.py | 16 ++++++++-------- mteb/models/bm25.py | 12 +++++------- mteb/models/cohere_v.py | 11 ++++++----- mteb/models/colbert_models.py | 9 +++------ mteb/models/google_models.py | 12 ++++++------ mteb/models/instruct_wrapper.py | 11 +++++------ mteb/models/jina_models.py | 22 +++++++++------------- mteb/models/llm2clip_models.py | 14 ++++++-------- mteb/models/llm2vec_models.py | 25 +++++++++++++------------ mteb/models/moco_models.py | 9 ++++----- mteb/models/model2vec_models.py | 9 +++------ mteb/models/openai_models.py | 8 ++++---- mteb/models/openclip_models.py | 14 +++++++++----- mteb/models/repllama_models.py | 11 +++++------ mteb/models/rerankers_custom.py | 14 ++++++++------ mteb/models/vlm2vec_models.py | 21 ++++++++++++++------- mteb/models/voyage_models.py | 2 +- mteb/models/voyage_v.py | 21 ++++++++++----------- mteb/requires_package.py | 19 +++++++++++++++++++ pyproject.toml | 10 ++++++++-- 20 files changed, 146 insertions(+), 124 deletions(-) diff --git a/mteb/models/blip2_models.py b/mteb/models/blip2_models.py index 1c6e75b0c2..0314ffcd22 100644 --- a/mteb/models/blip2_models.py +++ b/mteb/models/blip2_models.py @@ -11,17 +11,17 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta +from mteb.requires_package import requires_package def blip2_loader(**kwargs): - try: # a temporal fix for the dependency issues. - from lavis.models.blip2_models.blip2_image_text_matching import ( - Blip2ITM, - ) - except ImportError: - raise ImportError( - "Please install `pip install mteb[blip2]` to use BLIP-2 models." 
- ) + model_name = kwargs.get("model_name", "BLIP-2") + requires_package( + blip2_loader, "salesforce-lavis", model_name, "pip install 'mteb[blip2]'" + ) + from lavis.models.blip2_models.blip2_image_text_matching import ( + Blip2ITM, + ) class BLIP2ModelWrapper: def __init__( diff --git a/mteb/models/bm25.py b/mteb/models/bm25.py index 15e90e0f43..3de9be16e8 100644 --- a/mteb/models/bm25.py +++ b/mteb/models/bm25.py @@ -6,18 +6,16 @@ from mteb.evaluation.evaluators.RetrievalEvaluator import DRESModel from mteb.model_meta import ModelMeta from mteb.models.wrapper import Wrapper +from mteb.requires_package import requires_package logger = logging.getLogger(__name__) def bm25_loader(**kwargs): - try: - import bm25s - import Stemmer - except ImportError: - raise ImportError( - "bm25s or PyStemmer is not installed. Please install it with `pip install mteb[bm25s]`." - ) + model_name = kwargs.get("model_name", "BM25") + requires_package(bm25_loader, "bm25s", model_name, "pip install mteb[bm25s]") + import bm25s + import Stemmer class BM25Search(DRESModel, Wrapper): """BM25 search""" diff --git a/mteb/models/cohere_v.py b/mteb/models/cohere_v.py index b1c8d505d3..b52a31fec8 100644 --- a/mteb/models/cohere_v.py +++ b/mteb/models/cohere_v.py @@ -14,14 +14,15 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta -from mteb.requires_package import requires_image_dependencies +from mteb.requires_package import requires_image_dependencies, requires_package def cohere_v_loader(**kwargs): - try: - import cohere - except ImportError: - raise ImportError("To use cohere models, please run `pip install cohere`.") + model_name = kwargs.get("model_name", "Cohere") + requires_package( + cohere_v_loader, "cohere", model_name, "pip install 'mteb[cohere]'" + ) + import cohere class CohereMultiModalModelWrapper: def __init__( diff --git a/mteb/models/colbert_models.py b/mteb/models/colbert_models.py index 628272a045..aea1bcb914 100644 --- 
a/mteb/models/colbert_models.py +++ b/mteb/models/colbert_models.py @@ -11,6 +11,7 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta from mteb.models.wrapper import Wrapper +from mteb.requires_package import requires_package logger = logging.getLogger(__name__) @@ -34,12 +35,8 @@ def __init__( and finally to the specific prompt type. **kwargs: Additional arguments to pass to the model. """ - try: - from pylate import models as colbert_model - except ModuleNotFoundError as e: - raise ModuleNotFoundError( - "To use the ColBERT models `pylate` is required. Please install it with `pip install mteb[pylate]`." - ) from e + requires_package(self, "pylate", model_name, "pip install mteb[pylate]") + from pylate import models as colbert_model self.model_name = model_name self.model = colbert_model.ColBERT(self.model_name, revision=revision, **kwargs) diff --git a/mteb/models/google_models.py b/mteb/models/google_models.py index 162f960eab..cd98e35b45 100644 --- a/mteb/models/google_models.py +++ b/mteb/models/google_models.py @@ -9,6 +9,7 @@ from mteb.encoder_interface import Encoder, PromptType from mteb.model_meta import ModelMeta from mteb.models.wrapper import Wrapper +from mteb.requires_package import requires_package MULTILINGUAL_EVALUATED_LANGUAGES = [ "arb_Arab", @@ -74,12 +75,11 @@ def _embed( """Embeds texts with a pre-trained, foundational model. 
From https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#generative-ai-get-text-embedding-python_vertex_ai_sdk """ - try: - from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel - except ImportError: - raise ImportError( - "The `vertexai` package is required to run the google API, please install it using `pip install vertexai`" - ) + requires_package( + self, "vertexai", self.model_name, "pip install 'mteb[vertexai]'" + ) + from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel + model = TextEmbeddingModel.from_pretrained(self.model_name) if titles: # Allow title-only embeddings by replacing text with a space diff --git a/mteb/models/instruct_wrapper.py b/mteb/models/instruct_wrapper.py index 2cbf9eccbd..5275419645 100644 --- a/mteb/models/instruct_wrapper.py +++ b/mteb/models/instruct_wrapper.py @@ -10,6 +10,7 @@ from mteb.encoder_interface import PromptType from mteb.models.wrapper import Wrapper +from mteb.requires_package import requires_package logger = logging.getLogger(__name__) @@ -20,12 +21,10 @@ def instruct_wrapper( instruction_template: str | Callable[[str], str] | None = None, **kwargs, ): - try: - from gritlm import GritLM - except ImportError: - raise ImportError( - f"Please install `pip install mteb[gritlm]` to use {model_name_or_path}." 
- ) + requires_package( + instruct_wrapper, "gritlm", model_name_or_path, "pip install 'mteb[gritlm]'" + ) + from gritlm import GritLM class InstructWrapper(GritLM, Wrapper): def __init__( diff --git a/mteb/models/jina_models.py b/mteb/models/jina_models.py index 9e6da19986..a9c05c4041 100644 --- a/mteb/models/jina_models.py +++ b/mteb/models/jina_models.py @@ -12,6 +12,7 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta from mteb.models.sentence_transformer_wrapper import SentenceTransformerWrapper +from mteb.requires_package import requires_package logger = logging.getLogger(__name__) @@ -140,19 +141,14 @@ def __init__( raise RuntimeError( f"sentence_transformers version {st_version} is lower than the required version 3.1.0" ) - try: - import einops # noqa: F401 - except ImportError: - raise ImportError( - "To use the jina-embeddings-v3 models `einops` is required. Please install it with `pip install mteb[jina]`." - ) - try: - import flash_attn # noqa: F401 - except ImportError: - logger.warning( - "Using flash_attn for jina-embeddings-v3 models is recommended. Please install it with `pip install mteb[flash_attention]`." - "Fallback to native implementation." 
- ) + requires_package(self, "jina", model, "pip install 'mteb[jina]'") + import einops # noqa: F401 + + requires_package( + self, "flash_attention", model, "pip install 'mteb[flash_attention]'" + ) + import flash_attn # noqa: F401 + super().__init__(model, revision, model_prompts, **kwargs) def encode( diff --git a/mteb/models/llm2clip_models.py b/mteb/models/llm2clip_models.py index 5c2a17cfe8..8b950867ab 100644 --- a/mteb/models/llm2clip_models.py +++ b/mteb/models/llm2clip_models.py @@ -12,7 +12,7 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta -from mteb.requires_package import requires_image_dependencies +from mteb.requires_package import requires_image_dependencies, requires_package MODEL2PROCESSOR = { "microsoft/LLM2CLIP-Openai-L-14-336": "openai/clip-vit-large-patch14-336", @@ -22,13 +22,11 @@ def llm2clip_loader(**kwargs): - try: - from llm2vec import LLM2Vec - except ImportError: - # https://github.com/baaivision/EVA/tree/master/EVA-CLIP#setup - raise ImportError( - "To use the LLM2CLIP models `llm2vec` is required. Please install it with `pip install llm2vec`." - ) + model_name = kwargs.get("model_name", "LLM2CLIP") + requires_package( + llm2clip_loader, "llm2vec", model_name, "pip install 'mteb[llm2vec]'" + ) + from llm2vec import LLM2Vec class LLM2CLIPWrapper: def __init__( diff --git a/mteb/models/llm2vec_models.py b/mteb/models/llm2vec_models.py index eb2a8537c0..e8ee5c43e7 100644 --- a/mteb/models/llm2vec_models.py +++ b/mteb/models/llm2vec_models.py @@ -9,6 +9,7 @@ from mteb.encoder_interface import Encoder, PromptType from mteb.model_meta import ModelMeta from mteb.models.wrapper import Wrapper +from mteb.requires_package import requires_package, suggest_package logger = logging.getLogger(__name__) @@ -56,21 +57,21 @@ def __init__( *args, **kwargs, ): - try: - from llm2vec import LLM2Vec - except ImportError: - raise ImportError( - "To use the LLM2Vec models `llm2vec` is required. 
Please install it with `pip install llm2vec`." - ) + model_name = kwargs.get("model_name", "LLM2Vec") + requires_package(self, "llm2vec", model_name, "pip install 'mteb[llm2vec]'") + from llm2vec import LLM2Vec + extra_kwargs = {} - try: - import flash_attn # noqa + if suggest_package( + self, + "flash_attn", + model_name, + "pip install flash-attn --no-build-isolation", + ): + import flash_attn # noqa: F401 extra_kwargs["attn_implementation"] = "flash_attention_2" - except ImportError: - logger.warning( - "LLM2Vec models were trained with flash attention enabled. For optimal performance, please install the `flash_attn` package with `pip install flash-attn --no-build-isolation`." - ) + self.model_prompts = ( self.validate_task_to_prompt_name(model_prompts) if model_prompts else None ) diff --git a/mteb/models/moco_models.py b/mteb/models/moco_models.py index b88e9805c7..1383447493 100644 --- a/mteb/models/moco_models.py +++ b/mteb/models/moco_models.py @@ -10,14 +10,13 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta -from mteb.requires_package import requires_image_dependencies +from mteb.requires_package import requires_image_dependencies, requires_package def mocov3_loader(**kwargs): - try: - import timm - except ImportError: - raise ImportError("Please install `pip install timm` to use MOCOv3 models.") + model_name = kwargs.get("model_name", "MOCOv3") + requires_package(mocov3_loader, "timm", model_name, "pip install 'mteb[timm]'") + import timm class MOCOv3Wrapper: """A wrapper class for MOCOv3 models that supports image encoding. 
diff --git a/mteb/models/model2vec_models.py b/mteb/models/model2vec_models.py index ee79f1cafa..c82d411143 100644 --- a/mteb/models/model2vec_models.py +++ b/mteb/models/model2vec_models.py @@ -10,6 +10,7 @@ from mteb.model_meta import ModelMeta from mteb.models.bge_models import bge_training_data from mteb.models.wrapper import Wrapper +from mteb.requires_package import requires_package logger = logging.getLogger(__name__) @@ -26,12 +27,8 @@ def __init__( model_name: The Model2Vec model to load from HuggingFace Hub. **kwargs: Additional arguments to pass to the wrapper. """ - try: - from model2vec import StaticModel # type: ignore - except ModuleNotFoundError as e: - raise ModuleNotFoundError( - "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`." - ) from e + requires_package(self, "model2vec", model_name, "pip install 'mteb[model2vec]'") + from model2vec import StaticModel # type: ignore self.model_name = model_name self.static_model = StaticModel.from_pretrained(self.model_name) diff --git a/mteb/models/openai_models.py b/mteb/models/openai_models.py index 630528c983..8588104081 100644 --- a/mteb/models/openai_models.py +++ b/mteb/models/openai_models.py @@ -29,16 +29,16 @@ def __init__( requires_package( self, "openai", - "Openai text embedding", - install_instruction="pip install mteb[openai]", + model_name, + install_instruction="pip install 'mteb[openai]'", ) from openai import OpenAI requires_package( self, "tiktoken", - "Tiktoken package", - install_instruction="pip install mteb[openai]", + model_name, + install_instruction="pip install 'mteb[openai]'", ) import tiktoken diff --git a/mteb/models/openclip_models.py b/mteb/models/openclip_models.py index 8399fd4f64..2953cfe206 100644 --- a/mteb/models/openclip_models.py +++ b/mteb/models/openclip_models.py @@ -10,14 +10,18 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta -from mteb.requires_package import 
requires_image_dependencies +from mteb.requires_package import requires_image_dependencies, requires_package def openclip_loader(**kwargs): - try: - import open_clip - except ImportError: - raise ImportError("Please run `pip install open_clip_torch`.") + model_name = kwargs.get("model_name", "CLIP-ViT") + requires_package( + openclip_loader, + "open_clip_torch", + model_name, + "pip install 'mteb[open_clip_torch]'", + ) + import open_clip class OpenCLIPWrapper: def __init__( diff --git a/mteb/models/repllama_models.py b/mteb/models/repllama_models.py index 8631af927b..549f231c93 100644 --- a/mteb/models/repllama_models.py +++ b/mteb/models/repllama_models.py @@ -12,6 +12,7 @@ from mteb.encoder_interface import Encoder, PromptType from mteb.model_meta import ModelMeta from mteb.models.wrapper import Wrapper +from mteb.requires_package import requires_package logger = logging.getLogger(__name__) @@ -26,12 +27,10 @@ def __init__( model_prompts: dict[str, str] | None = None, **kwargs, ): - try: - from peft import PeftModel - except ImportError: - raise ImportError( - "To use the RepLLaMA based models `peft` is required. Please install it with `pip install 'mteb[peft]'`." 
- ) + requires_package( + self, "peft", peft_model_name_or_path, "pip install 'mteb[peft]'" + ) + from peft import PeftModel self.base_model = AutoModel.from_pretrained( base_model_name_or_path, diff --git a/mteb/models/rerankers_custom.py b/mteb/models/rerankers_custom.py index 83c054c9c9..32a2534c0e 100644 --- a/mteb/models/rerankers_custom.py +++ b/mteb/models/rerankers_custom.py @@ -12,6 +12,7 @@ from mteb.evaluation.evaluators.RetrievalEvaluator import DenseRetrievalExactSearch from mteb.model_meta import ModelMeta from mteb.models.bge_models import bge_m3_training_data +from mteb.requires_package import requires_package logger = logging.getLogger(__name__) @@ -61,12 +62,13 @@ def __init__( if self.fp_options: model_args["torch_dtype"] = self.fp_options - try: - from FlagEmbedding import FlagReranker - except ImportError: - raise ImportError( - "FlagEmbedding is not installed. Please install it via `pip install mteb[flagembedding]`" - ) + requires_package( + self, + "flagembedding", + model_name_or_path, + "pip install 'mteb[flagembedding]'", + ) + from FlagEmbedding import FlagReranker self.model = FlagReranker(model_name_or_path, use_fp16=True) diff --git a/mteb/models/vlm2vec_models.py b/mteb/models/vlm2vec_models.py index 1d629b86c9..a630a57d2f 100644 --- a/mteb/models/vlm2vec_models.py +++ b/mteb/models/vlm2vec_models.py @@ -12,7 +12,11 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta -from mteb.requires_package import requires_image_dependencies +from mteb.requires_package import ( + requires_image_dependencies, + requires_package, + suggest_package, +) logging.basicConfig(level=logging.WARNING) logger = logging.getLogger(__name__) @@ -30,13 +34,16 @@ def __init__( **kwargs, ): requires_image_dependencies() - try: + if suggest_package( + self, + "flash_attn", + model_name, + "pip install flash-attn --no-build-isolation", + ): import flash_attn # noqa - from peft import LoraConfig, PeftModel # noqa - except 
ImportError: - logger.warning( - "VLM2Vec models were trained with flash attention enabled. For optimal performance, please install the `flash_attn` package with `pip install flash-attn --no-build-isolation`." - ) + + requires_package(self, "peft", model_name, "pip install 'mteb[peft]'") + from peft import LoraConfig, PeftModel # noqa self.pooling = "last" self.normalize = True diff --git a/mteb/models/voyage_models.py b/mteb/models/voyage_models.py index 47fe91323b..b1eb33442a 100644 --- a/mteb/models/voyage_models.py +++ b/mteb/models/voyage_models.py @@ -78,7 +78,7 @@ def __init__( model_prompts: dict[str, str] | None = None, **kwargs, ) -> None: - requires_package(self, "voyageai", "Voyage") + requires_package(self, "voyageai", model_name, "pip install 'mteb[voyageai]'") import voyageai self._client = voyageai.Client(max_retries=max_retries) diff --git a/mteb/models/voyage_v.py b/mteb/models/voyage_v.py index 96e7ff9997..48a083574c 100644 --- a/mteb/models/voyage_v.py +++ b/mteb/models/voyage_v.py @@ -11,7 +11,7 @@ from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta -from mteb.requires_package import requires_image_dependencies +from mteb.requires_package import requires_image_dependencies, requires_package def downsample_image( @@ -46,16 +46,15 @@ def downsample_image( def voyage_v_loader(**kwargs): - try: - import voyageai - except ImportError: - raise ImportError("To use voyage models, please run `pip install -U voyageai`.") - try: - from tenacity import retry, stop_after_attempt, wait_exponential - except ImportError: - raise ImportError( - "please run `pip install tenacity` to use exponential backoff." 
-        )
+    model_name = kwargs.get("model_name", "Voyage vision")
+    requires_package(
+        voyage_v_loader,
+        "voyageai",
+        model_name,
+        "pip install 'mteb[voyage_v]'",
+    )
+    import voyageai
+    from tenacity import retry, stop_after_attempt, wait_exponential
 
     class VoyageMultiModalModelWrapper:
         def __init__(
diff --git a/mteb/requires_package.py b/mteb/requires_package.py
index d261acdffb..22b6ddebf7 100644
--- a/mteb/requires_package.py
+++ b/mteb/requires_package.py
@@ -1,6 +1,9 @@
 from __future__ import annotations
 
 import importlib.util
+import logging
+
+logger = logging.getLogger(__name__)
 
 
 def _is_package_available(pkg_name: str) -> bool:
@@ -24,6 +27,22 @@ def requires_package(
     )
 
 
+def suggest_package(
+    obj, package_name: str, model_name: str, install_instruction: str
+) -> bool:
+    """Check if a package is available and log a warning with installation instructions if it's not.
+    Unlike requires_package, this doesn't raise an error but returns True if the package is available.
+    """
+    if not _is_package_available(package_name):
+        name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
+        logger.warning(
+            f"{name} can benefit from the `{package_name}` library but it was not found in your environment. "
+            + f"For optimal performance with {model_name} models, please install the `{package_name}` package with `{install_instruction}`."
+        )
+        return False
+    return True
+
+
 def requires_image_dependencies() -> None:
     if not _is_package_available("torchvision"):
         raise ImportError(
diff --git a/pyproject.toml b/pyproject.toml
index 16f3b865b9..950fc9152d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -72,7 +72,7 @@ leaderboard = [
     "cachetools>=5.2.0",
     "matplotlib>=3.9.4",
 ]
-flagembedding = ["FlagEmbedding"]
+flagembedding = ["FlagEmbedding==1.3.4"]
 jina = ["einops>=0.8.0"]
 flash_attention = ["flash-attn>=2.6.3"]
 openai = ["openai>=1.41.0", "tiktoken>=0.8.0"]
@@ -82,7 +82,13 @@ bm25s = ["bm25s>=0.2.6", "PyStemmer>=2.2.0.3"]
 gritlm = ["gritlm>=1.0.2"]
 xformers = ["xformers>=0.0.29"]
 blip2 = ["salesforce-lavis>=1.0.2"]
-
+voyageai = ["voyageai>=1.0.0,<2.0.0"]
+voyage_v = ["voyageai>=1.0.0,<2.0.0", "tenacity>=8.0.0"]
+cohere = ["cohere==5.14.0"]
+vertexai = ["vertexai==1.71.1"]
+llm2vec = ["llm2vec==0.2.3"]
+timm = ["timm==1.0.15"]
+open_clip_torch = ["open_clip_torch==2.31.0"]
 
 
 [tool.coverage.report]

From 453c2848099e304f7120898ac94b586795f61d58 Mon Sep 17 00:00:00 2001
From: ayush1298
Date: Tue, 25 Mar 2025 19:35:45 +0530
Subject: [PATCH 2/2] Update documentation

---
 docs/adding_a_model.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/docs/adding_a_model.md b/docs/adding_a_model.md
index cb1dd09665..b9e39ce436 100644
--- a/docs/adding_a_model.md
+++ b/docs/adding_a_model.md
@@ -133,3 +133,17 @@ model = ModelMeta(
     ...
 )
 ```
+
+##### Adding model dependencies in pyproject.toml
+If you are adding a model that requires additional dependencies, add them to the `pyproject.toml` file and, instead of manually checking whether the dependencies are installed, make use of `requires_package` from [requires_package.py](../mteb/requires_package.py).
For example:
+
+In the [voyage_models.py](../mteb/models/voyage_models.py) file, we have added the following code:
+```python
+requires_package(self, "voyageai", model_name, "pip install 'mteb[voyageai]'")
+```
+and we also updated the [pyproject.toml](../pyproject.toml) file with the following line:
+```toml
+voyageai = ["voyageai>=1.0.0,<2.0.0"]
+```
+This checks whether `voyageai` is installed and, if it is not, raises an error with clear installation instructions.
+If you want to emit a warning instead of raising an error, you can use `suggest_package` from [requires_package.py](../mteb/requires_package.py).
\ No newline at end of file