From a8bd2ded74df10e75f3e64b42b2e2457fe2efa78 Mon Sep 17 00:00:00 2001
From: David Golchinfar <d.golchin@web.de>
Date: Sun, 28 Dec 2025 21:10:29 +0100
Subject: [PATCH 01/20] model: Add SauerkrautLM-ColPali visual document
 retrieval models

Add inference code and requirements for SauerkrautLM-ColPali visual document retrieval models.

These are multi-vector embedding models based on the ColPali architecture:
- ColQwen3 (Qwen3-VL backbone): 1.7B Turbo, 2B, 4B, 8B variants
- ColLFM2 (LFM2-VL backbone): 450M variant
- ColMinistral3 (Ministral3 backbone): 3B variant

All models produce 128-dimensional embeddings per text/image token and use MaxSim (late interaction) for retrieval scoring.

Model checkpoints:
- https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1
- https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1
- https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1
- https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1
- https://huggingface.co/VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1
- https://huggingface.co/VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1
---
 .../model_implementations/slm_models.py       | 484 ++++++++++++++++++
 pyproject.toml                                |   4 +
 2 files changed, 488 insertions(+)
 create mode 100644 mteb/models/model_implementations/slm_models.py

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
new file mode 100644
index 0000000000..70230aab89
--- /dev/null
+++ b/mteb/models/model_implementations/slm_models.py
@@ -0,0 +1,484 @@
+"""
+SauerkrautLM Visual Document Retrieval Models - MTEB Integration
+
+This module provides MTEB wrappers for SauerkrautLM ColPali-style models:
+- SLM-ColQwen3 (Qwen3-VL backbone)
+- SLM-ColLFM2 (LFM2 backbone)
+- SLM-ColMinistral3 (Ministral3 backbone)
+
+Based on:
+- MTEB ColPali implementation: mteb/models/model_implementations/colpali_models.py
+"""
+
+from __future__ import annotations
+
+import logging
+from functools import partial
+from typing import Any
+
+import torch
+from PIL import Image
+from torch.utils.data import DataLoader
+from tqdm.auto import tqdm
+
+from mteb._requires_package import (
+    requires_image_dependencies,
+    requires_package,
+)
+from mteb.abstasks.task_metadata import TaskMetadata
+from mteb.models.abs_encoder import AbsEncoder
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+from mteb.types import Array, BatchedInput, PromptType
+
+logger = logging.getLogger(__name__)
+
+
+# =============================================================================
+# Supported Languages
+# =============================================================================
+
+SUPPORTED_LANGUAGES = [
+    "eng-Latn",  # English
+    "deu-Latn",  # German
+    "fra-Latn",  # French
+    "spa-Latn",  # Spanish
+    "ita-Latn",  # Italian
+    "por-Latn",  # Portuguese
+]
+
+
+# =============================================================================
+# Base Wrapper Class
+# =============================================================================
+
+class SLMBaseWrapper(AbsEncoder):
+    """
+    Base wrapper for SauerkrautLM multi-vector embedding models.
+    
+    All our models use late interaction (MaxSim) for retrieval scoring.
+    """
+    
+    model_class = None
+    processor_class = None
+    model_name_prefix = "SLM"
+
+    def __init__(
+        self,
+        model_name: str,
+        revision: str | None = None,
+        device: str | None = None,
+        use_flash_attn: bool = True,
+        **kwargs,
+    ):
+        requires_image_dependencies()
+        requires_package(
+            self, "sauerkrautlm_colpali", model_name, "pip install sauerkrautlm-colpali"
+        )
+        
+        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        self._load_model_and_processor(model_name, revision, use_flash_attn, **kwargs)
+
+    def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
+        """Override in subclasses to load specific model/processor."""
+        raise NotImplementedError
+
+    def encode(
+        self,
+        inputs: DataLoader[BatchedInput],
+        *,
+        task_metadata: TaskMetadata,
+        hf_split: str,
+        hf_subset: str,
+        prompt_type: PromptType | None = None,
+        **kwargs: Any,
+    ) -> Array:
+        text_embeddings = None
+        image_embeddings = None
+        
+        if "text" in inputs.dataset.features:
+            text_embeddings = self.get_text_embeddings(inputs, **kwargs)
+        if "image" in inputs.dataset.features:
+            image_embeddings = self.get_image_embeddings(inputs, **kwargs)
+
+        if text_embeddings is not None and image_embeddings is not None:
+            if len(text_embeddings) != len(image_embeddings):
+                raise ValueError(
+                    "The number of texts and images must have the same length"
+                )
+            fused_embeddings = text_embeddings + image_embeddings
+            return fused_embeddings
+        elif text_embeddings is not None:
+            return text_embeddings
+        elif image_embeddings is not None:
+            return image_embeddings
+        raise ValueError("No text or image features found in inputs")
+
+    def encode_input(self, inputs):
+        """Forward pass through the model."""
+        return self.mdl(**inputs)
+
+    def _move_to_device(self, inputs: dict) -> dict:
+        """Move all tensor inputs to the model's device."""
+        result = {}
+        for k, v in inputs.items():
+            if isinstance(v, torch.Tensor):
+                result[k] = v.to(self.device)
+            else:
+                result[k] = v
+        return result
+
+    def get_image_embeddings(
+        self,
+        images: DataLoader,
+        batch_size: int = 32,
+        **kwargs,
+    ) -> torch.Tensor:
+        import torchvision.transforms.functional as F
+
+        all_embeds = []
+
+        with torch.no_grad():
+            for batch in tqdm(images, desc="Encoding images"):
+                imgs = [
+                    F.to_pil_image(b)
+                    if not isinstance(b, Image.Image)
+                    else b
+                    for b in batch["image"]
+                ]
+                inputs = self.processor.process_images(imgs)
+                inputs = self._move_to_device(inputs)
+                outs = self.encode_input(inputs)
+                all_embeds.extend(outs.cpu().to(torch.float32))
+
+        padded = torch.nn.utils.rnn.pad_sequence(
+            all_embeds, batch_first=True, padding_value=0
+        )
+        return padded
+
+    def get_text_embeddings(
+        self,
+        texts: DataLoader,
+        batch_size: int = 32,
+        **kwargs,
+    ) -> torch.Tensor:
+        all_embeds = []
+        
+        with torch.no_grad():
+            for batch in tqdm(texts, desc="Encoding texts"):
+                inputs = self.processor.process_queries(batch["text"])
+                inputs = self._move_to_device(inputs)
+                outs = self.encode_input(inputs)
+                all_embeds.extend(outs.cpu().to(torch.float32))
+
+        padded = torch.nn.utils.rnn.pad_sequence(
+            all_embeds, batch_first=True, padding_value=0
+        )
+        return padded
+
+    def get_fused_embeddings(
+        self,
+        texts: list[str] | None = None,
+        images: list[Image.Image] | DataLoader | None = None,
+        *,
+        task_name: str | None = None,
+        prompt_type: PromptType | None = None,
+        batch_size: int = 32,
+        fusion_mode: str = "sum",
+        **kwargs: Any,
+    ):
+        raise NotImplementedError(
+            "Fused embeddings are not supported. "
+            "Please use get_text_embeddings or get_image_embeddings."
+        )
+
+    def calculate_probs(
+        self, 
+        text_embeddings: torch.Tensor, 
+        image_embeddings: torch.Tensor,
+    ) -> torch.Tensor:
+        scores = self.similarity(text_embeddings, image_embeddings).T
+        return scores.softmax(dim=-1)
+
+    def similarity(
+        self, 
+        a: torch.Tensor | list, 
+        b: torch.Tensor | list,
+    ) -> torch.Tensor:
+        return self.processor.score(a, b, device=self.device)
+
+
+# =============================================================================
+# ColQwen3 Wrapper
+# =============================================================================
+
+class SLMColQwen3Wrapper(SLMBaseWrapper):
+    """Wrapper for SLM-ColQwen3 models (Qwen3-VL backbone)."""
+
+    def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
+        from sauerkrautlm_colpali.models.qwen3.colqwen3 import ColQwen3, ColQwen3Processor
+
+        self.mdl = ColQwen3.from_pretrained(
+            model_name,
+            torch_dtype=torch.bfloat16,
+            attn_implementation="flash_attention_2" if use_flash_attn else "eager",
+            revision=revision,
+            **kwargs,
+        )
+        # Explicitly move to device
+        self.mdl = self.mdl.to(self.device)
+        self.mdl.eval()
+
+        self.processor = ColQwen3Processor.from_pretrained(
+            model_name,
+            revision=revision,
+        )
+        
+        logger.info(f"SLM-ColQwen3 loaded: dim={self.mdl.dim}, device={self.device}")
+
+
+# =============================================================================
+# ColLFM2 Wrapper
+# =============================================================================
+
+class SLMColLFM2Wrapper(SLMBaseWrapper):
+    """Wrapper for SLM-ColLFM2 models (LFM2 backbone)."""
+
+    def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
+        from sauerkrautlm_colpali.models.lfm2.collfm2 import ColLFM2, ColLFM2Processor
+
+        self.mdl = ColLFM2.from_pretrained(
+            model_name,
+            torch_dtype=torch.bfloat16,
+            revision=revision,
+            **kwargs,
+        )
+        # Explicitly move to device
+        self.mdl = self.mdl.to(self.device)
+        self.mdl.eval()
+
+        self.processor = ColLFM2Processor.from_pretrained(
+            model_name,
+            revision=revision,
+        )
+        
+        logger.info(f"SLM-ColLFM2 loaded: dim={self.mdl.dim}, device={self.device}")
+
+
+# =============================================================================
+# ColMinistral3 Wrapper
+# =============================================================================
+
+class SLMColMinistral3Wrapper(SLMBaseWrapper):
+    """Wrapper for SLM-ColMinistral3 models (Ministral3 backbone)."""
+
+    def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
+        from sauerkrautlm_colpali.models.ministral3.colministral3 import ColMinistral3, ColMinistral3Processor
+
+        # ColMinistral3.__init__ doesn't accept extra kwargs - only pass model_name
+        self.mdl = ColMinistral3.from_pretrained(model_name)
+        # Explicitly move to device and convert to bfloat16
+        self.mdl = self.mdl.to(dtype=torch.bfloat16, device=self.device)
+        self.mdl.eval()
+
+        self.processor = ColMinistral3Processor.from_pretrained(model_name)
+        
+        logger.info(f"SLM-ColMinistral3 loaded: dim={self.mdl.dim}, device={self.device}")
+
+
+# =============================================================================
+# Loader Functions
+# =============================================================================
+
+def slm_colqwen3_loader(model_name: str, revision: str | None = None, device: str | None = None, **kwargs) -> SLMColQwen3Wrapper:
+    return SLMColQwen3Wrapper(model_name=model_name, revision=revision, device=device, **kwargs)
+
+def slm_collfm2_loader(model_name: str, revision: str | None = None, device: str | None = None, **kwargs) -> SLMColLFM2Wrapper:
+    return SLMColLFM2Wrapper(model_name=model_name, revision=revision, device=device, **kwargs)
+
+def slm_colministral3_loader(model_name: str, revision: str | None = None, device: str | None = None, **kwargs) -> SLMColMinistral3Wrapper:
+    return SLMColMinistral3Wrapper(model_name=model_name, revision=revision, device=device, **kwargs)
+
+
+# =============================================================================
+# Citations
+# =============================================================================
+
+SAUERKRAUTLM_CITATION = """
+@misc{sauerkrautlm-colpali-2025,
+  title={SauerkrautLM-ColPali: Multi-Vector Vision Retrieval Models},
+  author={David Golchinfar},
+  organization={VAGO Solutions},
+  year={2025},
+  url={https://github.com/VAGOsolutions/sauerkrautlm-colpali}
+}
+"""
+
+COLPALI_CITATION = """
+@misc{faysse2024colpali,
+  title={ColPali: Efficient Document Retrieval with Vision Language Models},
+  author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Omrani, Bilel and Viaud, Gautier and Hudelot, C\\'eline and Colombo, Pierre},
+  year={2024},
+  eprint={2407.01449},
+  archivePrefix={arXiv},
+  primaryClass={cs.IR}
+}
+"""
+
+
+# =============================================================================
+# ColQwen3 Model Metadata
+# =============================================================================
+
+# ColQwen3-1.7B Turbo: ~1.7B params → 3.4 GB VRAM in bfloat16
+slm_colqwen3_1_7b_turbo = ModelMeta(
+    loader=partial(slm_colqwen3_loader),
+    name="VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1",
+    languages=SUPPORTED_LANGUAGES,
+    revision="main",
+    release_date="2025-01-01",
+    modalities=["image", "text"],
+    n_parameters=1_700_000_000,
+    memory_usage_mb=3400,
+    max_tokens=262144,
+    embed_dim=128,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["ColPali"],
+    reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1",
+    similarity_fn_name=ScoringFunction.MAX_SIM,
+    use_instructions=True,
+    training_datasets=None,
+    citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
+)
+
+# ColQwen3-2B: ~2.2B params → 4.4 GB VRAM in bfloat16
+slm_colqwen3_2b = ModelMeta(
+    loader=partial(slm_colqwen3_loader),
+    name="VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1",
+    languages=SUPPORTED_LANGUAGES,
+    revision="main",
+    release_date="2025-01-01",
+    modalities=["image", "text"],
+    n_parameters=2_200_000_000,
+    memory_usage_mb=4400,
+    max_tokens=262144,
+    embed_dim=128,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["ColPali"],
+    reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1",
+    similarity_fn_name=ScoringFunction.MAX_SIM,
+    use_instructions=True,
+    training_datasets=None,
+    citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
+)
+
+# ColQwen3-4B: ~4B params → 8 GB VRAM in bfloat16
+slm_colqwen3_4b = ModelMeta(
+    loader=partial(slm_colqwen3_loader),
+    name="VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1",
+    languages=SUPPORTED_LANGUAGES,
+    revision="main",
+    release_date="2025-01-01",
+    modalities=["image", "text"],
+    n_parameters=4_000_000_000,
+    memory_usage_mb=8000,
+    max_tokens=262144,
+    embed_dim=128,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["ColPali"],
+    reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1",
+    similarity_fn_name=ScoringFunction.MAX_SIM,
+    use_instructions=True,
+    training_datasets=None,
+    citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
+)
+
+# ColQwen3-8B: ~8B params → 16 GB VRAM in bfloat16
+slm_colqwen3_8b = ModelMeta(
+    loader=partial(slm_colqwen3_loader),
+    name="VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1",
+    languages=SUPPORTED_LANGUAGES,
+    revision="main",
+    release_date="2025-01-01",
+    modalities=["image", "text"],
+    n_parameters=8_000_000_000,
+    memory_usage_mb=16000,
+    max_tokens=262144,
+    embed_dim=128,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["ColPali"],
+    reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1",
+    similarity_fn_name=ScoringFunction.MAX_SIM,
+    use_instructions=True,
+    training_datasets=None,
+    citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
+)
+
+
+# =============================================================================
+# ColLFM2 Model Metadata
+# =============================================================================
+
+# ColLFM2-450M: ~450M params → 900 MB VRAM in bfloat16
+slm_collfm2_450m = ModelMeta(
+    loader=partial(slm_collfm2_loader),
+    name="VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1",
+    languages=SUPPORTED_LANGUAGES,
+    revision="main",
+    release_date="2025-01-01",
+    modalities=["image", "text"],
+    n_parameters=450_000_000,
+    memory_usage_mb=900,
+    max_tokens=32768,
+    embed_dim=128,
+    license="https://huggingface.co/LiquidAI/LFM2-VL-450M/blob/main/LICENSE",  # LiquidAI LFM 1.0 License
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["ColPali"],
+    reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1",
+    similarity_fn_name=ScoringFunction.MAX_SIM,
+    use_instructions=True,
+    training_datasets=None,
+    citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
+)
+
+
+# =============================================================================
+# ColMinistral3 Model Metadata
+# =============================================================================
+
+# ColMinistral3-3B: ~3B params → 6 GB VRAM in bfloat16
+slm_colministral3_3b = ModelMeta(
+    loader=partial(slm_colministral3_loader),
+    name="VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1",
+    languages=SUPPORTED_LANGUAGES,
+    revision="main",
+    release_date="2025-01-01",
+    modalities=["image", "text"],
+    n_parameters=3_000_000_000,
+    memory_usage_mb=6000,
+    max_tokens=262144,
+    embed_dim=128,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["ColPali"],
+    reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1",
+    similarity_fn_name=ScoringFunction.MAX_SIM,
+    use_instructions=True,
+    training_datasets=None,
+    citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
+)
diff --git a/pyproject.toml b/pyproject.toml
index 6fe28affd7..8d6fb01e99 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -93,6 +93,7 @@ nomic = ["einops>=0.8.1"]
 ark = ["volcengine-python-sdk[ark]==3.0.2", "tiktoken>=0.8.0"]
 colpali_engine = ["colpali_engine>=0.3.12"]
 colqwen3 = ["transformers>=4.57", "torchvision>=0.22.1"]
+sauerkrautlm-colpali = ["transformers>=4.47.0", "torch>=2.0.0", "sauerkrautlm-colpali @ git+https://github.com/VAGOsolutions/sauerkrautlm-colpali.git"]
 xet = ["huggingface_hub>=0.32.0"]
 youtu = ["tencentcloud-sdk-python-common>=3.0.1454", "tencentcloud-sdk-python-lkeap>=3.0.1451"]
 llama-embed-nemotron = ["transformers==4.51.0"]
@@ -327,6 +328,9 @@ conflicts = [
     [{ extra = "colqwen3" }, { extra = "llama-embed-nemotron" }], # conflicting versions of transformers
     [{ extra = "jina-v4" }, { extra = "llm2vec" }],
     [{ extra = "jina-v4" }, { extra = "llama-embed-nemotron" }], # conflicting versions of transformers
+    [{ extra = "sauerkrautlm-colpali" }, { extra = "pylate" }],
+    [{ extra = "sauerkrautlm-colpali" }, { extra = "llm2vec" }],
+    [{ extra = "sauerkrautlm-colpali" }, { extra = "llama-embed-nemotron" }],
 ]
 
 [tool.uv.extra-build-dependencies]

From 952543b107195f6cce460a698b77b40e29bfd426 Mon Sep 17 00:00:00 2001
From: David Golchinfar <d.golchin@web.de>
Date: Sun, 28 Dec 2025 21:46:53 +0100
Subject: [PATCH 02/20] fix: Address review comments

- Remove loader functions, use classes directly in ModelMeta
- Remove unused get_fused_embeddings method
- Move model.to(device) and model.eval() to base class __init__
- Pass torch_dtype directly to ColMinistral3.from_pretrained
---
 .../model_implementations/slm_models.py       | 59 ++++---------------
 1 file changed, 12 insertions(+), 47 deletions(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index 70230aab89..658001258f 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -77,6 +77,8 @@ def __init__(
         
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self._load_model_and_processor(model_name, revision, use_flash_attn, **kwargs)
+        self.mdl = self.mdl.to(self.device)
+        self.mdl.eval()
 
     def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
         """Override in subclasses to load specific model/processor."""
@@ -175,22 +177,6 @@ def get_text_embeddings(
         )
         return padded
 
-    def get_fused_embeddings(
-        self,
-        texts: list[str] | None = None,
-        images: list[Image.Image] | DataLoader | None = None,
-        *,
-        task_name: str | None = None,
-        prompt_type: PromptType | None = None,
-        batch_size: int = 32,
-        fusion_mode: str = "sum",
-        **kwargs: Any,
-    ):
-        raise NotImplementedError(
-            "Fused embeddings are not supported. "
-            "Please use get_text_embeddings or get_image_embeddings."
-        )
-
     def calculate_probs(
         self, 
         text_embeddings: torch.Tensor, 
@@ -224,9 +210,6 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
             revision=revision,
             **kwargs,
         )
-        # Explicitly move to device
-        self.mdl = self.mdl.to(self.device)
-        self.mdl.eval()
 
         self.processor = ColQwen3Processor.from_pretrained(
             model_name,
@@ -252,9 +235,6 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
             revision=revision,
             **kwargs,
         )
-        # Explicitly move to device
-        self.mdl = self.mdl.to(self.device)
-        self.mdl.eval()
 
         self.processor = ColLFM2Processor.from_pretrained(
             model_name,
@@ -274,31 +254,24 @@ class SLMColMinistral3Wrapper(SLMBaseWrapper):
     def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
         from sauerkrautlm_colpali.models.ministral3.colministral3 import ColMinistral3, ColMinistral3Processor
 
-        # ColMinistral3.__init__ doesn't accept extra kwargs - only pass model_name
-        self.mdl = ColMinistral3.from_pretrained(model_name)
-        # Explicitly move to device and convert to bfloat16
-        self.mdl = self.mdl.to(dtype=torch.bfloat16, device=self.device)
-        self.mdl.eval()
+        # Load the checkpoint at the revision requested by the caller, as the
+        # ColQwen3 and ColLFM2 wrappers do, instead of always pulling the
+        # default branch. NOTE(review): assumes from_pretrained consumes
+        # `revision` itself, like torch_dtype - confirm against the package.
+        self.mdl = ColMinistral3.from_pretrained(
+            model_name,
+            torch_dtype=torch.bfloat16,
+            revision=revision,
+        )
 
-        self.processor = ColMinistral3Processor.from_pretrained(model_name)
+        self.processor = ColMinistral3Processor.from_pretrained(
+            model_name,
+            revision=revision,
+        )
         
         logger.info(f"SLM-ColMinistral3 loaded: dim={self.mdl.dim}, device={self.device}")
 
 
-# =============================================================================
-# Loader Functions
-# =============================================================================
-
-def slm_colqwen3_loader(model_name: str, revision: str | None = None, device: str | None = None, **kwargs) -> SLMColQwen3Wrapper:
-    return SLMColQwen3Wrapper(model_name=model_name, revision=revision, device=device, **kwargs)
-
-def slm_collfm2_loader(model_name: str, revision: str | None = None, device: str | None = None, **kwargs) -> SLMColLFM2Wrapper:
-    return SLMColLFM2Wrapper(model_name=model_name, revision=revision, device=device, **kwargs)
-
-def slm_colministral3_loader(model_name: str, revision: str | None = None, device: str | None = None, **kwargs) -> SLMColMinistral3Wrapper:
-    return SLMColMinistral3Wrapper(model_name=model_name, revision=revision, device=device, **kwargs)
-
-
 # =============================================================================
 # Citations
 # =============================================================================
@@ -331,7 +296,7 @@ def slm_colministral3_loader(model_name: str, revision: str | None = None, devic
 
 # ColQwen3-1.7B Turbo: ~1.7B params → 3.4 GB VRAM in bfloat16
 slm_colqwen3_1_7b_turbo = ModelMeta(
-    loader=partial(slm_colqwen3_loader),
+    loader=partial(SLMColQwen3Wrapper),
     name="VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="main",
@@ -355,7 +320,7 @@ def slm_colministral3_loader(model_name: str, revision: str | None = None, devic
 
 # ColQwen3-2B: ~2.2B params → 4.4 GB VRAM in bfloat16
 slm_colqwen3_2b = ModelMeta(
-    loader=partial(slm_colqwen3_loader),
+    loader=partial(SLMColQwen3Wrapper),
     name="VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="main",
@@ -379,7 +344,7 @@ def slm_colministral3_loader(model_name: str, revision: str | None = None, devic
 
 # ColQwen3-4B: ~4B params → 8 GB VRAM in bfloat16
 slm_colqwen3_4b = ModelMeta(
-    loader=partial(slm_colqwen3_loader),
+    loader=partial(SLMColQwen3Wrapper),
     name="VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="main",
@@ -403,7 +368,7 @@ def slm_colministral3_loader(model_name: str, revision: str | None = None, devic
 
 # ColQwen3-8B: ~8B params → 16 GB VRAM in bfloat16
 slm_colqwen3_8b = ModelMeta(
-    loader=partial(slm_colqwen3_loader),
+    loader=partial(SLMColQwen3Wrapper),
     name="VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="main",
@@ -432,7 +397,7 @@ def slm_colministral3_loader(model_name: str, revision: str | None = None, devic
 
 # ColLFM2-450M: ~450M params → 900 MB VRAM in bfloat16
 slm_collfm2_450m = ModelMeta(
-    loader=partial(slm_collfm2_loader),
+    loader=partial(SLMColLFM2Wrapper),
     name="VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="main",
@@ -461,7 +426,7 @@ def slm_colministral3_loader(model_name: str, revision: str | None = None, devic
 
 # ColMinistral3-3B: ~3B params → 6 GB VRAM in bfloat16
 slm_colministral3_3b = ModelMeta(
-    loader=partial(slm_colministral3_loader),
+    loader=partial(SLMColMinistral3Wrapper),
     name="VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="main",

From 95ce3cfda1b271cf648403d6d9dfb487e0a6d60e Mon Sep 17 00:00:00 2001
From: dgolchin <david.golchinfar@h-brs.de>
Date: Sun, 28 Dec 2025 21:54:25 +0100
Subject: [PATCH 03/20] Update mteb/models/model_implementations/slm_models.py

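Pass SLMColMinistral3Wrapper directly as the loader of slm_colministral3_3b
instead of wrapping it in an argument-less functools.partial.

Co-authored-by: Roman Solomatin <samoed.roman@gmail.com>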
---
 mteb/models/model_implementations/slm_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index 658001258f..69d123ee68 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -426,7 +426,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColMinistral3-3B: ~3B params → 6 GB VRAM in bfloat16
 slm_colministral3_3b = ModelMeta(
-    loader=partial(SLMColMinistral3Wrapper),
+    loader=SLMColMinistral3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="main",

From 50856f6ffb922f6c4983e2b14a487e999890f51e Mon Sep 17 00:00:00 2001
From: dgolchin <david.golchinfar@h-brs.de>
Date: Sun, 28 Dec 2025 21:54:42 +0100
Subject: [PATCH 04/20] Update mteb/models/model_implementations/slm_models.py

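Pass SLMColLFM2Wrapper directly as the loader of slm_collfm2_450m instead of
wrapping it in an argument-less functools.partial.

Co-authored-by: Roman Solomatin <samoed.roman@gmail.com>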
---
 mteb/models/model_implementations/slm_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index 69d123ee68..b41c5ce9cb 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -397,7 +397,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColLFM2-450M: ~450M params → 900 MB VRAM in bfloat16
 slm_collfm2_450m = ModelMeta(
-    loader=partial(SLMColLFM2Wrapper),
+    loader=SLMColLFM2Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="main",

From 7658b354785b25cc67306d04836f177dfe2b54a2 Mon Sep 17 00:00:00 2001
From: dgolchin <david.golchinfar@h-brs.de>
Date: Sun, 28 Dec 2025 21:54:56 +0100
Subject: [PATCH 05/20] Update mteb/models/model_implementations/slm_models.py

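Pass SLMColQwen3Wrapper directly as the loader of slm_colqwen3_8b instead of
wrapping it in an argument-less functools.partial.

Co-authored-by: Roman Solomatin <samoed.roman@gmail.com>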
---
 mteb/models/model_implementations/slm_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index b41c5ce9cb..7a66a47a05 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -368,7 +368,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColQwen3-8B: ~8B params → 16 GB VRAM in bfloat16
 slm_colqwen3_8b = ModelMeta(
-    loader=partial(SLMColQwen3Wrapper),
+    loader=SLMColQwen3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="main",

From 1a393c350c8fce522c86d5c3e80c112ca911b46a Mon Sep 17 00:00:00 2001
From: dgolchin <david.golchinfar@h-brs.de>
Date: Sun, 28 Dec 2025 21:55:12 +0100
Subject: [PATCH 06/20] Update mteb/models/model_implementations/slm_models.py

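Pass SLMColQwen3Wrapper directly as the loader of slm_colqwen3_4b instead of
wrapping it in an argument-less functools.partial.

Co-authored-by: Roman Solomatin <samoed.roman@gmail.com>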
---
 mteb/models/model_implementations/slm_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index 7a66a47a05..1e5bb59eb2 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -344,7 +344,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColQwen3-4B: ~4B params → 8 GB VRAM in bfloat16
 slm_colqwen3_4b = ModelMeta(
-    loader=partial(SLMColQwen3Wrapper),
+    loader=SLMColQwen3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="main",

From de7445e7ae19451747abe54d0aa4e490834f1b0e Mon Sep 17 00:00:00 2001
From: dgolchin <david.golchinfar@h-brs.de>
Date: Sun, 28 Dec 2025 21:55:22 +0100
Subject: [PATCH 07/20] Update mteb/models/model_implementations/slm_models.py

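Pass SLMColQwen3Wrapper directly as the loader of slm_colqwen3_2b instead of
wrapping it in an argument-less functools.partial.

Co-authored-by: Roman Solomatin <samoed.roman@gmail.com>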
---
 mteb/models/model_implementations/slm_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index 1e5bb59eb2..8ad2a85b6a 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -320,7 +320,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColQwen3-2B: ~2.2B params → 4.4 GB VRAM in bfloat16
 slm_colqwen3_2b = ModelMeta(
-    loader=partial(SLMColQwen3Wrapper),
+    loader=SLMColQwen3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="main",

From 60f8176087444a44c30f1ae99b8311e82d16a2bd Mon Sep 17 00:00:00 2001
From: dgolchin <david.golchinfar@h-brs.de>
Date: Sun, 28 Dec 2025 21:55:40 +0100
Subject: [PATCH 08/20] Update mteb/models/model_implementations/slm_models.py

Co-authored-by: Roman Solomatin <samoed.roman@gmail.com>
---
 mteb/models/model_implementations/slm_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index 8ad2a85b6a..dad779a91f 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -296,7 +296,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColQwen3-1.7B Turbo: ~1.7B params → 3.4 GB VRAM in bfloat16
 slm_colqwen3_1_7b_turbo = ModelMeta(
-    loader=partial(SLMColQwen3Wrapper),
+    loader=SLMColQwen3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="main",

From dba097e191710b158999a5800f48c9ed1527a701 Mon Sep 17 00:00:00 2001
From: dgolchin <david.golchinfar@h-brs.de>
Date: Sun, 28 Dec 2025 21:55:59 +0100
Subject: [PATCH 09/20] Update pyproject.toml

Co-authored-by: Roman Solomatin <samoed.roman@gmail.com>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8d6fb01e99..2a6a1edc18 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -93,7 +93,7 @@ nomic = ["einops>=0.8.1"]
 ark = ["volcengine-python-sdk[ark]==3.0.2", "tiktoken>=0.8.0"]
 colpali_engine = ["colpali_engine>=0.3.12"]
 colqwen3 = ["transformers>=4.57", "torchvision>=0.22.1"]
-sauerkrautlm-colpali = ["transformers>=4.47.0", "torch>=2.0.0", "sauerkrautlm-colpali @ git+https://github.com/VAGOsolutions/sauerkrautlm-colpali.git"]
+sauerkrautlm-colpali = ["sauerkrautlm-colpali @ git+https://github.com/VAGOsolutions/sauerkrautlm-colpali.git"]
 xet = ["huggingface_hub>=0.32.0"]
 youtu = ["tencentcloud-sdk-python-common>=3.0.1454", "tencentcloud-sdk-python-lkeap>=3.0.1451"]
 llama-embed-nemotron = ["transformers==4.51.0"]

From 6b738ee4d12c62d42087135419c89826c76e416b Mon Sep 17 00:00:00 2001
From: David Golchinfar <d.golchin@web.de>
Date: Sun, 28 Dec 2025 22:00:09 +0100
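Subject: [PATCH 10/20] fix: Update release_date to 2025-12-20

Set release_date to 2025-12-20 for all six SauerkrautLM-ColPali models and
pin each model's revision to a specific commit hash instead of "main".
The loaders are wrapped in partial() again in this commit.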
---
 .../model_implementations/slm_models.py       | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index dad779a91f..1fbaa59147 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -296,11 +296,11 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColQwen3-1.7B Turbo: ~1.7B params → 3.4 GB VRAM in bfloat16
 slm_colqwen3_1_7b_turbo = ModelMeta(
-    loader=SLMColQwen3Wrapper,
+    loader=partial(SLMColQwen3Wrapper),
     name="VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1",
     languages=SUPPORTED_LANGUAGES,
-    revision="main",
-    release_date="2025-01-01",
+    revision="19c295a18e057d6d82754f627c09408117ffdb66",
+    release_date="2025-12-20",
     modalities=["image", "text"],
     n_parameters=1_700_000_000,
     memory_usage_mb=3400,
@@ -320,11 +320,11 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColQwen3-2B: ~2.2B params → 4.4 GB VRAM in bfloat16
 slm_colqwen3_2b = ModelMeta(
-    loader=SLMColQwen3Wrapper,
+    loader=partial(SLMColQwen3Wrapper),
     name="VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1",
     languages=SUPPORTED_LANGUAGES,
-    revision="main",
-    release_date="2025-01-01",
+    revision="48f699713c10af754684e12060a2af9266462cc9",
+    release_date="2025-12-20",
     modalities=["image", "text"],
     n_parameters=2_200_000_000,
     memory_usage_mb=4400,
@@ -344,11 +344,11 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColQwen3-4B: ~4B params → 8 GB VRAM in bfloat16
 slm_colqwen3_4b = ModelMeta(
-    loader=SLMColQwen3Wrapper,
+    loader=partial(SLMColQwen3Wrapper),
     name="VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1",
     languages=SUPPORTED_LANGUAGES,
-    revision="main",
-    release_date="2025-01-01",
+    revision="b635fbb3ab145f07608ed10a85def33544de1723",
+    release_date="2025-12-20",
     modalities=["image", "text"],
     n_parameters=4_000_000_000,
     memory_usage_mb=8000,
@@ -368,11 +368,11 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColQwen3-8B: ~8B params → 16 GB VRAM in bfloat16
 slm_colqwen3_8b = ModelMeta(
-    loader=SLMColQwen3Wrapper,
+    loader=partial(SLMColQwen3Wrapper),
     name="VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1",
     languages=SUPPORTED_LANGUAGES,
-    revision="main",
-    release_date="2025-01-01",
+    revision="36ac136e451a7b8d8229725d69d4ec23aa4f03c8",
+    release_date="2025-12-20",
     modalities=["image", "text"],
     n_parameters=8_000_000_000,
     memory_usage_mb=16000,
@@ -397,11 +397,11 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColLFM2-450M: ~450M params → 900 MB VRAM in bfloat16
 slm_collfm2_450m = ModelMeta(
-    loader=SLMColLFM2Wrapper,
+    loader=partial(SLMColLFM2Wrapper),
     name="VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1",
     languages=SUPPORTED_LANGUAGES,
-    revision="main",
-    release_date="2025-01-01",
+    revision="a65223fd6633f331ccff4483e47575c3c620dc60",
+    release_date="2025-12-20",
     modalities=["image", "text"],
     n_parameters=450_000_000,
     memory_usage_mb=900,
@@ -426,11 +426,11 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColMinistral3-3B: ~3B params → 6 GB VRAM in bfloat16
 slm_colministral3_3b = ModelMeta(
-    loader=SLMColMinistral3Wrapper,
+    loader=partial(SLMColMinistral3Wrapper),
     name="VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1",
     languages=SUPPORTED_LANGUAGES,
-    revision="main",
-    release_date="2025-01-01",
+    revision="54aa3ffbbce20471fdcc4afc07d13989c65e71b8",
+    release_date="2025-12-20",
     modalities=["image", "text"],
     n_parameters=3_000_000_000,
     memory_usage_mb=6000,

From e8516226ca1a02287b068157acd97329be371727 Mon Sep 17 00:00:00 2001
From: David Golchinfar <d.golchin@web.de>
Date: Mon, 29 Dec 2025 01:27:07 +0100
Subject: [PATCH 11/20] fix: address review comments - remove partial, add
 adapted_from and training_datasets

---
 .../model_implementations/slm_models.py       | 33 +++++++++++--------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index 1fbaa59147..a583f8c26d 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -13,11 +13,9 @@
 from __future__ import annotations
 
 import logging
-from functools import partial
 from typing import Any
 
 import torch
-from PIL import Image
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
@@ -141,6 +139,7 @@ def get_image_embeddings(
 
         with torch.no_grad():
             for batch in tqdm(images, desc="Encoding images"):
+                from PIL import Image
                 imgs = [
                     F.to_pil_image(b)
                     if not isinstance(b, Image.Image)
@@ -296,7 +295,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColQwen3-1.7B Turbo: ~1.7B params → 3.4 GB VRAM in bfloat16
 slm_colqwen3_1_7b_turbo = ModelMeta(
-    loader=partial(SLMColQwen3Wrapper),
+    loader=SLMColQwen3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="19c295a18e057d6d82754f627c09408117ffdb66",
@@ -314,13 +313,14 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1",
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
-    training_datasets=None,
+    adapted_from="Qwen/Qwen3-VL-2B-Instruct",
+    training_datasets={"vidore/colpali_train_set"},
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
 # ColQwen3-2B: ~2.2B params → 4.4 GB VRAM in bfloat16
 slm_colqwen3_2b = ModelMeta(
-    loader=partial(SLMColQwen3Wrapper),
+    loader=SLMColQwen3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="48f699713c10af754684e12060a2af9266462cc9",
@@ -338,13 +338,14 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1",
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
-    training_datasets=None,
+    adapted_from="Qwen/Qwen3-VL-2B-Instruct",
+    training_datasets={"vidore/colpali_train_set"},
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
 # ColQwen3-4B: ~4B params → 8 GB VRAM in bfloat16
 slm_colqwen3_4b = ModelMeta(
-    loader=partial(SLMColQwen3Wrapper),
+    loader=SLMColQwen3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="b635fbb3ab145f07608ed10a85def33544de1723",
@@ -362,13 +363,14 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1",
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
-    training_datasets=None,
+    adapted_from="Qwen/Qwen3-VL-4B-Instruct",
+    training_datasets={"vidore/colpali_train_set"},
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
 # ColQwen3-8B: ~8B params → 16 GB VRAM in bfloat16
 slm_colqwen3_8b = ModelMeta(
-    loader=partial(SLMColQwen3Wrapper),
+    loader=SLMColQwen3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="36ac136e451a7b8d8229725d69d4ec23aa4f03c8",
@@ -386,7 +388,8 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1",
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
-    training_datasets=None,
+    adapted_from="Qwen/Qwen3-VL-8B-Instruct",
+    training_datasets={"vidore/colpali_train_set"},
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
@@ -397,7 +400,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColLFM2-450M: ~450M params → 900 MB VRAM in bfloat16
 slm_collfm2_450m = ModelMeta(
-    loader=partial(SLMColLFM2Wrapper),
+    loader=SLMColLFM2Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="a65223fd6633f331ccff4483e47575c3c620dc60",
@@ -415,7 +418,8 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1",
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
-    training_datasets=None,
+    adapted_from="LiquidAI/LFM2-VL-450M",
+    training_datasets={"vidore/colpali_train_set"},
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
@@ -426,7 +430,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 
 # ColMinistral3-3B: ~3B params → 6 GB VRAM in bfloat16
 slm_colministral3_3b = ModelMeta(
-    loader=partial(SLMColMinistral3Wrapper),
+    loader=SLMColMinistral3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1",
     languages=SUPPORTED_LANGUAGES,
     revision="54aa3ffbbce20471fdcc4afc07d13989c65e71b8",
@@ -444,6 +448,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1",
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
-    training_datasets=None,
+    adapted_from="mistralai/Ministral-3B-Instruct-2410",
+    training_datasets={"vidore/colpali_train_set"},
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )

From 783f5512a7c36707f4981bc25485bff29e2e691f Mon Sep 17 00:00:00 2001
From: dgolchin <david.golchinfar@h-brs.de>
Date: Mon, 29 Dec 2025 20:20:59 +0100
Subject: [PATCH 12/20] Update mteb/models/model_implementations/slm_models.py

Co-authored-by: Roman Solomatin <samoed.roman@gmail.com>
---
 mteb/models/model_implementations/slm_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index a583f8c26d..96df09cf9c 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -339,7 +339,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="Qwen/Qwen3-VL-2B-Instruct",
-    training_datasets={"vidore/colpali_train_set"},
+    training_datasets={"MMarcoReranking", "VDRMultilingualRetrieval"} | COLPALI_TRAINING_DATA,
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 

From 6169c6f36f5d1337a1b511e827ea032e7ca8d598 Mon Sep 17 00:00:00 2001
From: David Golchinfar <d.golchin@web.de>
Date: Mon, 29 Dec 2025 20:54:58 +0100
Subject: [PATCH 13/20] fix: import COLPALI_CITATION from colpali_models and
 add model_type

---
 mteb/models/model_implementations/slm_models.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index 96df09cf9c..78113d6e38 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -25,6 +25,7 @@
 )
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
+from mteb.models.model_implementations.colpali_models import COLPALI_CITATION
 from mteb.models.model_meta import ModelMeta, ScoringFunction
 from mteb.types import Array, BatchedInput, PromptType
 
@@ -277,16 +278,6 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 }
 """
 
-COLPALI_CITATION = """
-@misc{faysse2024colpali,
-  title={ColPali: Efficient Document Retrieval with Vision Language Models},
-  author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Omrani, Bilel and Viaud, Gautier and Hudelot, C\\'eline and Colombo, Pierre},
-  year={2024},
-  eprint={2407.01449},
-  archivePrefix={arXiv},
-  primaryClass={cs.IR}
-}
-"""
 
 
 # =============================================================================
@@ -301,6 +292,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     revision="19c295a18e057d6d82754f627c09408117ffdb66",
     release_date="2025-12-20",
     modalities=["image", "text"],
+    model_type=["late-interaction"],
     n_parameters=1_700_000_000,
     memory_usage_mb=3400,
     max_tokens=262144,
@@ -326,6 +318,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     revision="48f699713c10af754684e12060a2af9266462cc9",
     release_date="2025-12-20",
     modalities=["image", "text"],
+    model_type=["late-interaction"],
     n_parameters=2_200_000_000,
     memory_usage_mb=4400,
     max_tokens=262144,
@@ -351,6 +344,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     revision="b635fbb3ab145f07608ed10a85def33544de1723",
     release_date="2025-12-20",
     modalities=["image", "text"],
+    model_type=["late-interaction"],
     n_parameters=4_000_000_000,
     memory_usage_mb=8000,
     max_tokens=262144,
@@ -376,6 +370,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     revision="36ac136e451a7b8d8229725d69d4ec23aa4f03c8",
     release_date="2025-12-20",
     modalities=["image", "text"],
+    model_type=["late-interaction"],
     n_parameters=8_000_000_000,
     memory_usage_mb=16000,
     max_tokens=262144,
@@ -406,6 +401,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     revision="a65223fd6633f331ccff4483e47575c3c620dc60",
     release_date="2025-12-20",
     modalities=["image", "text"],
+    model_type=["late-interaction"],
     n_parameters=450_000_000,
     memory_usage_mb=900,
     max_tokens=32768,
@@ -436,6 +432,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     revision="54aa3ffbbce20471fdcc4afc07d13989c65e71b8",
     release_date="2025-12-20",
     modalities=["image", "text"],
+    model_type=["late-interaction"],
     n_parameters=3_000_000_000,
     memory_usage_mb=6000,
     max_tokens=262144,

From d1ea011f1e01ec4e3d64080c6a879991ec38c59d Mon Sep 17 00:00:00 2001
From: Roman Solomatin <36135455+Samoed@users.noreply.github.com>
Date: Tue, 30 Dec 2025 00:58:09 +0500
Subject: [PATCH 14/20] add training datasets

---
 .../model_implementations/slm_models.py       | 70 ++++++++++++-------
 1 file changed, 44 insertions(+), 26 deletions(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index 78113d6e38..956ead25d2 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -25,7 +25,10 @@
 )
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
-from mteb.models.model_implementations.colpali_models import COLPALI_CITATION
+from mteb.models.model_implementations.colpali_models import (
+    COLPALI_CITATION,
+    COLPALI_TRAINING_DATA,
+)
 from mteb.models.model_meta import ModelMeta, ScoringFunction
 from mteb.types import Array, BatchedInput, PromptType
 
@@ -50,13 +53,14 @@
 # Base Wrapper Class
 # =============================================================================
 
+
 class SLMBaseWrapper(AbsEncoder):
     """
     Base wrapper for SauerkrautLM multi-vector embedding models.
-    
+
     All our models use late interaction (MaxSim) for retrieval scoring.
     """
-    
+
     model_class = None
     processor_class = None
     model_name_prefix = "SLM"
@@ -73,7 +77,7 @@ def __init__(
         requires_package(
             self, "sauerkrautlm_colpali", model_name, "pip install sauerkrautlm-colpali"
         )
-        
+
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self._load_model_and_processor(model_name, revision, use_flash_attn, **kwargs)
         self.mdl = self.mdl.to(self.device)
@@ -95,7 +99,7 @@ def encode(
     ) -> Array:
         text_embeddings = None
         image_embeddings = None
-        
+
         if "text" in inputs.dataset.features:
             text_embeddings = self.get_text_embeddings(inputs, **kwargs)
         if "image" in inputs.dataset.features:
@@ -141,10 +145,9 @@ def get_image_embeddings(
         with torch.no_grad():
             for batch in tqdm(images, desc="Encoding images"):
                 from PIL import Image
+
                 imgs = [
-                    F.to_pil_image(b)
-                    if not isinstance(b, Image.Image)
-                    else b
+                    F.to_pil_image(b) if not isinstance(b, Image.Image) else b
                     for b in batch["image"]
                 ]
                 inputs = self.processor.process_images(imgs)
@@ -164,7 +167,7 @@ def get_text_embeddings(
         **kwargs,
     ) -> torch.Tensor:
         all_embeds = []
-        
+
         with torch.no_grad():
             for batch in tqdm(texts, desc="Encoding texts"):
                 inputs = self.processor.process_queries(batch["text"])
@@ -178,16 +181,16 @@ def get_text_embeddings(
         return padded
 
     def calculate_probs(
-        self, 
-        text_embeddings: torch.Tensor, 
+        self,
+        text_embeddings: torch.Tensor,
         image_embeddings: torch.Tensor,
     ) -> torch.Tensor:
         scores = self.similarity(text_embeddings, image_embeddings).T
         return scores.softmax(dim=-1)
 
     def similarity(
-        self, 
-        a: torch.Tensor | list, 
+        self,
+        a: torch.Tensor | list,
         b: torch.Tensor | list,
     ) -> torch.Tensor:
         return self.processor.score(a, b, device=self.device)
@@ -197,11 +200,15 @@ def similarity(
 # ColQwen3 Wrapper
 # =============================================================================
 
+
 class SLMColQwen3Wrapper(SLMBaseWrapper):
     """Wrapper for SLM-ColQwen3 models (Qwen3-VL backbone)."""
 
     def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
-        from sauerkrautlm_colpali.models.qwen3.colqwen3 import ColQwen3, ColQwen3Processor
+        from sauerkrautlm_colpali.models.qwen3.colqwen3 import (
+            ColQwen3,
+            ColQwen3Processor,
+        )
 
         self.mdl = ColQwen3.from_pretrained(
             model_name,
@@ -215,7 +222,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
             model_name,
             revision=revision,
         )
-        
+
         logger.info(f"SLM-ColQwen3 loaded: dim={self.mdl.dim}, device={self.device}")
 
 
@@ -223,6 +230,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 # ColLFM2 Wrapper
 # =============================================================================
 
+
 class SLMColLFM2Wrapper(SLMBaseWrapper):
     """Wrapper for SLM-ColLFM2 models (LFM2 backbone)."""
 
@@ -240,7 +248,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
             model_name,
             revision=revision,
         )
-        
+
         logger.info(f"SLM-ColLFM2 loaded: dim={self.mdl.dim}, device={self.device}")
 
 
@@ -248,11 +256,15 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 # ColMinistral3 Wrapper
 # =============================================================================
 
+
 class SLMColMinistral3Wrapper(SLMBaseWrapper):
     """Wrapper for SLM-ColMinistral3 models (Ministral3 backbone)."""
 
     def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
-        from sauerkrautlm_colpali.models.ministral3.colministral3 import ColMinistral3, ColMinistral3Processor
+        from sauerkrautlm_colpali.models.ministral3.colministral3 import (
+            ColMinistral3,
+            ColMinistral3Processor,
+        )
 
         self.mdl = ColMinistral3.from_pretrained(
             model_name,
@@ -260,8 +272,10 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
         )
 
         self.processor = ColMinistral3Processor.from_pretrained(model_name)
-        
-        logger.info(f"SLM-ColMinistral3 loaded: dim={self.mdl.dim}, device={self.device}")
+
+        logger.info(
+            f"SLM-ColMinistral3 loaded: dim={self.mdl.dim}, device={self.device}"
+        )
 
 
 # =============================================================================
@@ -279,11 +293,15 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 """
 
 
-
 # =============================================================================
 # ColQwen3 Model Metadata
 # =============================================================================
 
+_SLM_TRAINING_DATASETS = {
+    "MMarcoReranking",
+    "VDRMultilingualRetrieval",
+} | COLPALI_TRAINING_DATA
+
 # ColQwen3-1.7B Turbo: ~1.7B params → 3.4 GB VRAM in bfloat16
 slm_colqwen3_1_7b_turbo = ModelMeta(
     loader=SLMColQwen3Wrapper,
@@ -306,7 +324,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="Qwen/Qwen3-VL-2B-Instruct",
-    training_datasets={"vidore/colpali_train_set"},
+    training_datasets=_SLM_TRAINING_DATASETS,
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
@@ -332,7 +350,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="Qwen/Qwen3-VL-2B-Instruct",
-    training_datasets={"MMarcoReranking", "VDRMultilingualRetrieval"} | COLPALI_TRAINING_DATA,
+    training_datasets=_SLM_TRAINING_DATASETS,
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
@@ -358,7 +376,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="Qwen/Qwen3-VL-4B-Instruct",
-    training_datasets={"vidore/colpali_train_set"},
+    training_datasets=_SLM_TRAINING_DATASETS,
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
@@ -384,7 +402,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="Qwen/Qwen3-VL-8B-Instruct",
-    training_datasets={"vidore/colpali_train_set"},
+    training_datasets=_SLM_TRAINING_DATASETS,
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
@@ -415,7 +433,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="LiquidAI/LFM2-VL-450M",
-    training_datasets={"vidore/colpali_train_set"},
+    training_datasets=_SLM_TRAINING_DATASETS,
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
@@ -446,6 +464,6 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="mistralai/Ministral-3B-Instruct-2410",
-    training_datasets={"vidore/colpali_train_set"},
+    training_datasets=_SLM_TRAINING_DATASETS,
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )

From 420322b57b54105cacc481a8117c39efb0bc32fa Mon Sep 17 00:00:00 2001
From: David Golchinfar <d.golchin@web.de>
Date: Mon, 29 Dec 2025 22:14:41 +0100
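Subject: [PATCH 15/20] fix: remove section headers and use PyPI package
 instead of Git URL

Note: this commit leaves unresolved merge-conflict markers
(<<<<<<< HEAD / ======= / >>>>>>> 32881a4) in slm_models.py, so the module
is not importable at this revision; patch 16/20 of this series resolves them.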
---
 .../model_implementations/slm_models.py       | 37 ++++++++++---------
 pyproject.toml                                |  2 +-
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index 956ead25d2..3c9a00ba2c 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -35,10 +35,6 @@
 logger = logging.getLogger(__name__)
 
 
-# =============================================================================
-# Supported Languages
-# =============================================================================
-
 SUPPORTED_LANGUAGES = [
     "eng-Latn",  # English
     "deu-Latn",  # German
@@ -49,11 +45,14 @@
 ]
 
 
+<<<<<<< HEAD
 # =============================================================================
 # Base Wrapper Class
 # =============================================================================
 
 
+=======
+>>>>>>> 32881a4 (fix: remove section headers and use PyPI package instead of Git URL)
 class SLMBaseWrapper(AbsEncoder):
     """
     Base wrapper for SauerkrautLM multi-vector embedding models.
@@ -196,11 +195,14 @@ def similarity(
         return self.processor.score(a, b, device=self.device)
 
 
+<<<<<<< HEAD
 # =============================================================================
 # ColQwen3 Wrapper
 # =============================================================================
 
 
+=======
+>>>>>>> 32881a4 (fix: remove section headers and use PyPI package instead of Git URL)
 class SLMColQwen3Wrapper(SLMBaseWrapper):
     """Wrapper for SLM-ColQwen3 models (Qwen3-VL backbone)."""
 
@@ -226,11 +228,14 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
         logger.info(f"SLM-ColQwen3 loaded: dim={self.mdl.dim}, device={self.device}")
 
 
+<<<<<<< HEAD
 # =============================================================================
 # ColLFM2 Wrapper
 # =============================================================================
 
 
+=======
+>>>>>>> 32881a4 (fix: remove section headers and use PyPI package instead of Git URL)
 class SLMColLFM2Wrapper(SLMBaseWrapper):
     """Wrapper for SLM-ColLFM2 models (LFM2 backbone)."""
 
@@ -252,11 +257,14 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
         logger.info(f"SLM-ColLFM2 loaded: dim={self.mdl.dim}, device={self.device}")
 
 
+<<<<<<< HEAD
 # =============================================================================
 # ColMinistral3 Wrapper
 # =============================================================================
 
 
+=======
+>>>>>>> 32881a4 (fix: remove section headers and use PyPI package instead of Git URL)
 class SLMColMinistral3Wrapper(SLMBaseWrapper):
     """Wrapper for SLM-ColMinistral3 models (Ministral3 backbone)."""
 
@@ -278,10 +286,6 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
         )
 
 
-# =============================================================================
-# Citations
-# =============================================================================
-
 SAUERKRAUTLM_CITATION = """
 @misc{sauerkrautlm-colpali-2025,
   title={SauerkrautLM-ColPali: Multi-Vector Vision Retrieval Models},
@@ -293,6 +297,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 """
 
 
+<<<<<<< HEAD
 # =============================================================================
 # ColQwen3 Model Metadata
 # =============================================================================
@@ -302,6 +307,8 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     "VDRMultilingualRetrieval",
 } | COLPALI_TRAINING_DATA
 
+=======
+>>>>>>> 32881a4 (fix: remove section headers and use PyPI package instead of Git URL)
 # ColQwen3-1.7B Turbo: ~1.7B params → 3.4 GB VRAM in bfloat16
 slm_colqwen3_1_7b_turbo = ModelMeta(
     loader=SLMColQwen3Wrapper,
@@ -350,7 +357,11 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="Qwen/Qwen3-VL-2B-Instruct",
+<<<<<<< HEAD
     training_datasets=_SLM_TRAINING_DATASETS,
+=======
+    training_datasets={"vidore/colpali_train_set"},
+>>>>>>> 32881a4 (fix: remove section headers and use PyPI package instead of Git URL)
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
@@ -406,11 +417,6 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
-
-# =============================================================================
-# ColLFM2 Model Metadata
-# =============================================================================
-
 # ColLFM2-450M: ~450M params → 900 MB VRAM in bfloat16
 slm_collfm2_450m = ModelMeta(
     loader=SLMColLFM2Wrapper,
@@ -437,11 +443,6 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
-
-# =============================================================================
-# ColMinistral3 Model Metadata
-# =============================================================================
-
 # ColMinistral3-3B: ~3B params → 6 GB VRAM in bfloat16
 slm_colministral3_3b = ModelMeta(
     loader=SLMColMinistral3Wrapper,
diff --git a/pyproject.toml b/pyproject.toml
index 2a6a1edc18..38145231a3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -93,7 +93,7 @@ nomic = ["einops>=0.8.1"]
 ark = ["volcengine-python-sdk[ark]==3.0.2", "tiktoken>=0.8.0"]
 colpali_engine = ["colpali_engine>=0.3.12"]
 colqwen3 = ["transformers>=4.57", "torchvision>=0.22.1"]
-sauerkrautlm-colpali = ["sauerkrautlm-colpali @ git+https://github.com/VAGOsolutions/sauerkrautlm-colpali.git"]
+sauerkrautlm-colpali = ["sauerkrautlm-colpali>=0.1.0"]
 xet = ["huggingface_hub>=0.32.0"]
 youtu = ["tencentcloud-sdk-python-common>=3.0.1454", "tencentcloud-sdk-python-lkeap>=3.0.1451"]
 llama-embed-nemotron = ["transformers==4.51.0"]

From e3390509abeb54e0f7985bce09689a614e289676 Mon Sep 17 00:00:00 2001
From: David Golchinfar <d.golchin@web.de>
Date: Tue, 30 Dec 2025 20:26:21 +0100
Subject: [PATCH 16/20] fix: resolve merge conflicts and remove section headers

---
 .../model_implementations/slm_models.py       | 114 ++++--------------
 1 file changed, 25 insertions(+), 89 deletions(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index 3c9a00ba2c..4a9a989415 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -25,10 +25,7 @@
 )
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
-from mteb.models.model_implementations.colpali_models import (
-    COLPALI_CITATION,
-    COLPALI_TRAINING_DATA,
-)
+from mteb.models.model_implementations.colpali_models import COLPALI_CITATION
 from mteb.models.model_meta import ModelMeta, ScoringFunction
 from mteb.types import Array, BatchedInput, PromptType
 
@@ -45,21 +42,13 @@
 ]
 
 
-<<<<<<< HEAD
-# =============================================================================
-# Base Wrapper Class
-# =============================================================================
-
-
-=======
->>>>>>> 32881a4 (fix: remove section headers and use PyPI package instead of Git URL)
 class SLMBaseWrapper(AbsEncoder):
     """
     Base wrapper for SauerkrautLM multi-vector embedding models.
-
+    
     All our models use late interaction (MaxSim) for retrieval scoring.
     """
-
+    
     model_class = None
     processor_class = None
     model_name_prefix = "SLM"
@@ -76,7 +65,7 @@ def __init__(
         requires_package(
             self, "sauerkrautlm_colpali", model_name, "pip install sauerkrautlm-colpali"
         )
-
+        
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self._load_model_and_processor(model_name, revision, use_flash_attn, **kwargs)
         self.mdl = self.mdl.to(self.device)
@@ -98,7 +87,7 @@ def encode(
     ) -> Array:
         text_embeddings = None
         image_embeddings = None
-
+        
         if "text" in inputs.dataset.features:
             text_embeddings = self.get_text_embeddings(inputs, **kwargs)
         if "image" in inputs.dataset.features:
@@ -144,9 +133,10 @@ def get_image_embeddings(
         with torch.no_grad():
             for batch in tqdm(images, desc="Encoding images"):
                 from PIL import Image
-
                 imgs = [
-                    F.to_pil_image(b) if not isinstance(b, Image.Image) else b
+                    F.to_pil_image(b)
+                    if not isinstance(b, Image.Image)
+                    else b
                     for b in batch["image"]
                 ]
                 inputs = self.processor.process_images(imgs)
@@ -166,7 +156,7 @@ def get_text_embeddings(
         **kwargs,
     ) -> torch.Tensor:
         all_embeds = []
-
+        
         with torch.no_grad():
             for batch in tqdm(texts, desc="Encoding texts"):
                 inputs = self.processor.process_queries(batch["text"])
@@ -180,37 +170,26 @@ def get_text_embeddings(
         return padded
 
     def calculate_probs(
-        self,
-        text_embeddings: torch.Tensor,
+        self, 
+        text_embeddings: torch.Tensor, 
         image_embeddings: torch.Tensor,
     ) -> torch.Tensor:
         scores = self.similarity(text_embeddings, image_embeddings).T
         return scores.softmax(dim=-1)
 
     def similarity(
-        self,
-        a: torch.Tensor | list,
+        self, 
+        a: torch.Tensor | list, 
         b: torch.Tensor | list,
     ) -> torch.Tensor:
         return self.processor.score(a, b, device=self.device)
 
 
-<<<<<<< HEAD
-# =============================================================================
-# ColQwen3 Wrapper
-# =============================================================================
-
-
-=======
->>>>>>> 32881a4 (fix: remove section headers and use PyPI package instead of Git URL)
 class SLMColQwen3Wrapper(SLMBaseWrapper):
     """Wrapper for SLM-ColQwen3 models (Qwen3-VL backbone)."""
 
     def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
-        from sauerkrautlm_colpali.models.qwen3.colqwen3 import (
-            ColQwen3,
-            ColQwen3Processor,
-        )
+        from sauerkrautlm_colpali.models.qwen3.colqwen3 import ColQwen3, ColQwen3Processor
 
         self.mdl = ColQwen3.from_pretrained(
             model_name,
@@ -224,18 +203,10 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
             model_name,
             revision=revision,
         )
-
+        
         logger.info(f"SLM-ColQwen3 loaded: dim={self.mdl.dim}, device={self.device}")
 
 
-<<<<<<< HEAD
-# =============================================================================
-# ColLFM2 Wrapper
-# =============================================================================
-
-
-=======
->>>>>>> 32881a4 (fix: remove section headers and use PyPI package instead of Git URL)
 class SLMColLFM2Wrapper(SLMBaseWrapper):
     """Wrapper for SLM-ColLFM2 models (LFM2 backbone)."""
 
@@ -253,26 +224,15 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
             model_name,
             revision=revision,
         )
-
+        
         logger.info(f"SLM-ColLFM2 loaded: dim={self.mdl.dim}, device={self.device}")
 
 
-<<<<<<< HEAD
-# =============================================================================
-# ColMinistral3 Wrapper
-# =============================================================================
-
-
-=======
->>>>>>> 32881a4 (fix: remove section headers and use PyPI package instead of Git URL)
 class SLMColMinistral3Wrapper(SLMBaseWrapper):
     """Wrapper for SLM-ColMinistral3 models (Ministral3 backbone)."""
 
     def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
-        from sauerkrautlm_colpali.models.ministral3.colministral3 import (
-            ColMinistral3,
-            ColMinistral3Processor,
-        )
+        from sauerkrautlm_colpali.models.ministral3.colministral3 import ColMinistral3, ColMinistral3Processor
 
         self.mdl = ColMinistral3.from_pretrained(
             model_name,
@@ -280,10 +240,8 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
         )
 
         self.processor = ColMinistral3Processor.from_pretrained(model_name)
-
-        logger.info(
-            f"SLM-ColMinistral3 loaded: dim={self.mdl.dim}, device={self.device}"
-        )
+        
+        logger.info(f"SLM-ColMinistral3 loaded: dim={self.mdl.dim}, device={self.device}")
 
 
 SAUERKRAUTLM_CITATION = """
@@ -297,19 +255,6 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
 """
 
 
-<<<<<<< HEAD
-# =============================================================================
-# ColQwen3 Model Metadata
-# =============================================================================
-
-_SLM_TRAINING_DATASETS = {
-    "MMarcoReranking",
-    "VDRMultilingualRetrieval",
-} | COLPALI_TRAINING_DATA
-
-=======
->>>>>>> 32881a4 (fix: remove section headers and use PyPI package instead of Git URL)
-# ColQwen3-1.7B Turbo: ~1.7B params → 3.4 GB VRAM in bfloat16
 slm_colqwen3_1_7b_turbo = ModelMeta(
     loader=SLMColQwen3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1",
@@ -331,11 +276,10 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="Qwen/Qwen3-VL-2B-Instruct",
-    training_datasets=_SLM_TRAINING_DATASETS,
+    training_datasets={"vidore/colpali_train_set"},
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
-# ColQwen3-2B: ~2.2B params → 4.4 GB VRAM in bfloat16
 slm_colqwen3_2b = ModelMeta(
     loader=SLMColQwen3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1",
@@ -357,15 +301,10 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="Qwen/Qwen3-VL-2B-Instruct",
-<<<<<<< HEAD
-    training_datasets=_SLM_TRAINING_DATASETS,
-=======
     training_datasets={"vidore/colpali_train_set"},
->>>>>>> 32881a4 (fix: remove section headers and use PyPI package instead of Git URL)
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
-# ColQwen3-4B: ~4B params → 8 GB VRAM in bfloat16
 slm_colqwen3_4b = ModelMeta(
     loader=SLMColQwen3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1",
@@ -387,11 +326,10 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="Qwen/Qwen3-VL-4B-Instruct",
-    training_datasets=_SLM_TRAINING_DATASETS,
+    training_datasets={"vidore/colpali_train_set"},
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
-# ColQwen3-8B: ~8B params → 16 GB VRAM in bfloat16
 slm_colqwen3_8b = ModelMeta(
     loader=SLMColQwen3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1",
@@ -413,11 +351,10 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="Qwen/Qwen3-VL-8B-Instruct",
-    training_datasets=_SLM_TRAINING_DATASETS,
+    training_datasets={"vidore/colpali_train_set"},
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
-# ColLFM2-450M: ~450M params → 900 MB VRAM in bfloat16
 slm_collfm2_450m = ModelMeta(
     loader=SLMColLFM2Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1",
@@ -430,7 +367,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     memory_usage_mb=900,
     max_tokens=32768,
     embed_dim=128,
-    license="https://huggingface.co/LiquidAI/LFM2-VL-450M/blob/main/LICENSE",  # LiquidAI LFM 1.0 License
+    license="https://huggingface.co/LiquidAI/LFM2-VL-450M/blob/main/LICENSE",
     open_weights=True,
     public_training_code=None,
     public_training_data=None,
@@ -439,11 +376,10 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="LiquidAI/LFM2-VL-450M",
-    training_datasets=_SLM_TRAINING_DATASETS,
+    training_datasets={"vidore/colpali_train_set"},
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
-# ColMinistral3-3B: ~3B params → 6 GB VRAM in bfloat16
 slm_colministral3_3b = ModelMeta(
     loader=SLMColMinistral3Wrapper,
     name="VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1",
@@ -465,6 +401,6 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="mistralai/Ministral-3B-Instruct-2410",
-    training_datasets=_SLM_TRAINING_DATASETS,
+    training_datasets={"vidore/colpali_train_set"},
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )

From b63607ef8f3ad45eca645bf377cc8c0cadd08269 Mon Sep 17 00:00:00 2001
From: David Golchinfar <d.golchin@web.de>
Date: Tue, 30 Dec 2025 23:02:32 +0100
Subject: [PATCH 17/20] fix: use COLPALI_TRAINING_DATA for training_datasets

---
 mteb/models/model_implementations/slm_models.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index 4a9a989415..2074dcbae7 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -25,7 +25,7 @@
 )
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
-from mteb.models.model_implementations.colpali_models import COLPALI_CITATION
+from mteb.models.model_implementations.colpali_models import COLPALI_CITATION, COLPALI_TRAINING_DATA
 from mteb.models.model_meta import ModelMeta, ScoringFunction
 from mteb.types import Array, BatchedInput, PromptType
 
@@ -276,7 +276,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="Qwen/Qwen3-VL-2B-Instruct",
-    training_datasets={"vidore/colpali_train_set"},
+    training_datasets=COLPALI_TRAINING_DATA,
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
@@ -301,7 +301,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="Qwen/Qwen3-VL-2B-Instruct",
-    training_datasets={"vidore/colpali_train_set"},
+    training_datasets=COLPALI_TRAINING_DATA,
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
@@ -326,7 +326,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="Qwen/Qwen3-VL-4B-Instruct",
-    training_datasets={"vidore/colpali_train_set"},
+    training_datasets=COLPALI_TRAINING_DATA,
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
@@ -351,7 +351,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="Qwen/Qwen3-VL-8B-Instruct",
-    training_datasets={"vidore/colpali_train_set"},
+    training_datasets=COLPALI_TRAINING_DATA,
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
@@ -376,7 +376,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="LiquidAI/LFM2-VL-450M",
-    training_datasets={"vidore/colpali_train_set"},
+    training_datasets=COLPALI_TRAINING_DATA,
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )
 
@@ -401,6 +401,6 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     similarity_fn_name=ScoringFunction.MAX_SIM,
     use_instructions=True,
     adapted_from="mistralai/Ministral-3B-Instruct-2410",
-    training_datasets={"vidore/colpali_train_set"},
+    training_datasets=COLPALI_TRAINING_DATA,
     citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
 )

From ca33bdceb91655eafde28cbc06a69260a09b09de Mon Sep 17 00:00:00 2001
From: David Golchinfar <d.golchin@web.de>
Date: Sat, 3 Jan 2026 20:43:27 +0100
Subject: [PATCH 18/20] fix: use exact n_parameters and memory_usage_mb values
 from HuggingFace

---
 .../model_implementations/slm_models.py       | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index 2074dcbae7..3805dc146a 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -263,8 +263,8 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     release_date="2025-12-20",
     modalities=["image", "text"],
     model_type=["late-interaction"],
-    n_parameters=1_700_000_000,
-    memory_usage_mb=3400,
+    n_parameters=1_756_572_288,
+    memory_usage_mb=3350,
     max_tokens=262144,
     embed_dim=128,
     license="apache-2.0",
@@ -288,8 +288,8 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     release_date="2025-12-20",
     modalities=["image", "text"],
     model_type=["late-interaction"],
-    n_parameters=2_200_000_000,
-    memory_usage_mb=4400,
+    n_parameters=2_127_794_304,
+    memory_usage_mb=4058,
     max_tokens=262144,
     embed_dim=128,
     license="apache-2.0",
@@ -313,8 +313,8 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     release_date="2025-12-20",
     modalities=["image", "text"],
     model_type=["late-interaction"],
-    n_parameters=4_000_000_000,
-    memory_usage_mb=8000,
+    n_parameters=4_438_143_616,
+    memory_usage_mb=8465,
     max_tokens=262144,
     embed_dim=128,
     license="apache-2.0",
@@ -338,8 +338,8 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     release_date="2025-12-20",
     modalities=["image", "text"],
     model_type=["late-interaction"],
-    n_parameters=8_000_000_000,
-    memory_usage_mb=16000,
+    n_parameters=8_145_318_256,
+    memory_usage_mb=15536,
     max_tokens=262144,
     embed_dim=128,
     license="apache-2.0",
@@ -363,8 +363,8 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     release_date="2025-12-20",
     modalities=["image", "text"],
     model_type=["late-interaction"],
-    n_parameters=450_000_000,
-    memory_usage_mb=900,
+    n_parameters=450_953_856,
+    memory_usage_mb=860,
     max_tokens=32768,
     embed_dim=128,
     license="https://huggingface.co/LiquidAI/LFM2-VL-450M/blob/main/LICENSE",
@@ -388,8 +388,8 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
     release_date="2025-12-20",
     modalities=["image", "text"],
     model_type=["late-interaction"],
-    n_parameters=3_000_000_000,
-    memory_usage_mb=6000,
+    n_parameters=4_252_136_448,
+    memory_usage_mb=8110,
     max_tokens=262144,
     embed_dim=128,
     license="apache-2.0",

From 903cf58a6714cef2da4886c922660a46acd1468c Mon Sep 17 00:00:00 2001
From: Roman Solomatin <36135455+Samoed@users.noreply.github.com>
Date: Sun, 4 Jan 2026 13:12:15 +0500
Subject: [PATCH 19/20] build: skip sauerkrautlm-colpali extra on Python 3.14+

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 7274d29a48..a1820e5e22 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -95,7 +95,7 @@ nomic = ["einops>=0.8.1"]
 ark = ["volcengine-python-sdk[ark]==3.0.2", "tiktoken>=0.8.0"]
 colpali_engine = ["colpali_engine>=0.3.12; python_full_version < '3.14'"]
 colqwen3 = ["transformers>=4.57", "torchvision>=0.22.1"]
-sauerkrautlm-colpali = ["sauerkrautlm-colpali>=0.1.0"]
+sauerkrautlm-colpali = ["sauerkrautlm-colpali>=0.1.0; python_full_version < '3.14'"]
 xet = ["huggingface_hub>=0.32.0"]
 youtu = ["tencentcloud-sdk-python-common>=3.0.1454", "tencentcloud-sdk-python-lkeap>=3.0.1451"]
 llama-embed-nemotron = ["transformers==4.51.0"]

From 40170cd47e8fedd0ce1e27aac234aad91586d3b5 Mon Sep 17 00:00:00 2001
From: Roman Solomatin <36135455+Samoed@users.noreply.github.com>
Date: Sun, 4 Jan 2026 14:23:40 +0500
Subject: [PATCH 20/20] style: apply formatter fixes to slm_models.py

---
 .../model_implementations/slm_models.py       | 48 +++++++++++--------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/mteb/models/model_implementations/slm_models.py b/mteb/models/model_implementations/slm_models.py
index 3805dc146a..a0f152c6b2 100644
--- a/mteb/models/model_implementations/slm_models.py
+++ b/mteb/models/model_implementations/slm_models.py
@@ -25,7 +25,10 @@
 )
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
-from mteb.models.model_implementations.colpali_models import COLPALI_CITATION, COLPALI_TRAINING_DATA
+from mteb.models.model_implementations.colpali_models import (
+    COLPALI_CITATION,
+    COLPALI_TRAINING_DATA,
+)
 from mteb.models.model_meta import ModelMeta, ScoringFunction
 from mteb.types import Array, BatchedInput, PromptType
 
@@ -45,10 +48,10 @@
 class SLMBaseWrapper(AbsEncoder):
     """
     Base wrapper for SauerkrautLM multi-vector embedding models.
-    
+
     All our models use late interaction (MaxSim) for retrieval scoring.
     """
-    
+
     model_class = None
     processor_class = None
     model_name_prefix = "SLM"
@@ -65,7 +68,7 @@ def __init__(
         requires_package(
             self, "sauerkrautlm_colpali", model_name, "pip install sauerkrautlm-colpali"
         )
-        
+
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self._load_model_and_processor(model_name, revision, use_flash_attn, **kwargs)
         self.mdl = self.mdl.to(self.device)
@@ -87,7 +90,7 @@ def encode(
     ) -> Array:
         text_embeddings = None
         image_embeddings = None
-        
+
         if "text" in inputs.dataset.features:
             text_embeddings = self.get_text_embeddings(inputs, **kwargs)
         if "image" in inputs.dataset.features:
@@ -133,10 +136,9 @@ def get_image_embeddings(
         with torch.no_grad():
             for batch in tqdm(images, desc="Encoding images"):
                 from PIL import Image
+
                 imgs = [
-                    F.to_pil_image(b)
-                    if not isinstance(b, Image.Image)
-                    else b
+                    F.to_pil_image(b) if not isinstance(b, Image.Image) else b
                     for b in batch["image"]
                 ]
                 inputs = self.processor.process_images(imgs)
@@ -156,7 +158,7 @@ def get_text_embeddings(
         **kwargs,
     ) -> torch.Tensor:
         all_embeds = []
-        
+
         with torch.no_grad():
             for batch in tqdm(texts, desc="Encoding texts"):
                 inputs = self.processor.process_queries(batch["text"])
@@ -170,16 +172,16 @@ def get_text_embeddings(
         return padded
 
     def calculate_probs(
-        self, 
-        text_embeddings: torch.Tensor, 
+        self,
+        text_embeddings: torch.Tensor,
         image_embeddings: torch.Tensor,
     ) -> torch.Tensor:
         scores = self.similarity(text_embeddings, image_embeddings).T
         return scores.softmax(dim=-1)
 
     def similarity(
-        self, 
-        a: torch.Tensor | list, 
+        self,
+        a: torch.Tensor | list,
         b: torch.Tensor | list,
     ) -> torch.Tensor:
         return self.processor.score(a, b, device=self.device)
@@ -189,7 +191,10 @@ class SLMColQwen3Wrapper(SLMBaseWrapper):
     """Wrapper for SLM-ColQwen3 models (Qwen3-VL backbone)."""
 
     def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
-        from sauerkrautlm_colpali.models.qwen3.colqwen3 import ColQwen3, ColQwen3Processor
+        from sauerkrautlm_colpali.models.qwen3.colqwen3 import (
+            ColQwen3,
+            ColQwen3Processor,
+        )
 
         self.mdl = ColQwen3.from_pretrained(
             model_name,
@@ -203,7 +208,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
             model_name,
             revision=revision,
         )
-        
+
         logger.info(f"SLM-ColQwen3 loaded: dim={self.mdl.dim}, device={self.device}")
 
 
@@ -224,7 +229,7 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
             model_name,
             revision=revision,
         )
-        
+
         logger.info(f"SLM-ColLFM2 loaded: dim={self.mdl.dim}, device={self.device}")
 
 
@@ -232,7 +237,10 @@ class SLMColMinistral3Wrapper(SLMBaseWrapper):
     """Wrapper for SLM-ColMinistral3 models (Ministral3 backbone)."""
 
     def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
-        from sauerkrautlm_colpali.models.ministral3.colministral3 import ColMinistral3, ColMinistral3Processor
+        from sauerkrautlm_colpali.models.ministral3.colministral3 import (
+            ColMinistral3,
+            ColMinistral3Processor,
+        )
 
         self.mdl = ColMinistral3.from_pretrained(
             model_name,
@@ -240,8 +248,10 @@ def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwar
         )
 
         self.processor = ColMinistral3Processor.from_pretrained(model_name)
-        
-        logger.info(f"SLM-ColMinistral3 loaded: dim={self.mdl.dim}, device={self.device}")
+
+        logger.info(
+            f"SLM-ColMinistral3 loaded: dim={self.mdl.dim}, device={self.device}"
+        )
 
 
 SAUERKRAUTLM_CITATION = """