From 86484383307937db422678a00235c4ee313c4268 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Tue, 16 Dec 2025 14:51:49 +0300 Subject: [PATCH 1/8] add nemotron rerank --- .../nvidia_llama_nemoretriever_colemb.py | 70 +++++++++++++++++-- mteb/models/model_meta.py | 2 +- mteb/models/sentence_transformer_wrapper.py | 8 ++- 3 files changed, 71 insertions(+), 9 deletions(-) diff --git a/mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py b/mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py index 72c31a9253..1afa658b34 100644 --- a/mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +++ b/mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py @@ -1,17 +1,16 @@ -from typing import TYPE_CHECKING, Any +from typing import Any import torch +from packaging.version import Version from torch.utils.data import DataLoader +from transformers import __version__ as transformers_version from mteb.abstasks.task_metadata import TaskMetadata +from mteb.models import CrossEncoderWrapper from mteb.models.abs_encoder import AbsEncoder -from mteb.models.model_meta import ModelMeta +from mteb.models.model_meta import ModelMeta, ScoringFunction from mteb.types import Array, BatchedInput, PromptType -if TYPE_CHECKING: - pass - - LLAMA_NEMORETRIEVER_CITATION = """@misc{xu2025llamanemoretrievercolembedtopperforming, title={Llama Nemoretriever Colembed: Top-Performing Text-Image Retrieval Model}, author={Mengyao Xu and Gabriel Moreira and Ronay Ak and Radek Osmulski and Yauhen Babakhin and Zhiding Yu and Benedikt Schifferer and Even Oldridge}, @@ -34,6 +33,14 @@ def __init__( attn_implementation="flash_attention_2", **kwargs, ): + required_transformers_version = "4.49.0" + + if Version(transformers_version) != Version(required_transformers_version): + raise RuntimeError( + f"transformers version {transformers_version} is not match with required " + f"install version {required_transformers_version} to run `nvidia/NV-Embed-v2`" + ) + from transformers import AutoModel self.model = AutoModel.from_pretrained( @@ -189,3 +196,54 @@ def encode( training_datasets=TRAINING_DATA, citation=LLAMA_NEMORETRIEVER_CITATION, ) + + +def _nemotron_rerank_model(model: str, revision: str, **kwargs) -> CrossEncoderWrapper: + required_transformers_version = "4.42.4" + + if Version(transformers_version) != Version(required_transformers_version): + raise RuntimeError( + f"transformers version {transformers_version} is not match with required " + f"install version {required_transformers_version} to run `nvidia/NV-Embed-v2`" + ) + + return CrossEncoderWrapper( + model=model, + revision=revision, + **kwargs, + ) + + +nemotron_rerank_1b_v2 = ModelMeta( + loader=_nemotron_rerank_model, + loader_kwargs=dict( + trust_remote_code=True, + query_prefix="question:", + passage_prefix=" \n \n passage:", + ), + name="nvidia/llama-nemotron-rerank-1b-v2", + revision="78efcfdc23b53a753f6c73f2d78b18132a34ac4d", + release_date="2025-10-16", + languages=["eng-Latn"], + n_parameters=1235816448, + memory_usage_mb=2357.0, + max_tokens=4096, + embed_dim=2048, + license="https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/", + open_weights=True, + public_training_code=None, + public_training_data=None, + framework=["PyTorch", "Sentence Transformers"], + reference="https://huggingface.co/nvidia/llama-nemotron-rerank-1b-v2", + similarity_fn_name=ScoringFunction.COSINE, + use_instructions=None, + training_datasets=set( + # private + ), + adapted_from="meta-llama/Llama-3.2-1B", + superseded_by=None, + modalities=["text"], + is_cross_encoder=True, + citation=None, + contacts=None, +) diff --git a/mteb/models/model_meta.py b/mteb/models/model_meta.py index a4a657a843..9db864ab6f 100644 --- a/mteb/models/model_meta.py +++ b/mteb/models/model_meta.py @@ -291,7 +291,7 @@ def _from_hub( revision = revisions[0].commit_id if revisions else None release_date = cls.fetch_release_date(model_name) - model_license = card_data.license + model_license = card_data.license if card_data.license != "other" else None n_parameters = cls._calculate_num_parameters_from_hub(model_name) memory_usage_mb = cls._calculate_memory_usage_mb(model_name, n_parameters) if model_config and hasattr(model_config, "hidden_size"): diff --git a/mteb/models/sentence_transformer_wrapper.py b/mteb/models/sentence_transformer_wrapper.py index 2330d97037..c5f84a51db 100644 --- a/mteb/models/sentence_transformer_wrapper.py +++ b/mteb/models/sentence_transformer_wrapper.py @@ -261,6 +261,8 @@ def __init__( self, model: CrossEncoder | str, revision: str | None = None, + query_prefix: str | None = None, + passage_prefix: str | None = None, **kwargs, ) -> None: from sentence_transformers import CrossEncoder @@ -271,6 +273,8 @@ def __init__( self.model = CrossEncoder(model, revision=revision, **kwargs) self.mteb_model_meta = ModelMeta.from_cross_encoder(self.model) + self.query_prefix = query_prefix or "" + self.passage_prefix = passage_prefix or "" def predict( self, @@ -299,10 +303,10 @@ def predict( The predicted relevance scores for each inputs pair. """ all_queries_with_instructions = [ - text for batch in inputs1 for text in batch["text"] + self.query_prefix + text for batch in inputs1 for text in batch["text"] ] all_corpus_with_instructions = [ - text for batch in inputs2 for text in batch["text"] + self.passage_prefix + text for batch in inputs2 for text in batch["text"] ] return self.model.predict( From 711ccaa6f69ca68d3c9add0b37c1530f28333d08 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Tue, 16 Dec 2025 15:29:33 +0300 Subject: [PATCH 2/8] move to nvidia models --- .../nvidia_llama_nemoretriever_colemb.py | 54 +----------------- .../model_implementations/nvidia_models.py | 57 ++++++++++++++++++- 2 files changed, 57 insertions(+), 54 deletions(-) diff --git a/mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py b/mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py index 1afa658b34..f0afd9d290 100644 --- a/mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +++ b/mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py @@ -6,9 +6,8 @@ from transformers import __version__ as transformers_version from mteb.abstasks.task_metadata import TaskMetadata -from mteb.models import CrossEncoderWrapper from mteb.models.abs_encoder import AbsEncoder -from mteb.models.model_meta import ModelMeta, ScoringFunction +from mteb.models.model_meta import ModelMeta from mteb.types import Array, BatchedInput, PromptType LLAMA_NEMORETRIEVER_CITATION = """@misc{xu2025llamanemoretrievercolembedtopperforming, @@ -196,54 +195,3 @@ def encode( training_datasets=TRAINING_DATA, citation=LLAMA_NEMORETRIEVER_CITATION, ) - - -def _nemotron_rerank_model(model: str, revision: str, **kwargs) -> CrossEncoderWrapper: - required_transformers_version = "4.42.4" - - if Version(transformers_version) != Version(required_transformers_version): - raise RuntimeError( - f"transformers version {transformers_version} is not match with required " - f"install version {required_transformers_version} to run `nvidia/NV-Embed-v2`" - ) - - return CrossEncoderWrapper( - model=model, - revision=revision, - **kwargs, - ) - - -nemotron_rerank_1b_v2 = ModelMeta( - loader=_nemotron_rerank_model, - loader_kwargs=dict( - trust_remote_code=True, - query_prefix="question:", - passage_prefix=" \n \n passage:", - ), - name="nvidia/llama-nemotron-rerank-1b-v2", - revision="78efcfdc23b53a753f6c73f2d78b18132a34ac4d", - release_date="2025-10-16", - languages=["eng-Latn"], - n_parameters=1235816448, - memory_usage_mb=2357.0, - max_tokens=4096, - embed_dim=2048, - license="https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/", - open_weights=True, - public_training_code=None, - public_training_data=None, - framework=["PyTorch", "Sentence Transformers"], - reference="https://huggingface.co/nvidia/llama-nemotron-rerank-1b-v2", - similarity_fn_name=ScoringFunction.COSINE, - use_instructions=None, - training_datasets=set( - # private - ), - adapted_from="meta-llama/Llama-3.2-1B", - superseded_by=None, - modalities=["text"], - is_cross_encoder=True, - citation=None, - contacts=None, -) diff --git a/mteb/models/model_implementations/nvidia_models.py b/mteb/models/model_implementations/nvidia_models.py index b7c232e791..d3fbda4181 100644 --- a/mteb/models/model_implementations/nvidia_models.py +++ b/mteb/models/model_implementations/nvidia_models.py @@ -9,8 +9,9 @@ from transformers import AutoModel, AutoTokenizer from transformers import __version__ as transformers_version -from mteb import TaskMetadata from mteb._requires_package import requires_package +from mteb.abstasks.task_metadata import TaskMetadata +from mteb.models import CrossEncoderWrapper from mteb.models.abs_encoder import AbsEncoder from mteb.models.instruct_wrapper import InstructSentenceTransformerModel from mteb.models.model_meta import ModelMeta, ScoringFunction @@ -547,3 +548,57 @@ def _extract_embeddings( contacts=["ybabakhin"], citation=NV_RETRIEVER_CITATION, ) + + +def _nemotron_rerank_model(model: str, revision: str, **kwargs) -> CrossEncoderWrapper: + required_transformers_version = "4.47.1" + + if Version(transformers_version) != Version(required_transformers_version): + raise RuntimeError( + f"transformers version {transformers_version} is not match with required " + f"install version {required_transformers_version} to run `nvidia/NV-Embed-v2`" + ) + + return CrossEncoderWrapper( + model=model, + revision=revision, + **kwargs, + ) + + +nemotron_rerank_1b_v2 = ModelMeta( + loader=_nemotron_rerank_model, + loader_kwargs=dict( + trust_remote_code=True, + query_prefix="question:", + passage_prefix=" \n \n passage:", + model_kwargs={"torch_dtype": torch.bfloat16}, # "trust_remote_code": True}, + # config_kwargs={"trust_remote_code": True}, + # tokenizer_kwargs={"trust_remote_code": True}, + ), + name="nvidia/llama-nemotron-rerank-1b-v2", + revision="78efcfdc23b53a753f6c73f2d78b18132a34ac4d", + release_date="2025-10-16", + languages=["eng-Latn"], + n_parameters=1235816448, + memory_usage_mb=2357.0, + max_tokens=4096, + embed_dim=2048, + license="https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/", + open_weights=True, + public_training_code=None, + public_training_data=None, + framework=["PyTorch", "Sentence Transformers"], + reference="https://huggingface.co/nvidia/llama-nemotron-rerank-1b-v2", + similarity_fn_name=ScoringFunction.COSINE, + use_instructions=None, + training_datasets=set( + # private + ), + adapted_from="meta-llama/Llama-3.2-1B", + superseded_by=None, + modalities=["text"], + is_cross_encoder=True, + citation=None, + contacts=None, +) From 4a2bd2c3d4f6c70c0403969774510e746a42f80c Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Tue, 16 Dec 2025 15:36:29 +0300 Subject: [PATCH 3/8] removed extra params --- mteb/models/model_implementations/nvidia_models.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mteb/models/model_implementations/nvidia_models.py b/mteb/models/model_implementations/nvidia_models.py index d3fbda4181..7d9cc0afa5 100644 --- a/mteb/models/model_implementations/nvidia_models.py +++ b/mteb/models/model_implementations/nvidia_models.py @@ -572,9 +572,7 @@ def _nemotron_rerank_model(model: str, revision: str, **kwargs) -> CrossEncoderW trust_remote_code=True, query_prefix="question:", passage_prefix=" \n \n passage:", - model_kwargs={"torch_dtype": torch.bfloat16}, # "trust_remote_code": True}, - # config_kwargs={"trust_remote_code": True}, - # tokenizer_kwargs={"trust_remote_code": True}, + model_kwargs={"torch_dtype": torch.bfloat16}, ), name="nvidia/llama-nemotron-rerank-1b-v2", revision="78efcfdc23b53a753f6c73f2d78b18132a34ac4d", From 01e6334a04dc5eb474f37e56b3fc80d54f3d8be3 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Sun, 28 Dec 2025 15:37:06 +0500 Subject: [PATCH 4/8] Apply suggestions from code review Co-authored-by: Isaac Chung --- .../nvidia_llama_nemoretriever_colemb.py | 2 +- mteb/models/sentence_transformer_wrapper.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py b/mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py index f0afd9d290..725ed8ed4d 100644 --- a/mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +++ b/mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py @@ -37,7 +37,7 @@ def __init__( if Version(transformers_version) != Version(required_transformers_version): raise RuntimeError( f"transformers version {transformers_version} is not match with required " - f"install version {required_transformers_version} to run `nvidia/NV-Embed-v2`" + f"install version {required_transformers_version} to run `nvidia/llama-nemoretriever-colembed`" ) from transformers import AutoModel diff --git a/mteb/models/sentence_transformer_wrapper.py b/mteb/models/sentence_transformer_wrapper.py index c5f84a51db..4196b88e57 100644 --- a/mteb/models/sentence_transformer_wrapper.py +++ b/mteb/models/sentence_transformer_wrapper.py @@ -261,8 +261,8 @@ def __init__( self, model: CrossEncoder | str, revision: str | None = None, - query_prefix: str | None = None, - passage_prefix: str | None = None, + query_prefix: str = "", + passage_prefix: str = "", **kwargs, ) -> None: from sentence_transformers import CrossEncoder From ba013effda8e7c4981429ef11fc66bfb0a63b698 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Sun, 28 Dec 2025 15:37:54 +0500 Subject: [PATCH 5/8] remove or --- mteb/models/sentence_transformer_wrapper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mteb/models/sentence_transformer_wrapper.py b/mteb/models/sentence_transformer_wrapper.py index fc69f24ec5..508c7e05d3 100644 --- a/mteb/models/sentence_transformer_wrapper.py +++ b/mteb/models/sentence_transformer_wrapper.py @@ -278,8 +278,8 @@ def __init__( self.model = CrossEncoder(model, revision=revision, **kwargs) self.mteb_model_meta = ModelMeta.from_cross_encoder(self.model) - self.query_prefix = query_prefix or "" - self.passage_prefix = passage_prefix or "" + self.query_prefix = query_prefix + self.passage_prefix = passage_prefix def predict( self, From f3d00e1058296abb079d49ea2fc2af41df7840e0 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Sun, 28 Dec 2025 15:38:18 +0500 Subject: [PATCH 6/8] add docstring --- mteb/models/sentence_transformer_wrapper.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mteb/models/sentence_transformer_wrapper.py b/mteb/models/sentence_transformer_wrapper.py index 508c7e05d3..74a70535ad 100644 --- a/mteb/models/sentence_transformer_wrapper.py +++ b/mteb/models/sentence_transformer_wrapper.py @@ -260,7 +260,15 @@ def encode( class CrossEncoderWrapper: - """Wrapper for CrossEncoder models.""" + """Wrapper for CrossEncoder models. + + Args: + model: The CrossEncoder model to use. Can be a string (model name) or a CrossEncoder model. + revision: The revision of the model to use. + query_prefix: A prefix to add to all queries. + passage_prefix: A prefix to add to all passages. + **kwargs: Additional arguments to pass to the CrossEncoder model. + """ def __init__( self, From 00f113201646bb9ddc407c8d7d98b6705b1621d1 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Tue, 13 Jan 2026 23:28:28 +0300 Subject: [PATCH 7/8] Update mteb/models/model_implementations/nvidia_models.py Co-authored-by: Yauhen Babakhin --- mteb/models/model_implementations/nvidia_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/models/model_implementations/nvidia_models.py b/mteb/models/model_implementations/nvidia_models.py index 46324d3d38..7551d2a74f 100644 --- a/mteb/models/model_implementations/nvidia_models.py +++ b/mteb/models/model_implementations/nvidia_models.py @@ -559,7 +559,7 @@ def _nemotron_rerank_model(model: str, revision: str, **kwargs) -> CrossEncoderW if Version(transformers_version) != Version(required_transformers_version): raise RuntimeError( f"transformers version {transformers_version} is not match with required " - f"install version {required_transformers_version} to run `nvidia/NV-Embed-v2`" + f"install version {required_transformers_version} to run `nvidia/llama-nemotron-rerank-1b-v2`" ) return CrossEncoderWrapper( From c105e3cc024616f24c12c5b47ccf32bfc40563b5 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Wed, 14 Jan 2026 01:43:13 +0500 Subject: [PATCH 8/8] update --- mteb/models/model_implementations/nvidia_models.py | 4 ++-- mteb/models/sentence_transformer_wrapper.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/mteb/models/model_implementations/nvidia_models.py b/mteb/models/model_implementations/nvidia_models.py index 48a6680a2a..05c567fb41 100644 --- a/mteb/models/model_implementations/nvidia_models.py +++ b/mteb/models/model_implementations/nvidia_models.py @@ -654,7 +654,7 @@ def _nemotron_rerank_model(model: str, revision: str, **kwargs) -> CrossEncoderW trust_remote_code=True, query_prefix="question:", passage_prefix=" \n \n passage:", - model_kwargs={"torch_dtype": torch.bfloat16}, + model_kwargs={"torch_dtype": torch.float32}, ), name="nvidia/llama-nemotron-rerank-1b-v2", revision="78efcfdc23b53a753f6c73f2d78b18132a34ac4d", @@ -678,7 +678,7 @@ def _nemotron_rerank_model(model: str, revision: str, **kwargs) -> CrossEncoderW adapted_from="meta-llama/Llama-3.2-1B", superseded_by=None, modalities=["text"], - is_cross_encoder=True, + model_type=["cross-encoder"], citation=None, contacts=None, ) diff --git a/mteb/models/sentence_transformer_wrapper.py b/mteb/models/sentence_transformer_wrapper.py index 4d888d1199..356e2b65b1 100644 --- a/mteb/models/sentence_transformer_wrapper.py +++ b/mteb/models/sentence_transformer_wrapper.py @@ -271,6 +271,7 @@ class CrossEncoderWrapper: Args: model: The CrossEncoder model to use. Can be a string (model name) or a CrossEncoder model. revision: The revision of the model to use. + device: The device used to load the model. query_prefix: A prefix to add to all queries. passage_prefix: A prefix to add to all passages. **kwargs: Additional arguments to pass to the CrossEncoder model.