Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions mteb/models/bge_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
model_prompts = {"query": "Represent this sentence for searching relevant passages: "}

bge_small_en_v1_5 = ModelMeta(
loader=partial(
loader=partial( # type: ignore
sentence_transformers_loader,
model_name="BAAI/bge-small-en-v1.5",
revision="5c38ec7c405ec4b44b94cc5a9bb96e735b38267a",
Expand All @@ -30,7 +30,7 @@
)

bge_base_en_v1_5 = ModelMeta(
loader=partial(
loader=partial( # type: ignore
sentence_transformers_loader,
model_name="BAAI/bge-base-en-v1.5",
revision="a5beb1e3e68b9ab74eb54cfd186867f64f240e1a",
Expand All @@ -53,7 +53,7 @@
)

bge_large_en_v1_5 = ModelMeta(
loader=partial(
loader=partial( # type: ignore
sentence_transformers_loader,
model_name="BAAI/bge-large-en-v1.5",
revision="d4aa6901d3a41ba39fb536a557fa166f842b0e09",
Expand Down
4 changes: 2 additions & 2 deletions mteb/models/cohere_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def encode(
}

cohere_mult_3 = ModelMeta(
loader=partial(
loader=partial( # type: ignore
CohereTextEmbeddingModel,
model_name="embed-multilingual-v3.0",
model_prompts=model_prompts,
Expand All @@ -238,7 +238,7 @@ def encode(
)

cohere_eng_3 = ModelMeta(
loader=partial(
loader=partial( # type: ignore
CohereTextEmbeddingModel,
model_name="embed-english-v3.0",
model_prompts=model_prompts,
Expand Down
11 changes: 5 additions & 6 deletions mteb/models/e5_instruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@
MISTRAL_LANGUAGES = ["eng_Latn", "fra_Latn", "deu_Latn", "ita_Latn", "spa_Latn"]


def e5_instruction(instruction: str) -> str:
return f"Instruct: {instruction}\nQuery: "
E5_INSTRUCTION = "Instruct: {instruction}\nQuery: "


e5_instruct = ModelMeta(
loader=partial(
loader=partial( # type: ignore
instruct_wrapper,
model_name_or_path="intfloat/multilingual-e5-large-instruct",
instruction_template=e5_instruction,
instruction_template=E5_INSTRUCTION,
attn="cccc",
pooling_method="mean",
mode="embedding",
Expand All @@ -44,10 +43,10 @@ def e5_instruction(instruction: str) -> str:
)

e5_mistral = ModelMeta(
loader=partial(
loader=partial( # type: ignore
instruct_wrapper,
model_name_or_path="intfloat/e5-mistral-7b-instruct",
instruction_template=e5_instruction,
instruction_template=E5_INSTRUCTION,
attn="cccc",
pooling_method="lasttoken",
mode="embedding",
Expand Down
14 changes: 7 additions & 7 deletions mteb/models/e5_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
}

e5_mult_small = ModelMeta(
loader=partial(
loader=partial( # type: ignore
sentence_transformers_loader,
model_name="intfloat/multilingual-e5-small",
revision="fd1525a9fd15316a2d503bf26ab031a61d056e98",
Expand All @@ -137,7 +137,7 @@
)

e5_mult_base = ModelMeta(
loader=partial(
loader=partial( # type: ignore
sentence_transformers_loader,
model_name="intfloat/multilingual-e5-base",
model_prompts=model_prompts,
Expand All @@ -159,7 +159,7 @@
)

e5_mult_large = ModelMeta(
loader=partial(
loader=partial( # type: ignore
sentence_transformers_loader,
model_name="intfloat/multilingual-e5-large",
revision="ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb",
Expand All @@ -182,7 +182,7 @@
)

e5_eng_small_v2 = ModelMeta(
loader=partial(
loader=partial( # type: ignore
sentence_transformers_loader,
model_name="intfloat/e5-small-v2",
model_prompts=model_prompts,
Expand All @@ -204,7 +204,7 @@
)

e5_eng_small = ModelMeta(
loader=partial(
loader=partial( # type: ignore
sentence_transformers_loader,
model_name="intfloat/e5-small",
revision="e272f3049e853b47cb5ca3952268c6662abda68f",
Expand All @@ -227,7 +227,7 @@
)

e5_eng_base_v2 = ModelMeta(
loader=partial(
loader=partial( # type: ignore
sentence_transformers_loader,
model_name="intfloat/e5-base-v2",
revision="1c644c92ad3ba1efdad3f1451a637716616a20e8",
Expand All @@ -252,7 +252,7 @@
)

e5_eng_large_v2 = ModelMeta(
loader=partial(
loader=partial( # type: ignore
sentence_transformers_loader,
model_name="intfloat/e5-large-v2",
revision="b322e09026e4ea05f42beadf4d661fb4e101d311",
Expand Down
4 changes: 2 additions & 2 deletions mteb/models/gritlm_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def gritlm_instruction(instruction: str = "") -> str:


gritlm7b = ModelMeta(
loader=partial(
loader=partial( # type: ignore
instruct_wrapper,
model_name_or_path="GritLM/GritLM-7B",
instruction_template=gritlm_instruction,
Expand All @@ -40,7 +40,7 @@ def gritlm_instruction(instruction: str = "") -> str:
use_instructions=True,
)
gritlm8x7b = ModelMeta(
loader=partial(
loader=partial( # type: ignore
instruct_wrapper,
model_name_or_path="GritLM/GritLM-8x7B",
instruction_template=gritlm_instruction,
Expand Down
66 changes: 63 additions & 3 deletions mteb/models/gte_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@
from functools import partial

from mteb.model_meta import ModelMeta
from mteb.models.instruct_wrapper import instruct_wrapper


def instruction_template(instruction: str) -> str:
    """Wrap *instruction* in the "Instruct: ...\\nQuery: " prompt prefix.

    An empty instruction yields an empty prefix, so unprefixed queries
    pass through unchanged.
    """
    if not instruction:
        return ""
    return f"Instruct: {instruction}\nQuery: "

from .instruct_wrapper import instruct_wrapper

gte_Qwen2_7B_instruct = ModelMeta(
loader=partial(
loader=partial( # type: ignore
instruct_wrapper,
model_name_or_path="Alibaba-NLP/gte-Qwen2-7B-instruct",
instruction_template="Instruct: {instruction}\nQuery: ",
instruction_template=instruction_template,
attn="cccc",
pooling_method="lasttoken",
mode="embedding",
Expand All @@ -33,3 +37,59 @@
framework=["Sentence Transformers", "PyTorch"],
use_instructions=True,
)


# Registry entry for Alibaba's gte-Qwen1.5-7B-instruct embedding model.
# The loader defers model construction: instruct_wrapper is only invoked
# (with these pinned arguments) when the model is actually requested.
gte_Qwen1_5_7B_instruct = ModelMeta(
    loader=partial(  # type: ignore
        instruct_wrapper,
        model_name_or_path="Alibaba-NLP/gte-Qwen1.5-7B-instruct",
        # Callable that prepends "Instruct: ...\nQuery: " to task instructions.
        instruction_template=instruction_template,
        attn="cccc",  # NOTE(review): attention-mode code interpreted by instruct_wrapper — confirm semantics there
        pooling_method="lasttoken",  # pool the last token's hidden state into the embedding
        mode="embedding",
        torch_dtype="auto",  # let transformers select the checkpoint's dtype
        normalized=True,
    ),
    name="Alibaba-NLP/gte-Qwen1.5-7B-instruct",
    languages=["eng_Latn"],
    open_weights=True,
    # Pinned HF revision for reproducible downloads.
    revision="07d27e5226328010336563bc1b564a5e3436a298",
    release_date="2024-04-20",  # initial commit of hf model.
    n_parameters=7_720_000_000,
    memory_usage=None,
    embed_dim=4096,
    license="apache-2.0",
    max_tokens=32768,
    reference="https://huggingface.co/Alibaba-NLP/gte-Qwen1.5-7B-instruct",
    similarity_fn_name="cosine",
    framework=["Sentence Transformers", "PyTorch"],
    use_instructions=True,
)


# Registry entry for Alibaba's gte-Qwen2-1.5B-instruct embedding model.
# Mirrors the other gte-Qwen entries in this module: the loader is a
# partial over instruct_wrapper, evaluated lazily on first use.
gte_Qwen2_1_5B_instruct = ModelMeta(
    loader=partial(  # type: ignore
        instruct_wrapper,
        model_name_or_path="Alibaba-NLP/gte-Qwen2-1.5B-instruct",
        # Callable that prepends "Instruct: ...\nQuery: " to task instructions.
        instruction_template=instruction_template,
        attn="cccc",  # NOTE(review): attention-mode code interpreted by instruct_wrapper — confirm semantics there
        pooling_method="lasttoken",  # pool the last token's hidden state into the embedding
        mode="embedding",
        torch_dtype="auto",  # let transformers select the checkpoint's dtype
        normalized=True,
    ),
    name="Alibaba-NLP/gte-Qwen2-1.5B-instruct",
    languages=["eng_Latn"],
    open_weights=True,
    # Pinned HF revision for reproducible downloads.
    revision="c6c1b92f4a3e1b92b326ad29dd3c8433457df8dd",
    release_date="2024-07-29",  # initial commit of hf model.
    n_parameters=1_780_000_000,
    memory_usage=None,
    embed_dim=8960,
    license="apache-2.0",
    max_tokens=131072,
    reference="https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct",
    similarity_fn_name="cosine",
    framework=["Sentence Transformers", "PyTorch"],
    use_instructions=True,
)
5 changes: 5 additions & 0 deletions mteb/models/instruct_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ def __init__(
"No instruction template provided. Instructions will be used as-is."
)

if "gte-Qwen" in model_name_or_path:
logger.warning(
"Instructions are used in both query and docs, which may cause performance discrepancies from the original implementation."
)

self.instruction_template = instruction_template
super().__init__(model_name_or_path=model_name_or_path, mode=mode, **kwargs)

Expand Down
2 changes: 1 addition & 1 deletion mteb/models/jina_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def encode(


jina_embeddings_v3 = ModelMeta(
loader=partial(
loader=partial( # type: ignore
JinaWrapper,
model="jinaai/jina-embeddings-v3",
revision="215a6e121fa0183376388ac6b1ae230326bfeaed",
Expand Down
40 changes: 40 additions & 0 deletions mteb/models/linq_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from __future__ import annotations

from functools import partial

import torch

from mteb.model_meta import ModelMeta
from mteb.models.instruct_wrapper import instruct_wrapper


def instruction_template(instruction: str) -> str:
    """Build the "Instruct: ...\\nQuery: " prefix for a task instruction.

    Returns the empty string for an empty instruction.
    """
    prompt = f"Instruct: {instruction}\nQuery: "
    return prompt if instruction else ""


# Registry entry for Linq-AI-Research/Linq-Embed-Mistral, a Mistral-7B-based
# instruction-following embedding model. The loader is a lazy partial over
# the shared instruct_wrapper.
Linq_Embed_Mistral = ModelMeta(
    loader=partial(  # type: ignore
        instruct_wrapper,
        model_name_or_path="Linq-AI-Research/Linq-Embed-Mistral",
        # Callable that prepends "Instruct: ...\nQuery: " to task instructions.
        instruction_template=instruction_template,
        attn="cccc",  # NOTE(review): attention-mode code interpreted by instruct_wrapper — confirm semantics there
        pooling_method="lasttoken",  # pool the last token's hidden state into the embedding
        mode="embedding",
        torch_dtype=torch.bfloat16,  # load weights in bf16 to halve memory vs fp32
        normalized=True,
    ),
    name="Linq-AI-Research/Linq-Embed-Mistral",
    languages=["eng_Latn"],
    open_weights=True,
    # Pinned HF revision for reproducible downloads.
    revision="0c1a0b0589177079acc552433cad51d7c9132379",
    release_date="2024-05-29",  # initial commit of hf model.
    n_parameters=7_110_000_000,
    memory_usage=None,
    embed_dim=4096,
    license="cc-by-nc-4.0",  # non-commercial license — check before production use
    max_tokens=32768,
    reference="https://huggingface.co/Linq-AI-Research/Linq-Embed-Mistral",
    similarity_fn_name="cosine",
    framework=["Sentence Transformers", "PyTorch"],
    use_instructions=True,
)
2 changes: 1 addition & 1 deletion mteb/models/mxbai_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from mteb.model_meta import ModelMeta, sentence_transformers_loader

mxbai_embed_large_v1 = ModelMeta(
loader=partial(
loader=partial( # type: ignore
sentence_transformers_loader,
model_name="mixedbread-ai/mxbai-embed-large-v1",
revision="990580e27d329c7408b3741ecff85876e128e203",
Expand Down
Loading