Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mteb/models/overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
vista_models,
vlm2vec_models,
voyage_models,
voyage_v,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -81,6 +82,7 @@
siglip_models,
vista_models,
voyage_models,
voyage_v,
vlm2vec_models,
repllama_models,
promptriever_models,
Expand Down
179 changes: 179 additions & 0 deletions mteb/models/voyage_v.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
from __future__ import annotations

import os
from functools import partial
from typing import Any

import torch
from PIL import Image
from torch.utils.data import DataLoader
from torchvision import transforms
from tqdm import tqdm

import mteb
from mteb.model_meta import ModelMeta

api_key = os.getenv("VOYAGE_API_KEY")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

delete

tensor_to_image = transforms.Compose([transforms.ToPILImage()])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move inside class?



def voyage_v_loader(**kwargs):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would move the import into the class to avoid the wrapper function

try:
import voyageai
except ImportError:
raise ImportError("To use voyage models, please run `pip install -U voyageai`.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pip install mteb[voyageai] and add to pyproject.toml with version


class VoyageMultiModalModelWrapper:
def __init__(
    self,
    model_name: str,
    **kwargs: Any,
):
    """Create the Voyage API client and remember which model to query.

    Args:
        model_name: Model identifier passed as ``model=`` on every
            ``multimodal_embed`` request made by this wrapper.
        **kwargs: Accepted so the loader can forward extra options;
            nothing in this constructor reads them.
    """
    # The client is built without arguments.
    # NOTE(review): presumably it picks up credentials from the
    # environment (e.g. VOYAGE_API_KEY) -- confirm against the voyageai docs.
    self.vo = voyageai.Client()
    self.model_name = model_name

def get_text_embeddings(
    self, texts: list[str], batch_size: int = 32, input_type=None
):
    """Embed ``texts`` through ``multimodal_embed``, one batch at a time.

    Args:
        texts: Strings to embed.
        batch_size: Number of texts sent per API request.
        input_type: Forwarded unchanged to ``multimodal_embed``.

    Returns:
        A tensor built by stacking the returned embeddings in input order.
    """
    embedding_rows = []

    for start in tqdm(range(0, len(texts), batch_size)):
        # Each text is wrapped in its own single-item list before sending.
        wrapped = [[text] for text in texts[start : start + batch_size]]
        response = self.vo.multimodal_embed(
            wrapped, model=self.model_name, input_type=input_type
        )
        # extend() iterates the tensor along its first dimension, so every
        # embedding ends up as a separate list entry.
        embedding_rows.extend(torch.tensor(response.embeddings))

    return torch.vstack(embedding_rows)

def get_image_embeddings(
    self,
    images: list[Image.Image] | DataLoader,
    batch_size: int = 32,
    input_type=None,
):
    """Embed images through ``multimodal_embed``, one batch at a time.

    Args:
        images: Either a list of PIL images, or a ``DataLoader`` whose
            batches are iterated element by element and converted with
            ``tensor_to_image``.
            NOTE(review): assumes every batch is an iterable of image
            tensors -- confirm against the loader's collate function.
        batch_size: Chunk size used when ``images`` is a list. A
            ``DataLoader`` already yields batches, so its own batch size
            is used and this argument is not consulted.
        input_type: Forwarded unchanged to ``multimodal_embed``.

    Returns:
        A tensor built by stacking the returned embeddings in input order.
    """
    if isinstance(images, DataLoader):
        # No check against `batch_size` here: nothing in this path slices
        # by it, and the first batch is legitimately shorter when the
        # dataset holds fewer items than one full batch.
        image_batches = (
            [tensor_to_image(image) for image in batch] for batch in tqdm(images)
        )
    else:
        image_batches = (
            images[start : start + batch_size]
            for start in tqdm(range(0, len(images), batch_size))
        )

    all_image_embeddings = []
    for batch_images in image_batches:
        # Each image is wrapped in its own single-item list before sending.
        response = self.vo.multimodal_embed(
            [[image] for image in batch_images],
            model=self.model_name,
            input_type=input_type,
        )
        # extend() iterates the tensor along its first dimension, so every
        # embedding ends up as a separate list entry.
        all_image_embeddings.extend(torch.tensor(response.embeddings))

    return torch.vstack(all_image_embeddings)

def calculate_probs(self, text_embeddings, image_embeddings):
    """Return image-to-text match probabilities from cosine similarity.

    Both inputs are divided by their L2 norm along the last dimension.
    The image-by-text similarity matrix is then multiplied by 100 and a
    softmax is taken over the last (text) dimension.
    """
    unit_images = image_embeddings / image_embeddings.norm(dim=-1, keepdim=True)
    unit_texts = text_embeddings / text_embeddings.norm(dim=-1, keepdim=True)
    similarity = unit_images @ unit_texts.T
    return torch.softmax(similarity * 100, dim=-1)

def get_fused_embeddings(
    self,
    texts: list[str] = None,
    images: list[Image.Image] | DataLoader = None,
    batch_size: int = 32,
    input_type=None,
):
    """Embed texts, images, or position-wise (text, image) pairs.

    Args:
        texts: Strings to embed, or ``None``.
        images: A list of PIL images, a ``DataLoader`` whose batch
            elements are converted with ``tensor_to_image``, or ``None``.
        batch_size: Items per API request when slicing lists. When
            ``images`` is a ``DataLoader`` the loader's own batches are
            used.
        input_type: Forwarded unchanged to ``multimodal_embed``, including
            on the text-only and image-only paths.

    Returns:
        A tensor of stacked embeddings: one per text, one per image, or
        one per ``[text, image]`` pair when both are given. Pairs are
        formed with ``zip``, so extra items on the longer side are
        ignored.

    Raises:
        ValueError: If both ``texts`` and ``images`` are ``None``.
    """
    if texts is None and images is None:
        raise ValueError("Either texts or images must be provided")

    # Single-modality requests are delegated. `input_type` has to be
    # passed along explicitly, otherwise the caller's value is lost.
    if images is None:
        return self.get_text_embeddings(texts, batch_size, input_type=input_type)
    if texts is None:
        return self.get_image_embeddings(images, batch_size, input_type=input_type)

    if isinstance(images, DataLoader):
        image_batches = (
            [tensor_to_image(image) for image in batch] for batch in tqdm(images)
        )
    else:
        image_batches = (
            images[start : start + batch_size]
            for start in tqdm(range(0, len(images), batch_size))
        )

    interleaved_embeddings = []
    # A running offset keeps texts aligned with images even when the
    # loader's batches are not exactly `batch_size` long.
    text_offset = 0
    for batch_images in image_batches:
        batch_texts = texts[text_offset : text_offset + len(batch_images)]
        text_offset += len(batch_images)
        interleaved_inputs = [
            [text, image] for image, text in zip(batch_images, batch_texts)
        ]
        response = self.vo.multimodal_embed(
            interleaved_inputs,
            model=self.model_name,
            input_type=input_type,
        )
        # extend() iterates the tensor along its first dimension, so every
        # embedding ends up as a separate list entry.
        interleaved_embeddings.extend(torch.tensor(response.embeddings))

    return torch.vstack(interleaved_embeddings)

return VoyageMultiModalModelWrapper(**kwargs)


# Metadata entry for the "voyage-multimodal-3" model; the loader builds the
# API wrapper with that model name bound in.
voyage_mult_3 = ModelMeta(
    loader=partial(voyage_v_loader, model_name="voyage-multimodal-3"),
    name="voyage-multimodal-3",
    languages=[],  # Unknown
    open_source=False,
    revision="1",
    release_date="2024-11-10",
    n_parameters=None,
    memory_usage=None,
    max_tokens=None,
    embed_dim=1024,
    license=None,
    similarity_fn_name="cosine",
    framework=[],
)

# Backward-compatible alias: this entry was originally bound to a name
# copied from another provider's module. Same object, so existing
# references keep working.
cohere_mult_3 = voyage_mult_3

if __name__ == "__main__":
    # Manual smoke run: resolve the model through mteb by name and revision,
    # then embed a single sentence. This calls the remote API.
    model = mteb.get_model(cohere_mult_3.name, cohere_mult_3.revision)
    embeddings = model.encode(["Hello, world!"])
22 changes: 22 additions & 0 deletions results-mieb/voyage-multimodal-3/1/CIFAR100Clustering.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"dataset_revision": "aadb3af77e9048adbea6b47c21a81e47dd092ae5",
"evaluation_time": 231.98164081573486,
"kg_co2_emissions": null,
"mteb_version": "1.14.15",
"scores": {
"test": [
{
"ari": 0.46531392836213886,
"cluster_accuracy": 0.6042,
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.7515067668506068,
"nmi": 0.7515067668506068,
"v_measure": 0.7515067668506068
}
]
},
"task_name": "CIFAR100Clustering"
}
22 changes: 22 additions & 0 deletions results-mieb/voyage-multimodal-3/1/CIFAR10Clustering.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"dataset_revision": "0b2714987fa478483af9968de7c934580d0bb9a2",
"evaluation_time": 236.98327565193176,
"kg_co2_emissions": null,
"mteb_version": "1.14.15",
"scores": {
"test": [
{
"ari": 0.7986159784876974,
"cluster_accuracy": 0.8244,
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.8622309717386565,
"nmi": 0.8622309717386565,
"v_measure": 0.8622309717386566
}
]
},
"task_name": "CIFAR10Clustering"
}
19 changes: 19 additions & 0 deletions results-mieb/voyage-multimodal-3/1/CVBenchCount.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"dataset_revision": "22409a927ab5cf68e3655023d51694587455fc99",
"evaluation_time": 444.4619266986847,
"kg_co2_emissions": null,
"mteb_version": "1.14.15",
"scores": {
"test": [
{
"accuracy": 0.2639593908629442,
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.2639593908629442
}
]
},
"task_name": "CVBenchCount"
}
19 changes: 19 additions & 0 deletions results-mieb/voyage-multimodal-3/1/CVBenchDepth.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"dataset_revision": "22409a927ab5cf68e3655023d51694587455fc99",
"evaluation_time": 413.3635849952698,
"kg_co2_emissions": null,
"mteb_version": "1.14.15",
"scores": {
"test": [
{
"accuracy": 0.5316666666666666,
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.5316666666666666
}
]
},
"task_name": "CVBenchDepth"
}
19 changes: 19 additions & 0 deletions results-mieb/voyage-multimodal-3/1/CVBenchDistance.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"dataset_revision": "22409a927ab5cf68e3655023d51694587455fc99",
"evaluation_time": 402.95056080818176,
"kg_co2_emissions": null,
"mteb_version": "1.14.15",
"scores": {
"test": [
{
"accuracy": 0.475,
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.475
}
]
},
"task_name": "CVBenchDistance"
}
19 changes: 19 additions & 0 deletions results-mieb/voyage-multimodal-3/1/CVBenchRelation.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"dataset_revision": "22409a927ab5cf68e3655023d51694587455fc99",
"evaluation_time": 201.59757685661316,
"kg_co2_emissions": null,
"mteb_version": "1.14.15",
"scores": {
"test": [
{
"accuracy": 0.5353846153846153,
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.5353846153846153
}
]
},
"task_name": "CVBenchRelation"
}
22 changes: 22 additions & 0 deletions results-mieb/voyage-multimodal-3/1/ImageNet10Clustering.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"dataset_revision": "88f8a6d47c257895094c5ad81e67ba751771fc99",
"evaluation_time": 914.053968667984,
"kg_co2_emissions": null,
"mteb_version": "1.14.15",
"scores": {
"test": [
{
"ari": 0.9808613040028806,
"cluster_accuracy": 0.9913076923076923,
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.9758070118084365,
"nmi": 0.9758070118084365,
"v_measure": 0.9758070118084365
}
]
},
"task_name": "ImageNet10Clustering"
}
22 changes: 22 additions & 0 deletions results-mieb/voyage-multimodal-3/1/ImageNetDog15Clustering.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"dataset_revision": "bfb6ad3b2109d26c9daddf14f98d315daa35ee72",
"evaluation_time": 78.63793706893921,
"kg_co2_emissions": null,
"mteb_version": "1.14.15",
"scores": {
"test": [
{
"ari": 0.8119121646758497,
"cluster_accuracy": 0.820631970260223,
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.8381839891894807,
"nmi": 0.8381839891894807,
"v_measure": 0.8381839891894807
}
]
},
"task_name": "ImageNetDog15Clustering"
}
Loading