diff --git a/mteb/models/align_models.py b/mteb/models/align_models.py index a47e199209..e8f55c64ba 100644 --- a/mteb/models/align_models.py +++ b/mteb/models/align_models.py @@ -57,6 +57,12 @@ def get_image_embeddings( with torch.no_grad(): for i in tqdm(range(0, len(images), batch_size)): batch_images = images[i : i + batch_size] + batch_images = [ + img.convert("RGB") + if isinstance(img, Image.Image) and img.mode != "RGB" + else img + for img in batch_images + ] inputs = self.processor( images=batch_images, return_tensors="pt", padding=True ) diff --git a/mteb/tasks/Image/Clustering/eng/CIFAR.py b/mteb/tasks/Image/Clustering/eng/CIFAR.py index 2bde390661..f9d08b684a 100644 --- a/mteb/tasks/Image/Clustering/eng/CIFAR.py +++ b/mteb/tasks/Image/Clustering/eng/CIFAR.py @@ -14,7 +14,7 @@ class CIFAR10Clustering(AbsTaskImageClustering): "revision": "0b2714987fa478483af9968de7c934580d0bb9a2", }, type="ImageClustering", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="nmi", diff --git a/mteb/tasks/Image/Clustering/eng/ImageNet.py b/mteb/tasks/Image/Clustering/eng/ImageNet.py index a2377e2334..aa0ab5720b 100644 --- a/mteb/tasks/Image/Clustering/eng/ImageNet.py +++ b/mteb/tasks/Image/Clustering/eng/ImageNet.py @@ -14,7 +14,7 @@ class ImageNetDog15Clustering(AbsTaskImageClustering): "revision": "bfb6ad3b2109d26c9daddf14f98d315daa35ee72", }, type="ImageClustering", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="nmi", diff --git a/mteb/tasks/Image/Clustering/eng/TinyImageNet.py b/mteb/tasks/Image/Clustering/eng/TinyImageNet.py index a5760b6b9c..d49ebbfde6 100644 --- a/mteb/tasks/Image/Clustering/eng/TinyImageNet.py +++ b/mteb/tasks/Image/Clustering/eng/TinyImageNet.py @@ -14,7 +14,7 @@ class TinyImageNet(AbsTaskImageClustering): "revision": "5a77092c28e51558c5586e9c5eb71a7e17a5e43f", }, type="ImageClustering", - category="s2s", + category="i2i", eval_splits=["valid"], eval_langs=["eng-Latn"], main_score="nmi", diff --git a/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py b/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py index 2b08169aae..2e11094b09 100644 --- a/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py @@ -14,7 +14,7 @@ class BirdsnapClassification(AbsTaskImageClassification): "revision": "fd23015508be94f0b5b59d61630e4ea2536509e4", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/CIFAR.py b/mteb/tasks/Image/ImageClassification/eng/CIFAR.py index 5b4d096783..abed2ad617 100644 --- a/mteb/tasks/Image/ImageClassification/eng/CIFAR.py +++ b/mteb/tasks/Image/ImageClassification/eng/CIFAR.py @@ -14,7 +14,7 @@ class CIFAR10Classification(AbsTaskImageClassification): "revision": "0b2714987fa478483af9968de7c934580d0bb9a2", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py b/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py index 76843f50ba..30112cdf1d 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py @@ -16,7 +16,7 @@ class Caltech101Classification(AbsTaskImageClassification): "trust_remote_code": True, }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py b/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py index 14427cd530..b73f895595 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py @@ -14,7 +14,7 @@ class Country211Classification(AbsTaskImageClassification): "revision": "1699f138f0558342a1cbf99f7cf36b4361bb5ebc", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py b/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py index 48a6ca5243..eb7360f088 100644 --- a/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py @@ -14,7 +14,7 @@ class DTDClassification(AbsTaskImageClassification): "revision": "d2afa97d9f335b1a6b3b09c637aef667f98f966e", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py b/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py index 588cce89aa..5cac334c3d 100644 --- a/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py @@ -14,7 +14,7 @@ class EuroSATClassification(AbsTaskImageClassification): "revision": "b4e28552cd5f3932b6abc37eb20d3e84901ad728", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py b/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py index 81c2fc5857..074e92529a 100644 --- a/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py @@ -14,7 +14,7 @@ class FER2013Classification(AbsTaskImageClassification): "revision": "9399b94167523fe5c40b3a857e24ef931ee4395b", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py b/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py index 2971faf863..8b2a41bd50 100644 --- a/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py @@ -15,7 +15,7 @@ class FGVCAircraftClassification(AbsTaskImageClassification): "trust_remote_code": True, }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py b/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py index a0dee80ad0..1bbe8e106b 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py @@ -14,7 +14,7 @@ class Food101Classification(AbsTaskImageClassification): "revision": "e06acf2a88084f04bce4d4a525165d68e0a36c38", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["validation"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py b/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py index 4a77c47e98..3244b47dc8 100644 --- a/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py @@ -14,7 +14,7 @@ class GTSRBClassification(AbsTaskImageClassification): "revision": "1c13eff0803d2b02c9dc8dfe85e67770b3f0f3c5", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py b/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py index ff91119015..bed879d282 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py +++ b/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py @@ -14,7 +14,7 @@ class Imagenet1kClassification(AbsTaskImageClassification): "revision": "b24c7a5a3ef12df09089055d1795e2ce7c7e7397", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py b/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py index 7e4b81f3f6..8230938a14 100644 --- a/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py @@ -14,7 +14,7 @@ class MNISTClassification(AbsTaskImageClassification): "revision": "77f3279092a1c1579b2250db8eafed0ad422088c", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py b/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py index 316107534f..7f607d6aac 100644 --- a/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py @@ -14,7 +14,7 @@ class OxfordFlowersClassification(AbsTaskImageClassification): "revision": "a37b1891609c0376fa81eced756e7863e1bd873b", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py b/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py index 39620326ab..28a2357d5c 100644 --- a/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py @@ -14,7 +14,7 @@ class OxfordPetsClassification(AbsTaskImageClassification): "revision": "557b480fae8d69247be74d9503b378a09425096f", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py b/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py index e2518dc221..27508c8c17 100644 --- a/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py @@ -14,7 +14,7 @@ class PatchCamelyonClassification(AbsTaskImageClassification): "revision": "502695fe1a141108650e3c5b91c8b5e0ff84ed49", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py b/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py index b14d31cdc0..7fa7cd5d3d 100644 --- a/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py @@ -14,7 +14,7 @@ class RESISC45Classification(AbsTaskImageClassification): "revision": "fe12fc5f1b7606543b0355eda392f1ddc54625c6", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py b/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py index 7acd6bb0eb..11ea833477 100644 --- a/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py @@ -14,7 +14,7 @@ class STL10Classification(AbsTaskImageClassification): "revision": "49ae7f94508f7feae62baf836db284306eab0b0f", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py b/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py index 4d0f987564..b4b5a8b931 100644 --- a/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py @@ -14,7 +14,7 @@ class SUN397Classification(AbsTaskImageClassification): "revision": "7e6af6a2499ad708618be868e1471eac0aca1168", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py b/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py index f9836e4b05..74fa5e92b8 100644 --- a/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py @@ -14,7 +14,7 @@ class StanfordCarsClassification(AbsTaskImageClassification): "revision": "09ffe9bc7864d3f1e851529e5c4b7e05601a04fb", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py b/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py index 68d2cb74b7..dc4021b490 100644 --- a/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py @@ -18,7 +18,7 @@ class UCF101Classification(AbsTaskImageClassification): "revision": "1098eed48f2929443f47c39f3b5c814e16369c11", }, type="ImageClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageMultilabelClassification/eng/PascalVOC2007.py b/mteb/tasks/Image/ImageMultilabelClassification/eng/PascalVOC2007.py index bd5d0aad6d..4ceae17ff9 100644 --- a/mteb/tasks/Image/ImageMultilabelClassification/eng/PascalVOC2007.py +++ b/mteb/tasks/Image/ImageMultilabelClassification/eng/PascalVOC2007.py @@ -18,7 +18,7 @@ class VOC2007Classification(AbsTaskImageMultilabelClassification): "trust_remote_code": True, }, type="ImageMultilabelClassification", - category="i2t", + category="i2i", eval_splits=["test"], eval_langs=["eng-Latn"], main_score="accuracy", diff --git a/mteb/tasks/Image/ImageTextPairClassification/Winoground.py b/mteb/tasks/Image/ImageTextPairClassification/Winoground.py index 0b8a8bedd7..6169182286 100644 --- a/mteb/tasks/Image/ImageTextPairClassification/Winoground.py +++ b/mteb/tasks/Image/ImageTextPairClassification/Winoground.py @@ -16,8 +16,7 @@ class Winoground(AbsTaskImageTextPairClassification): reference="https://openaccess.thecvf.com/content/CVPR2022/html/Thrush_Winoground_Probing_Vision_and_Language_Models_for_Visio-Linguistic_Compositionality_CVPR_2022_paper", dataset={ "path": "facebook/winoground", - "revision": "521ec2ba6f9a5d7380f7cca5a7b44aea5c1d677c", - "trust_remote_code": True, + "revision": "b400e173549071916ad1b3d449293bc8d8b4b763", }, type="ImageTextPairClassification", category="i2t",