Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mteb/abstasks/Image/AbsTaskAny2AnyRetrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
from datasets import Features, Value, load_dataset
from PIL import Image

from ..AbsTask import AbsTask
from ...evaluation.evaluators import Any2AnyRetrievalEvaluator
from ...load_results.mteb_results import ScoresDict
from ..AbsTask import AbsTask

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion mteb/abstasks/Image/AbsTaskImageClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@

import numpy as np

from ..AbsTask import AbsTask
from ...encoder_interface import Encoder
from ...evaluation.evaluators import (
ImagekNNClassificationEvaluator,
ImagekNNClassificationEvaluatorPytorch,
ImagelogRegClassificationEvaluator,
)
from ...load_results.mteb_results import HFSubset, ScoresDict
from ..AbsTask import AbsTask

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion mteb/abstasks/Image/AbsTaskImageClustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

from datasets import Dataset

from ..AbsTask import AbsTask
from ...encoder_interface import Encoder, EncoderWithQueryCorpusEncode
from ...evaluation.evaluators import ImageClusteringEvaluator
from ...load_results.mteb_results import HFSubset, ScoresDict
from ..AbsTask import AbsTask

logger = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MultiLabelBinarizer

from ..AbsTask import AbsTask
from ...encoder_interface import Encoder
from ...load_results.mteb_results import HFSubset, ScoresDict
from ..AbsTask import AbsTask

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion mteb/abstasks/Image/AbsTaskImageTextPairClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from datasets import Dataset
from tqdm import tqdm

from ..AbsTask import AbsTask
from ...encoder_interface import Encoder, EncoderWithQueryCorpusEncode
from ...evaluation.evaluators import ImageTextPairClassificationEvaluator
from ...load_results.mteb_results import ScoresDict
from ..AbsTask import AbsTask

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion mteb/abstasks/Image/AbsTaskZeroshotClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

from datasets import Dataset

from ..AbsTask import AbsTask
from ...encoder_interface import Encoder, EncoderWithQueryCorpusEncode
from ...evaluation.evaluators import ZeroshotClassificationEvaluator
from ...load_results.mteb_results import ScoresDict
from ..AbsTask import AbsTask

logger = logging.getLogger(__name__)

Expand Down
21 changes: 20 additions & 1 deletion mteb/evaluation/evaluators/Image/ClusteringEvaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import sklearn
import sklearn.cluster
from PIL import Image
from scipy.optimize import linear_sum_assignment
from sklearn import metrics

from mteb.encoder_interface import Encoder
Expand Down Expand Up @@ -53,6 +54,24 @@ def __call__(self, model: Encoder, *, encode_kwargs: dict[str, Any] = {}):

logger.info("Evaluating...")
v_measure = metrics.cluster.v_measure_score(self.labels, cluster_assignment)
nmi = metrics.cluster.normalized_mutual_info_score(
self.labels, cluster_assignment
)
ari = metrics.cluster.adjusted_rand_score(self.labels, cluster_assignment)

accuracy = metrics.accuracy_score(self.labels, cluster_assignment)

return {"v_measure": v_measure, "accuracy": accuracy}
matrix = metrics.confusion_matrix(self.labels, cluster_assignment)

# get linear sum assignment
row_ind, col_ind = linear_sum_assignment(matrix, maximize=True)
total_correct = matrix[row_ind, col_ind].sum()
clustering_accuracy = total_correct / len(self.labels)

return {
"v_measure": v_measure,
"accuracy": accuracy,
"nmi": nmi,
"ari": ari,
"cluster_accuracy": clustering_accuracy,
}
6 changes: 3 additions & 3 deletions mteb/evaluation/evaluators/Image/VisualSTSEvaluator.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
from __future__ import annotations

import logging
from typing import Any
import math
import os
from typing import Any

import numpy as np
import torch
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics.pairwise import (
paired_cosine_distances,
paired_euclidean_distances,
paired_manhattan_distances,
)
import math
import torch
from torch.utils.data import DataLoader
from torchvision import transforms

Expand Down
4 changes: 4 additions & 0 deletions mteb/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from mteb.models import (
align_models,
bge_models,
blip2_models,
blip_models,
bm25,
clip_models,
cohere_models,
Expand Down Expand Up @@ -130,6 +132,8 @@ def model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMe
model_modules = [
align_models,
bge_models,
blip_models,
blip2_models,
bm25,
cohere_models,
dino_models,
Expand Down
Loading