diff --git a/mteb/abstasks/Image/AbsTaskImageClassification.py b/mteb/abstasks/Image/AbsTaskImageClassification.py index c7afbe1136..d3ba2dabf0 100644 --- a/mteb/abstasks/Image/AbsTaskImageClassification.py +++ b/mteb/abstasks/Image/AbsTaskImageClassification.py @@ -1,7 +1,7 @@ from __future__ import annotations import logging -from collections import defaultdict +from collections import Counter, defaultdict from typing import Any import numpy as np @@ -16,12 +16,45 @@ ImagelogRegClassificationEvaluator, ) from ..AbsTask import AbsTask, ScoresDict +from ..TaskMetadata import DescriptiveStatistics ImageFile.LOAD_TRUNCATED_IMAGES = True logger = logging.getLogger(__name__) +class ImageClassificationDescriptiveStatistics(DescriptiveStatistics): + """Descriptive statistics for ImageClassification + + Attributes: + num_samples: number of samples in the dataset. + + min_image_width: Minimum width of images + average_image_width: Average width of images + max_image_width: Maximum width of images + + min_image_height: Minimum height of images + average_image_height: Average height of images + max_image_height: Maximum height of images + + unique_labels: Number of unique labels + labels: dict of label frequencies + """ + + num_samples: int + + min_image_width: float + average_image_width: float + max_image_width: float + + min_image_height: float + average_image_height: float + max_image_height: float + + unique_num_labels: int + labels: dict[str, dict[str, int]] + + class AbsTaskImageClassification(AbsTask): """Abstract class for kNN classification tasks The similarity is computed between pairs and the results are ranked. @@ -73,8 +106,43 @@ def _add_main_score(self, scores: dict[HFSubset, ScoresDict]) -> None: def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False - ): - pass + ) -> ImageClassificationDescriptiveStatistics: + if hf_subset: + imgs = self.dataset[hf_subset][split][self.image_column_name] + labels = self.dataset[hf_subset][split][self.label_column_name] + elif compute_overall: + imgs = [] + labels = [] + for hf_subset in self.metadata.eval_langs: + imgs.extend(self.dataset[hf_subset][split][self.image_column_name]) + labels.extend(self.dataset[hf_subset][split][self.label_column_name]) + else: + imgs = self.dataset[split][self.image_column_name] + labels = self.dataset[split][self.label_column_name] + + num_samples = len(labels) + unique_num_labels = len(set(labels)) + label_count = Counter(labels) + + img_widths, img_heights = [], [] + for img in imgs: + width, height = img.size + img_heights.append(height) + img_widths.append(width) + + return ImageClassificationDescriptiveStatistics( + num_samples=num_samples, + unique_num_labels=unique_num_labels, + min_image_width=min(img_widths), + average_image_width=sum(img_widths) / len(img_widths), + max_image_width=max(img_widths), + min_image_height=min(img_heights), + average_image_height=sum(img_heights) / len(img_heights), + max_image_height=max(img_heights), + labels={ + str(label): {"count": count} for label, count in label_count.items() + }, + ) def evaluate( self, diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 24b3c9fa23..db28dc1915 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -100,7 +100,20 @@ "rendered", "multiple", ] -TASK_TYPE = Literal[ + +MIEB_TASK_TYPE = ( + "Any2AnyMultiChoice", + "Any2AnyRetrieval", + "Any2TextMutipleChoice", + "ImageClustering", + "ImageClassification", + "ImageMultilabelClassification", + "ImageTextPairClassification", + "VisualSTS", + "ZeroShotClassification", +) + +TASK_TYPE = ( "BitextMining", "Classification", "MultilabelClassification", @@ -112,16 +125,9 @@ "Summarization", "InstructionRetrieval", "Speed", - "Any2AnyMultiChoice", - "Any2AnyRetrieval", - "Any2TextMutipleChoice", - "ImageClustering", - "ImageClassification", - "ImageMultilabelClassification", - "ImageTextPairClassification", - "VisualSTS", - "ZeroShotClassification", -] +) + MIEB_TASK_TYPE + +TASK_TYPE = Literal[TASK_TYPE] TASK_CATEGORY = Literal[ @@ -455,9 +461,11 @@ def descriptive_stats(self) -> dict[str, DescriptiveStatistics] | None: def descriptive_stat_path(self) -> Path: """Return the path to the descriptive statistics file.""" descriptive_stat_base_dir = Path(__file__).parent.parent / "descriptive_stats" + if self.type in MIEB_TASK_TYPE: + descriptive_stat_base_dir = descriptive_stat_base_dir / "Image" + task_type_dir = descriptive_stat_base_dir / self.type if not descriptive_stat_base_dir.exists(): descriptive_stat_base_dir.mkdir() - task_type_dir = descriptive_stat_base_dir / self.type if not task_type_dir.exists(): task_type_dir.mkdir() return task_type_dir / f"{self.name}.json" diff --git a/mteb/descriptive_stats/Image/ImageClassification/MNIST.json b/mteb/descriptive_stats/Image/ImageClassification/MNIST.json new file mode 100644 index 0000000000..c028ae135d --- /dev/null +++ b/mteb/descriptive_stats/Image/ImageClassification/MNIST.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 10000, + "unique_num_labels": 10, + "min_image_width": 28, + "average_image_width": 28.0, + "max_image_width": 28, + "min_image_height": 28, + "average_image_height": 28.0, + "max_image_height": 28, + "labels": { + "7": { + "count": 1028 + }, + "2": { + "count": 1032 + }, + "1": { + "count": 1135 + }, + "0": { + "count": 980 + }, + "4": { + "count": 982 + }, + "9": { + "count": 1009 + }, + "5": { + "count": 892 + }, + "6": { + "count": 958 + }, + "3": { + "count": 1010 + }, + "8": { + "count": 974 + } + } + } +} \ No newline at end of file