diff --git a/mteb/abstasks/AbsTask.py b/mteb/abstasks/AbsTask.py index ec956bcabd..d996554c61 100644 --- a/mteb/abstasks/AbsTask.py +++ b/mteb/abstasks/AbsTask.py @@ -273,17 +273,20 @@ def calculate_metadata_metrics( self, overwrite_results: bool = False ) -> dict[str, DescriptiveStatistics | dict[str, DescriptiveStatistics]]: """Calculates descriptive statistics from the dataset by calling `_calculate_metrics_from_split`.""" + from mteb.abstasks import AbsTaskAnyClassification + if self.metadata.descriptive_stat_path.exists() and not overwrite_results: logger.info("Loading metadata descriptive statistics from cache.") return self.metadata.descriptive_stats - self.load_data() + if not self.data_loaded: + self.load_data() descriptive_stats = {} hf_subset_stat = "hf_subset_descriptive_stats" eval_splits = self.metadata.eval_splits - if self.metadata.type in ["Classification", "MultilabelClassification"]: - eval_splits += ["train"] + if isinstance(self, AbsTaskAnyClassification): + eval_splits.append(self.train_split) pbar_split = tqdm.tqdm(eval_splits, desc="Processing Splits...") for split in pbar_split: @@ -408,15 +411,28 @@ def filter_languages( def _add_main_score(self, scores: dict[HFSubset, ScoresDict]) -> None: scores["main_score"] = scores[self.metadata.main_score] - def _upload_dataset_to_hub(self, repo_name: str, fields: list[str]) -> None: + def _upload_dataset_to_hub( + self, repo_name: str, fields: list[str] | dict[str, str] + ) -> None: if self.metadata.is_multilingual: for config in self.metadata.eval_langs: logger.info(f"Converting {config} of {self.metadata.name}") sentences = {} for split in self.dataset[config]: - sentences[split] = Dataset.from_dict( - {field: self.dataset[config][split][field] for field in fields} - ) + if isinstance(fields, dict): + sentences[split] = Dataset.from_dict( + { + mapped_name: self.dataset[config][split][original_name] + for original_name, mapped_name in fields.items() + } + ) + else: + sentences[split] = Dataset.from_dict( + { + field: self.dataset[config][split][field] + for field in fields + } + ) sentences = DatasetDict(sentences) sentences.push_to_hub( repo_name, config, commit_message=f"Add {config} dataset" @@ -424,9 +440,17 @@ def _upload_dataset_to_hub(self, repo_name: str, fields: list[str]) -> None: else: sentences = {} for split in self.dataset: - sentences[split] = Dataset.from_dict( - {field: self.dataset[split][field] for field in fields} - ) + if isinstance(fields, dict): + sentences[split] = Dataset.from_dict( + { + mapped_name: self.dataset[split][original_name] + for original_name, mapped_name in fields.items() + } + ) + else: + sentences[split] = Dataset.from_dict( + {field: self.dataset[split][field] for field in fields} + ) sentences = DatasetDict(sentences) sentences.push_to_hub(repo_name, commit_message="Add dataset") diff --git a/mteb/abstasks/AbsTaskClassification.py b/mteb/abstasks/AbsTaskAnyClassification.py similarity index 58% rename from mteb/abstasks/AbsTaskClassification.py rename to mteb/abstasks/AbsTaskAnyClassification.py index e259fe3aa8..f00525cc90 100644 --- a/mteb/abstasks/AbsTaskClassification.py +++ b/mteb/abstasks/AbsTaskAnyClassification.py @@ -6,16 +6,23 @@ import numpy as np from datasets import Dataset, DatasetDict - -from mteb.abstasks.TaskMetadata import DescriptiveStatistics +from PIL import ImageFile +from sklearn.base import BaseEstimator +from sklearn.linear_model import LogisticRegression + +from mteb.abstasks.TaskMetadata import ( + DescriptiveStatistics, + ImageStatistics, + LabelStatistics, + TextStatistics, +) from mteb.encoder_interface import Encoder -from ..evaluation.evaluators import ( - logRegClassificationEvaluator, -) +from ..evaluation.evaluators.ClassificationEvaluator import ClassificationEvaluator from ..load_results.task_results import HFSubset, ScoresDict from .AbsTask import AbsTask +ImageFile.LOAD_TRUNCATED_IMAGES = True logger = logging.getLogger(__name__) @@ -27,35 +34,21 @@ class ClassificationDescriptiveStatistics(DescriptiveStatistics): number_of_characters: Total number of symbols in the dataset. number_texts_intersect_with_train: Number of texts in the train split - min_text_length: Minimum length of text - average_text_length: Average length of text - max_text_length: Maximum length of text - unique_texts: Number of unique texts - - min_labels_per_text: Minimum number of labels per text - average_label_per_text: Average number of labels per text - max_labels_per_text: Maximum number of labels per text - unique_labels: Number of unique labels - labels: dict of label frequencies + text_statistics: Statistics for text + image_statistics: Statistics for images + label_statistics: Statistics for labels """ num_samples: int - number_of_characters: int + number_of_characters: int | None number_texts_intersect_with_train: int | None - min_text_length: int - average_text_length: float - max_text_length: int - unique_texts: int - - min_labels_per_text: int - average_label_per_text: float - max_labels_per_text: int - unique_labels: int - labels: dict[str, dict[str, int]] + text_statistics: TextStatistics | None + image_statistics: ImageStatistics | None + label_statistics: LabelStatistics -class AbsTaskClassification(AbsTask): +class AbsTaskAnyClassification(AbsTask): """Abstract class for classification tasks The similarity is computed between pairs and the results are ranked. @@ -69,12 +62,19 @@ class AbsTaskClassification(AbsTask): """ - evaluator = logRegClassificationEvaluator - abstask_prompt = "Classify user passages." + evaluator: type[ClassificationEvaluator] = ClassificationEvaluator + classifier: BaseEstimator = LogisticRegression( + n_jobs=-1, + max_iter=100, + ) + samples_per_label: int = 8 n_experiments: int = 10 k: int = 3 - train_split = "train" + train_split: str = "train" + label_column_name: str = "label" + input_column_name: str = "text" + abstask_prompt = "Classify user passages." def evaluate( self, @@ -88,6 +88,8 @@ def evaluate( if not self.data_loaded: self.load_data() + if "random_state" in self.classifier.get_params(): + self.classifier = self.classifier.set_params(random_state=self.seed) scores = {} hf_subsets = self.hf_subsets if subsets_to_run is not None: @@ -117,7 +119,7 @@ def evaluate( def _evaluate_subset( self, model: Encoder, - dataset: DatasetDict | Dataset, + dataset: DatasetDict, hf_split: str, hf_subset: str, encode_kwargs: dict[str, Any], @@ -140,16 +142,18 @@ def _evaluate_subset( # Bootstrap `self.samples_per_label` samples per label for each split train_dataset, idxs = self._undersample_data( train_split, - self.samples_per_label, idxs, ) evaluator = self.evaluator( train_dataset, eval_split, + self.input_column_name, + self.label_column_name, task_metadata=self.metadata, hf_split=hf_split, hf_subset=hf_subset, + classifier=self.classifier, **params, ) scores_exp, test_cache = evaluator( @@ -164,13 +168,12 @@ def _evaluate_subset( return avg_scores def _undersample_data( - self, dataset: Dataset, samples_per_label: int, idxs=None + self, dataset: Dataset, idxs: list[int] | None = None ) -> tuple[Dataset, list[int]]: """Undersample data to have `samples_per_label` samples of each label. Args: dataset: Hugging Face `datasets.Dataset` containing "text" and "label". - samples_per_label: Number of samples per label to retain. idxs: Optional indices to shuffle and sample from. Returns: @@ -187,8 +190,8 @@ def _undersample_data( sampled_idxs = [] for i in idxs: - label = dataset[i]["label"] - if label_counter[label] < samples_per_label: + label = dataset[i][self.label_column_name] + if label_counter[label] < self.samples_per_label: sampled_idxs.append(i) label_counter[label] += 1 @@ -199,26 +202,50 @@ def _calculate_metrics_from_split( ) -> ClassificationDescriptiveStatistics: train_text = [] if hf_subset: - text = self.dataset[hf_subset][split]["text"] - label = self.dataset[hf_subset][split]["label"] - if split != "train": - train_text = self.dataset[hf_subset]["train"]["text"] + inputs = self.dataset[hf_subset][split][self.input_column_name] + label = self.dataset[hf_subset][split][self.label_column_name] + if split != self.train_split: + train_text = self.dataset[hf_subset][self.train_split][ + self.input_column_name + ] elif compute_overall: - text = [] + inputs = [] label = [] for hf_subset in self.metadata.eval_langs: - text.extend(self.dataset[hf_subset][split]["text"]) - label.extend(self.dataset[hf_subset][split]["label"]) - if split != "train": - train_text.extend(self.dataset[hf_subset]["train"]["text"]) + inputs.extend(self.dataset[hf_subset][split][self.input_column_name]) + label.extend(self.dataset[hf_subset][split][self.label_column_name]) + if split != self.train_split: + train_text.extend( + self.dataset[hf_subset][self.train_split][ + self.input_column_name + ] + ) else: - text = self.dataset[split]["text"] - label = self.dataset[split]["label"] - if split != "train": - train_text = self.dataset["train"]["text"] + inputs = self.dataset[split][self.input_column_name] + label = self.dataset[split][self.label_column_name] + if split != self.train_split: + train_text = self.dataset[self.train_split][self.input_column_name] + + total_text_len = 0 + text_len = None + img_widths, img_heights = None, None + num_texts_in_train = None + + if "image" in self.metadata.modalities: + img_widths, img_heights = [], [] + for img in inputs: + width, height = img.size # type: ignore + img_heights.append(height) + img_widths.append(width) + if "text" in self.metadata.modalities: + text_len = [len(t) for t in inputs] + total_text_len = sum(text_len) + num_texts_in_train = ( + len(set(inputs) & set(train_text)) + if split != self.train_split + else None + ) - text_len = [len(t) for t in text] - total_text_len = sum(text_len) if isinstance(label[0], int): label_len = [1] * len(label) total_label_len = len(label) @@ -230,18 +257,32 @@ def _calculate_metrics_from_split( total_labels = [] for l in label: total_labels.extend(l if len(l) > 0 else [None]) + label_count = Counter(total_labels) - num_texts_in_train = ( - len(set(text) & set(train_text)) if split != "train" else None - ) - return ClassificationDescriptiveStatistics( - num_samples=len(text), - number_of_characters=total_text_len, - number_texts_intersect_with_train=num_texts_in_train, - min_text_length=min(text_len), - average_text_length=total_text_len / len(text), - max_text_length=max(text_len), - unique_texts=len(set(text)), + + if text_len: + text_statistics = TextStatistics( + min_text_length=min(text_len), + average_text_length=total_text_len / len(inputs), + max_text_length=max(text_len), + unique_texts=len(set(inputs)), + ) + else: + text_statistics = None + + if img_widths: + image_statistics = ImageStatistics( + min_image_width=min(img_widths), + average_image_width=sum(img_widths) / len(img_widths), + max_image_width=max(img_widths), + min_image_height=min(img_heights), + average_image_height=sum(img_heights) / len(img_heights), + max_image_height=max(img_heights), + ) + else: + image_statistics = None + + label_statistics = LabelStatistics( min_labels_per_text=min(label_len), average_label_per_text=total_label_len / len(label), max_labels_per_text=max(label_len), @@ -254,5 +295,23 @@ def _calculate_metrics_from_split( }, ) + return ClassificationDescriptiveStatistics( + num_samples=len(inputs), + # text + number_of_characters=total_text_len, + number_texts_intersect_with_train=num_texts_in_train + if num_texts_in_train + else None, + text_statistics=text_statistics, + image_statistics=image_statistics, + label_statistics=label_statistics, + ) + def _push_dataset_to_hub(self, repo_name: str) -> None: - self._upload_dataset_to_hub(repo_name, ["text", "label"]) + self._upload_dataset_to_hub( + repo_name, + [ + self.input_column_name, + self.label_column_name, + ], + ) diff --git a/mteb/abstasks/AbsTaskMultilabelClassification.py b/mteb/abstasks/AbsTaskMultilabelClassification.py index c1d3b02844..4284a68b34 100644 --- a/mteb/abstasks/AbsTaskMultilabelClassification.py +++ b/mteb/abstasks/AbsTaskMultilabelClassification.py @@ -16,7 +16,7 @@ from mteb.encoder_interface import Encoder from ..load_results.task_results import ScoresDict -from .AbsTaskClassification import AbsTaskClassification +from .AbsTaskAnyClassification import AbsTaskAnyClassification logger = logging.getLogger(__name__) @@ -41,7 +41,7 @@ def evaluate_classifier( } -class AbsTaskMultilabelClassification(AbsTaskClassification): +class AbsTaskMultilabelClassification(AbsTaskAnyClassification): """Abstract class for multioutput classification tasks The similarity is computed between pairs and the results are ranked. diff --git a/mteb/abstasks/Image/AbsTaskImageClassification.py b/mteb/abstasks/Image/AbsTaskImageClassification.py deleted file mode 100644 index 3ead816215..0000000000 --- a/mteb/abstasks/Image/AbsTaskImageClassification.py +++ /dev/null @@ -1,263 +0,0 @@ -from __future__ import annotations - -import logging -from collections import Counter, defaultdict -from typing import Any - -import numpy as np -from PIL import ImageFile - -from mteb.abstasks.TaskMetadata import DescriptiveStatistics, HFSubset - -from ...encoder_interface import Encoder -from ...evaluation.evaluators import ( - ImagekNNClassificationEvaluator, - ImagelogRegClassificationEvaluator, -) -from ..AbsTask import AbsTask, ScoresDict - -ImageFile.LOAD_TRUNCATED_IMAGES = True - -logger = logging.getLogger(__name__) - - -class ImageClassificationDescriptiveStatistics(DescriptiveStatistics): - """Descriptive statistics for ImageClassification - - Attributes: - num_samples: number of samples in the dataset. - - min_image_width: Minimum width of images - average_image_width: Average width of images - max_image_width: Maximum width of images - - min_image_height: Minimum height of images - average_image_height: Average height of images - max_image_height: Maximum height of images - - unique_labels: Number of unique labels - labels: dict of label frequencies - """ - - num_samples: int - - min_image_width: float - average_image_width: float - max_image_width: float - - min_image_height: float - average_image_height: float - max_image_height: float - - unique_num_labels: int - labels: dict[str, dict[str, int]] - - -class AbsTaskImageClassification(AbsTask): - """Abstract class for kNN classification tasks - The similarity is computed between pairs and the results are ranked. - - self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. It - must contain the following columns: - image: Image.Image - label: int - """ - - image_column_name: str = "image" - label_column_name: str = "label" - samples_per_label: int = 16 - n_experiments: int = 5 - - def __init__( - self, - method: str = "logReg", - k: int = 3, - **kwargs, - ): - super().__init__(**kwargs) - self.method = method - - # kNN parameters - self.k = k - - # Run metadata validation by instantiating addressing the attribute - # This is quite hacky. Ideally, this would be done in the constructor of - # each concrete task, but then we have to duplicate the __init__ method's - # interface. - if hasattr(self, "metadata"): - self.metadata - - def _add_main_score(self, scores: dict[HFSubset, ScoresDict]) -> None: - scores["main_score"] = scores[self.metadata.main_score] - - def _calculate_metrics_from_split( - self, split: str, hf_subset: str | None = None, compute_overall: bool = False - ) -> ImageClassificationDescriptiveStatistics: - if hf_subset: - imgs = self.dataset[hf_subset][split][self.image_column_name] - labels = self.dataset[hf_subset][split][self.label_column_name] - elif compute_overall: - imgs, labels = [], [] - for hf_subset in self.metadata.eval_langs: - imgs.extend(self.dataset[hf_subset][split][self.image_column_name]) - labels.extend(self.dataset[hf_subset][split][self.label_column_name]) - else: - imgs = self.dataset[split][self.image_column_name] - labels = self.dataset[split][self.label_column_name] - - num_samples = len(labels) - unique_num_labels = len(set(labels)) - label_count = Counter(labels) - - img_widths, img_heights = [], [] - for img in imgs: - width, height = img.size # type: ignore - img_heights.append(height) - img_widths.append(width) - - return ImageClassificationDescriptiveStatistics( - num_samples=num_samples, - unique_num_labels=unique_num_labels, - min_image_width=min(img_widths), - average_image_width=sum(img_widths) / len(img_widths), - max_image_width=max(img_widths), - min_image_height=min(img_heights), - average_image_height=sum(img_heights) / len(img_heights), - max_image_height=max(img_heights), - labels={ - str(label): {"count": count} for label, count in label_count.items() - }, - ) - - def evaluate( - self, - model, - eval_split: str = "test", - train_split: str = "train", - *, - encode_kwargs: dict[str, Any], - **kwargs, - ) -> dict[HFSubset, ScoresDict]: - if not self.data_loaded: - self.load_data() - - scores = {} - hf_subsets = self.hf_subsets - - for hf_subset in hf_subsets: - logger.info( - f"\nTask: {self.metadata.name}, split: {eval_split}, subset: {hf_subset}. Running..." - ) - - if hf_subset not in self.dataset and hf_subset == "default": - ds = self.dataset - else: - ds = self.dataset[hf_subset] - scores[hf_subset] = self._evaluate_subset( - model, - ds, - hf_subset=hf_subset, - hf_split=eval_split, - train_split=train_split, - encode_kwargs=encode_kwargs, - **kwargs, - ) - self._add_main_score(scores[hf_subset]) - - return scores - - def _evaluate_subset( - self, - model: Encoder, - dataset, - hf_subset: str, - hf_split: str = "test", - train_split: str = "train", - *, - encode_kwargs: dict[str, Any], - **kwargs, - ) -> ScoresDict: - train_split = dataset[train_split] - eval_split = dataset[hf_split] - params = {"k": self.k} - params.update(kwargs) - - scores = [] - test_cache, idxs = ( - None, - None, - ) # we store idxs to make the shuffling reproducible - for i in range(self.n_experiments): - logger.info( - "=" * 10 + f" Experiment {i + 1}/{self.n_experiments} " + "=" * 10 - ) - # Bootstrap `self.samples_per_label` samples per label for each split - undersampled_train, idxs = self._undersample_data( - train_split, - self.label_column_name, - self.samples_per_label, - idxs=idxs, - ) - - if self.method == "kNN": - evaluator = ImagekNNClassificationEvaluator( - undersampled_train, - eval_split, - self.image_column_name, - self.label_column_name, - task_metadata=self.metadata, - hf_split=hf_split, - hf_subset=hf_subset, - encode_kwargs=encode_kwargs, - **params, - ) - elif self.method == "logReg": - evaluator = ImagelogRegClassificationEvaluator( - undersampled_train, - eval_split, - self.image_column_name, - self.label_column_name, - task_metadata=self.metadata, - hf_split=hf_split, - hf_subset=hf_subset, - encode_kwargs=encode_kwargs, - **params, - ) - else: - raise ValueError(f"Method {self.method} not supported") - - scores_exp, test_cache = evaluator(model, test_cache=test_cache) - scores.append(scores_exp) - - avg_scores: dict[str, Any] = { - k: np.mean([s[k] for s in scores]) for k in scores[0].keys() - } - avg_scores["scores_per_experiment"] = scores - return avg_scores - - def _undersample_data( - self, dataset_split, label_column_name, samples_per_label, idxs=None - ): - """Undersample data to have samples_per_label samples of each label - without loading all images into memory. - """ - if idxs is None: - idxs = np.arange(len(dataset_split)) - self.np_rng.shuffle(idxs) - if not isinstance(idxs, list): - idxs = idxs.tolist() - label_counter = defaultdict(int) - selected_indices = [] - - labels = dataset_split[label_column_name] - for i in idxs: - label = labels[i] - if label_counter[label] < samples_per_label: - selected_indices.append(i) - label_counter[label] += 1 - - undersampled_dataset = dataset_split.select(selected_indices) - return ( - undersampled_dataset, - idxs, - ) diff --git a/mteb/abstasks/Image/AbsTaskImageTextPairClassification.py b/mteb/abstasks/Image/AbsTaskImageTextPairClassification.py index 5a5f3f33ab..50bc8aaec9 100644 --- a/mteb/abstasks/Image/AbsTaskImageTextPairClassification.py +++ b/mteb/abstasks/Image/AbsTaskImageTextPairClassification.py @@ -62,13 +62,19 @@ def _add_main_score(self, scores) -> None: def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> ImageTextPairClassificationDescriptiveStatistics: - dataset = ( - self.dataset[split] if hf_subset is None else self.dataset[hf_subset][split] - ) + if compute_overall: + # TODO: implement overall statistics + return {} + else: + dataset = ( + self.dataset[split] + if hf_subset is None + else self.dataset[hf_subset][split] + ) num_samples = len(dataset) if isinstance(self.images_column_names, str): - num_images = list(dataset[self.images_column_names]) + num_images = len(list(dataset[self.images_column_names])) elif isinstance(self.images_column_names, list): num_images = sum( [len(dataset[img_column]) for img_column in self.images_column_names] diff --git a/mteb/abstasks/Image/__init__.py b/mteb/abstasks/Image/__init__.py index d4c5807ce9..05c17f81d2 100644 --- a/mteb/abstasks/Image/__init__.py +++ b/mteb/abstasks/Image/__init__.py @@ -2,7 +2,6 @@ from .AbsTaskAny2AnyMultiChoice import AbsTaskAny2AnyMultiChoice from .AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval -from .AbsTaskImageClassification import AbsTaskImageClassification from .AbsTaskImageClustering import AbsTaskImageClustering from .AbsTaskImageMultilabelClassification import AbsTaskImageMultilabelClassification from .AbsTaskImageTextPairClassification import AbsTaskImageTextPairClassification @@ -15,7 +14,6 @@ "AbsTaskImageTextPairClassification", "AbsTaskImageMultilabelClassification", "AbsTaskImageClustering", - "AbsTaskImageClassification", "AbsTaskAny2AnyRetrieval", "AbsTaskAny2AnyMultiChoice", ] diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 167e20349b..6f2ee7e1c1 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -198,6 +198,64 @@ class DescriptiveStatistics(TypedDict): pass +class TextStatistics(TypedDict): + """Class for descriptive statistics for texts. + + Attributes: + min_text_length: Minimum length of text + average_text_length: Average length of text + max_text_length: Maximum length of text + unique_texts: Number of unique texts + """ + + min_text_length: int + average_text_length: float + max_text_length: int + unique_texts: int + + +class ImageStatistics(TypedDict): + """Class for descriptive statistics for images. + + Attributes: + min_image_width: Minimum width of images + average_image_width: Average width of images + max_image_width: Maximum width of images + + min_image_height: Minimum height of images + average_image_height: Average height of images + max_image_height: Maximum height of images + """ + + min_image_width: float + average_image_width: float + max_image_width: float + + min_image_height: float + average_image_height: float + max_image_height: float + + +class LabelStatistics(TypedDict): + """Class for descriptive statistics for texts. + + Attributes: + min_labels_per_text: Minimum number of labels per text + average_label_per_text: Average number of labels per text + max_labels_per_text: Maximum number of labels per text + + unique_labels: Number of unique labels + labels: dict of label frequencies + """ + + min_labels_per_text: int + average_label_per_text: float + max_labels_per_text: int + + unique_labels: int + labels: dict[str, dict[str, int]] + + logger = logging.getLogger(__name__) diff --git a/mteb/abstasks/__init__.py b/mteb/abstasks/__init__.py index 8fd06e86fa..3631cab5d3 100644 --- a/mteb/abstasks/__init__.py +++ b/mteb/abstasks/__init__.py @@ -1,8 +1,8 @@ from __future__ import annotations from .AbsTask import AbsTask +from .AbsTaskAnyClassification import AbsTaskAnyClassification from .AbsTaskBitextMining import AbsTaskBitextMining -from .AbsTaskClassification import AbsTaskClassification from .AbsTaskClustering import AbsTaskClustering from .AbsTaskClusteringFast import AbsTaskClusteringFast from .AbsTaskMultilabelClassification import AbsTaskMultilabelClassification @@ -13,7 +13,6 @@ from .Image import ( AbsTaskAny2AnyMultiChoice, AbsTaskAny2AnyRetrieval, - AbsTaskImageClassification, AbsTaskImageClustering, AbsTaskImageMultilabelClassification, AbsTaskImageTextPairClassification, @@ -25,7 +24,7 @@ __all__ = [ "AbsTask", "AbsTaskBitextMining", - "AbsTaskClassification", + "AbsTaskAnyClassification", "AbsTaskClustering", "AbsTaskClusteringFast", "AbsTaskMultilabelClassification", @@ -36,7 +35,6 @@ "TaskMetadata", "AbsTaskAny2AnyMultiChoice", "AbsTaskAny2AnyRetrieval", - "AbsTaskImageClassification", "AbsTaskImageClustering", "AbsTaskImageMultilabelClassification", "AbsTaskImageTextPairClassification", diff --git a/mteb/abstasks/dataset_loaders.py b/mteb/abstasks/dataset_loaders.py index 2170e9e5d1..14bbb7a000 100644 --- a/mteb/abstasks/dataset_loaders.py +++ b/mteb/abstasks/dataset_loaders.py @@ -117,10 +117,8 @@ def _load_corpus(self) -> dict[str, dict[str, str]]: config = f"{self.config}-corpus" if self.config is not None else "corpus" corpus_ds = self.load_dataset_split(config) - corpus_ds = ( - corpus_ds.cast_column("_id", Value("string")) - .rename_column("_id", "id") - .select_columns(["id", "text", "title"]) + corpus_ds = corpus_ds.cast_column("_id", Value("string")).rename_column( + "_id", "id" ) logger.info("Loaded %d %s Documents.", len(corpus_ds), self.split.upper()) logger.info("Doc Example: %s", corpus_ds[0]) diff --git a/mteb/evaluation/__init__.py b/mteb/evaluation/__init__.py index f6c98b5edd..6e18a22336 100644 --- a/mteb/evaluation/__init__.py +++ b/mteb/evaluation/__init__.py @@ -11,9 +11,6 @@ RetrievalEvaluator, STSEvaluator, SummarizationEvaluator, - dot_distance, - kNNClassificationEvaluator, - logRegClassificationEvaluator, ) from .LangMapping import LANG_MAPPING from .MTEB import MTEB @@ -28,9 +25,6 @@ "ClusteringEvaluator", "BitextMiningEvaluator", "PairClassificationEvaluator", - "kNNClassificationEvaluator", - "logRegClassificationEvaluator", - "dot_distance", "LANG_MAPPING", "MTEB", "ClassificationEvaluator", diff --git a/mteb/evaluation/evaluators/ClassificationEvaluator.py b/mteb/evaluation/evaluators/ClassificationEvaluator.py index 3377e71683..3db72ce9a8 100644 --- a/mteb/evaluation/evaluators/ClassificationEvaluator.py +++ b/mteb/evaluation/evaluators/ClassificationEvaluator.py @@ -5,127 +5,100 @@ import numpy as np from datasets import Dataset -from sklearn.linear_model import LogisticRegression +from sklearn.base import BaseEstimator from sklearn.metrics import ( accuracy_score, average_precision_score, f1_score, + precision_score, + recall_score, ) -from sklearn.neighbors import KNeighborsClassifier from torch.utils.data import DataLoader from mteb.abstasks.TaskMetadata import TaskMetadata from mteb.encoder_interface import Encoder -from mteb.model_meta import ScoringFunction +from ...create_dataloaders import create_image_dataloader from .Evaluator import Evaluator logger = logging.getLogger(__name__) -def dot_distance(a: np.ndarray, b: np.ndarray) -> float: - return -np.dot(a, b) - - -class kNNClassificationEvaluator(Evaluator): +class ClassificationEvaluator(Evaluator): def __init__( self, train_dataset: Dataset, eval_dataset: Dataset, + values_column_name: str, + label_column_name: str, task_metadata: TaskMetadata, hf_split: str, hf_subset: str, - k: int = 1, + classifier: BaseEstimator, **kwargs, - ): + ) -> None: super().__init__(**kwargs) self.train_dataset = train_dataset self.eval_dataset = eval_dataset + self.values_column_name = values_column_name + self.label_column_name = label_column_name + self.task_metadata = task_metadata self.hf_split = hf_split self.hf_subset = hf_subset - - self.k = k - - def __call__( - self, - model: Encoder, - *, - encode_kwargs: dict[str, Any], - test_cache: np.ndarray | None = None, - ) -> tuple[dict[str, float], Any]: - scores = {} - max_accuracy = 0 - max_f1 = 0 - max_ap = 0 - X_train = model.encode( - DataLoader(self.train_dataset), - task_metadata=self.task_metadata, - hf_split="train", - hf_subset=self.hf_subset, - **encode_kwargs, - ) - if test_cache is None: - X_test = model.encode( - DataLoader(self.eval_dataset), - task_metadata=self.task_metadata, - hf_split=self.hf_split, - hf_subset=self.hf_subset, - **encode_kwargs, + self.classifier = classifier + + def create_dataloaders(self, batch_size: int) -> tuple[DataLoader, DataLoader]: + if self.task_metadata.modalities == ["image"]: + dataloader_train = create_image_dataloader( + self.train_dataset, + image_column_name=self.values_column_name, + batch_size=batch_size, ) - test_cache = X_test + dataloader_test = create_image_dataloader( + self.eval_dataset, + image_column_name=self.values_column_name, + batch_size=batch_size, + ) + elif self.task_metadata.modalities == ["text"]: + if self.values_column_name != "text": + self.train_dataset = self.train_dataset.rename_column( + self.values_column_name, "text" + ) + self.eval_dataset = self.eval_dataset.rename_column( + self.values_column_name, "text" + ) + dataloader_train = DataLoader(self.train_dataset) + dataloader_test = DataLoader(self.eval_dataset) else: - X_test = test_cache - - y_train = self.train_dataset["label"] - y_test = self.eval_dataset["label"] - for metric in [ - ScoringFunction.COSINE, - ScoringFunction.EUCLIDEAN, - ]: # TODO: "dot" - knn = KNeighborsClassifier( - n_neighbors=self.k, n_jobs=-1, metric=metric.value + raise ValueError( + "ClassificationEvaluator only supports image and text modalities." ) - knn.fit(X_train, y_train) - y_pred = knn.predict(X_test) - accuracy = accuracy_score(y_test, y_pred) - f1 = f1_score(y_test, y_pred, average="macro") - scores["accuracy_" + metric.value] = accuracy - scores["f1_" + metric.value] = f1 - max_accuracy = max(max_accuracy, accuracy) - max_f1 = max(max_f1, f1) # type: ignore - # if binary classification - if len(np.unique(y_train)) == 2: - ap = average_precision_score(y_test, y_pred) - scores["ap_" + metric.value] = ap - max_ap = max(max_ap, ap) - scores["accuracy"] = max_accuracy - scores["f1"] = max_f1 - if len(np.unique(y_train)) == 2: - scores["ap"] = max_ap - return scores, test_cache - + return dataloader_train, dataloader_test -class logRegClassificationEvaluator(Evaluator): - def __init__( + def calculate_scores( self, - train_dataset: Dataset, - eval_dataset: Dataset, - task_metadata: TaskMetadata, - hf_split: str, - hf_subset: str, - max_iter: int = 100, - **kwargs, - ): - super().__init__(**kwargs) - self.train_dataset = train_dataset - self.eval_dataset = eval_dataset + y_test: np.ndarray | list[int], + y_pred: np.ndarray, + ) -> dict[str, float]: + scores = { + "accuracy": accuracy_score(y_test, y_pred), + "f1": f1_score(y_test, y_pred, average="macro"), + "f1_weighted": f1_score(y_test, y_pred, average="weighted"), + "precision": precision_score(y_test, y_pred, average="macro"), + "precision_weighted": precision_score(y_test, y_pred, average="weighted"), + "recall": recall_score(y_test, y_pred, average="macro"), + "recall_weighted": recall_score(y_test, y_pred, average="weighted"), + } - self.max_iter = max_iter - self.task_metadata = task_metadata - self.hf_split = hf_split - self.hf_subset = hf_subset + # if binary classification + if len(np.unique(y_test)) == 2: + scores["ap"] = average_precision_score(y_test, y_pred, average="macro") + scores["ap_weighted"] = average_precision_score( + y_test, y_pred, average="weighted" + ) + return scores def __call__( self, @@ -134,15 +107,12 @@ def __call__( encode_kwargs: dict[str, Any], test_cache: np.ndarray | None = None, ) -> tuple[dict[str, float], Any]: - scores = {} - clf = LogisticRegression( - random_state=self.seed, - n_jobs=-1, - max_iter=self.max_iter, - verbose=1 if logger.isEnabledFor(logging.DEBUG) else 0, + dataloader_train, dataloader_test = self.create_dataloaders( + batch_size=encode_kwargs["batch_size"] ) + X_train = model.encode( - DataLoader(self.train_dataset), + dataloader_train, task_metadata=self.task_metadata, hf_split="train", hf_subset=self.hf_subset, @@ -150,27 +120,17 @@ def __call__( ) if test_cache is None: test_cache = model.encode( - DataLoader(self.eval_dataset), + dataloader_test, task_metadata=self.task_metadata, hf_split=self.hf_split, hf_subset=self.hf_subset, **encode_kwargs, ) logger.info("Fitting logistic regression classifier...") - y_train = self.train_dataset["label"] - y_test = self.eval_dataset["label"] - clf.fit(X_train, y_train) + y_train = self.train_dataset[self.label_column_name] + y_test = self.eval_dataset[self.label_column_name] + self.classifier.fit(X_train, y_train) logger.info("Evaluating...") - y_pred = clf.predict(test_cache) - scores["accuracy"] = accuracy_score(y_test, y_pred) - scores["f1"] = f1_score(y_test, y_pred, average="macro") - scores["f1_weighted"] = f1_score(y_test, y_pred, average="weighted") - - # if binary classification - if len(np.unique(y_test)) == 2: - scores["ap"] = average_precision_score(y_test, y_pred, average="macro") - scores["ap_weighted"] = average_precision_score( - y_test, y_pred, average="weighted" - ) - + y_pred = self.classifier.predict(test_cache) + scores = self.calculate_scores(y_test, y_pred) return scores, test_cache diff --git a/mteb/evaluation/evaluators/Image/ClassificationEvaluator.py b/mteb/evaluation/evaluators/Image/ClassificationEvaluator.py deleted file mode 100644 index 1699ce9e31..0000000000 --- a/mteb/evaluation/evaluators/Image/ClassificationEvaluator.py +++ /dev/null @@ -1,318 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Any - -import numpy as np -import torch -from sklearn.linear_model import LogisticRegression -from sklearn.metrics import ( - accuracy_score, - average_precision_score, - f1_score, -) -from sklearn.neighbors import KNeighborsClassifier - -from mteb.abstasks import TaskMetadata -from mteb.create_dataloaders import create_image_dataloader -from mteb.encoder_interface import Encoder -from mteb.model_meta import ScoringFunction -from mteb.similarity_functions import cos_sim, dot_score, euclidean_sim - -from ..Evaluator import Evaluator - -logger = logging.getLogger(__name__) - - -class ImagekNNClassificationEvaluator(Evaluator): - def __init__( - self, - dataset_train, - dataset_test, - image_column_name, - label_column_name, - task_metadata: TaskMetadata, - hf_split: str, - hf_subset: str, - k: int = 1, - *, - encode_kwargs: dict[str, Any], - limit: int | None = None, - **kwargs, - ): - super().__init__(**kwargs) - - if limit is not None: - dataset_train = dataset_train.select(list(range(limit))) - - self.image_column_name = image_column_name - self.dataset_train = dataset_train - self.y_train = dataset_train[label_column_name] - - self.dataset_test = dataset_test - self.y_test = dataset_test[label_column_name] - self.task_metadata = task_metadata - self.hf_split = hf_split - self.hf_subset = hf_subset - self.encode_kwargs = encode_kwargs - - self.k = k - - def __call__(self, model, test_cache=None): - scores = {} - max_accuracy = 0 - max_f1 = 0 - max_ap = 0 - dataloader_train = create_image_dataloader( - self.dataset_train, - image_column_name=self.image_column_name, - batch_size=self.encode_kwargs["batch_size"], - ) - X_train = model.encode( - dataloader_train, - task_metadata=self.task_metadata, - hf_split="train", - hf_subset=self.hf_subset, - batch_size=self.encode_kwargs["batch_size"], - ) - dataloader = create_image_dataloader( - self.dataset_test, - image_column_name=self.image_column_name, - batch_size=self.encode_kwargs["batch_size"], - ) - if test_cache is None: - X_test = model.encode( - dataloader, - task_metadata=self.task_metadata, - hf_split=self.hf_split, - hf_subset=self.hf_subset, - batch_size=self.encode_kwargs["batch_size"], - ) - test_cache = X_test - else: - X_test = test_cache - for metric in [ - ScoringFunction.COSINE, - ScoringFunction.EUCLIDEAN, - ]: # TODO: "dot" - knn = KNeighborsClassifier(n_neighbors=self.k, n_jobs=-1, metric=metric) - knn.fit(X_train, self.y_train) - y_pred = knn.predict(X_test) - accuracy = accuracy_score(self.y_test, y_pred) - f1 = f1_score(self.y_test, y_pred, average="macro") - scores["accuracy_" + metric] = accuracy - scores["f1_" + metric] = f1 - max_accuracy = max(max_accuracy, accuracy) - max_f1 = max(max_f1, f1) # type: ignore - # if binary classification - if len(np.unique(self.y_train)) == 2: - ap = average_precision_score(self.y_test, y_pred) - scores["ap_" + metric] = ap - max_ap = max(max_ap, ap) - scores["accuracy"] = max_accuracy - scores["f1"] = max_f1 - if len(np.unique(self.y_train)) == 2: - scores["ap"] = max_ap - return scores, test_cache - - -class ImagekNNClassificationEvaluatorPytorch(Evaluator): - def __init__( - self, - dataset_train, - dataset_test, - image_column_name, - label_column_name, - task_metadata: TaskMetadata, - hf_split: str, - hf_subset: str, - k: int = 1, - *, - encode_kwargs: dict[str, Any], - limit: int | None = None, - **kwargs: Any, - ): - super().__init__(**kwargs) - if limit is not None: - dataset_train = dataset_train.select(list(range(limit))) - - self.image_column_name = image_column_name - self.dataset_train = dataset_train - self.y_train = dataset_train[label_column_name] - - self.dataset_test = dataset_test - self.y_test = dataset_test[label_column_name] - self.task_metadata = task_metadata - self.hf_split = hf_split - self.hf_subset = hf_subset - self.encode_kwargs = encode_kwargs - - self.k = k - - def __call__(self, model: Encoder, test_cache=None): - scores = {} - max_accuracy = 0 - max_f1 = 0 - max_ap = 0 - - dataloader_train = create_image_dataloader( - self.dataset_train, - image_column_name=self.image_column_name, - batch_size=self.encode_kwargs["batch_size"], - ) - X_train = model.encode( - dataloader_train, - task_metadata=self.task_metadata, - hf_split="train", - hf_subset=self.hf_subset, - batch_size=self.encode_kwargs["batch_size"], - ) - - dataloader = create_image_dataloader( - self.dataset_test, - image_column_name=self.image_column_name, - batch_size=self.encode_kwargs["batch_size"], - ) - if test_cache is None: - X_test = model.encode( - dataloader, - task_metadata=self.task_metadata, - hf_split=self.hf_split, - hf_subset=self.hf_subset, - batch_size=self.encode_kwargs["batch_size"], - ) - test_cache = X_test - else: - X_test = test_cache - for metric in [ - ScoringFunction.COSINE, - ScoringFunction.EUCLIDEAN, - ScoringFunction.DOT_PRODUCT, - ScoringFunction.CUSTOM, - ]: - if metric == ScoringFunction.COSINE: - distances = 1 - cos_sim(X_test, X_train) - elif metric == ScoringFunction.EUCLIDEAN: - distances = euclidean_sim(X_test, X_train) - elif metric == ScoringFunction.DOT_PRODUCT: - distances = -dot_score(X_test, X_train) - elif metric == ScoringFunction.CUSTOM: - distances = model.similarity(X_test, X_train) - neigh_indices = torch.topk( - distances, k=self.k, dim=1, largest=False - ).indices - y_train = torch.tensor(self.y_train) - y_pred = torch.mode( - y_train[neigh_indices], dim=1 - ).values # TODO: case where there is no majority - y_pred = y_pred.tolist() - accuracy = accuracy_score(self.y_test, y_pred) - f1 = f1_score(self.y_test, y_pred, average="macro") - scores["accuracy_" + metric.value] = accuracy - scores["f1_" + metric.value] = f1 - max_accuracy = max(max_accuracy, accuracy) - max_f1 = max(max_f1, f1) # type: ignore - # if binary classification - if len(np.unique(self.y_train)) == 2: - ap = average_precision_score(self.y_test, y_pred) - scores["ap_" + metric.value] = ap - max_ap = max(max_ap, ap) - scores["accuracy"] = max_accuracy - scores["f1"] = max_f1 - if len(np.unique(self.y_train)) == 2: - scores["ap"] = max_ap - return scores, test_cache - - -class ImagelogRegClassificationEvaluator(Evaluator): - def __init__( - self, - dataset_train, - dataset_test, - image_column_name, - label_column_name, - task_metadata: TaskMetadata, - hf_split: str, - hf_subset: str, - max_iter: int = 100, - *, - encode_kwargs: dict[str, Any], - limit: int | None = None, - **kwargs, - ): - super().__init__(**kwargs) - self.encode_kwargs = encode_kwargs - - if limit is not None: - dataset_train = dataset_train.select(list(range(limit))) - - self.image_column_name = image_column_name - self.dataset_train = dataset_train - self.y_train = dataset_train[label_column_name] - - self.dataset_test = dataset_test - self.y_test = dataset_test[label_column_name] - - self.max_iter = max_iter - self.task_metadata = task_metadata - self.hf_split = hf_split - self.hf_subset = hf_subset - - def __call__(self, model, test_cache=None): - scores = {} - clf = LogisticRegression( - random_state=self.seed, - n_jobs=-1, - max_iter=self.max_iter, - verbose=1 if logger.isEnabledFor(logging.DEBUG) else 0, - ) - - dataloader_train = create_image_dataloader( - self.dataset_train, - image_column_name=self.image_column_name, - batch_size=self.encode_kwargs["batch_size"], - ) - X_train = model.encode( - dataloader_train, - task_metadata=self.task_metadata, - hf_split="train", - hf_subset=self.hf_subset, - batch_size=self.encode_kwargs["batch_size"], - ) - - dataloader = create_image_dataloader( - self.dataset_test, - image_column_name=self.image_column_name, - batch_size=self.encode_kwargs["batch_size"], - ) - if test_cache is None: - X_test = model.encode( - dataloader, - task_metadata=self.task_metadata, - hf_split=self.hf_split, - hf_subset=self.hf_subset, - batch_size=self.encode_kwargs["batch_size"], - ) - test_cache = X_test - else: - X_test = test_cache - logger.info("Fitting logistic regression classifier...") - if X_train.dtype == torch.bfloat16: - X_train = X_train.to(torch.float32) - if X_test.dtype == torch.bfloat16: - X_test = X_test.to(torch.float32) - clf.fit(X_train, self.y_train) - logger.info("Evaluating...") - y_pred = clf.predict(X_test) - scores["accuracy"] = accuracy_score(self.y_test, y_pred) - scores["f1"] = f1_score(self.y_test, y_pred, average="macro") - scores["f1_weighted"] = f1_score(self.y_test, y_pred, average="weighted") - - # if binary classification - if len(np.unique(self.y_train)) == 2: - scores["ap"] = average_precision_score(self.y_test, y_pred, average="macro") - scores["ap_weighted"] = average_precision_score( - self.y_test, y_pred, average="weighted" - ) - - return scores, test_cache diff --git a/mteb/evaluation/evaluators/Image/__init__.py b/mteb/evaluation/evaluators/Image/__init__.py index 8bfdb451ad..5ba68a246b 100644 --- a/mteb/evaluation/evaluators/Image/__init__.py +++ b/mteb/evaluation/evaluators/Image/__init__.py @@ -2,11 +2,6 @@ from .Any2AnyMultiChoiceEvaluator import Any2AnyMultiChoiceEvaluator from .Any2AnyRetrievalEvaluator import Any2AnyRetrievalEvaluator -from .ClassificationEvaluator import ( - ImagekNNClassificationEvaluator, - ImagekNNClassificationEvaluatorPytorch, - ImagelogRegClassificationEvaluator, -) from .ClusteringEvaluator import ImageClusteringEvaluator from .ImageTextPairClassificationEvaluator import ImageTextPairClassificationEvaluator from .VisualSTSEvaluator import VisualSTSEvaluator @@ -15,9 +10,6 @@ __all__ = [ "Any2AnyMultiChoiceEvaluator", "Any2AnyRetrievalEvaluator", - "ImagekNNClassificationEvaluator", - "ImagelogRegClassificationEvaluator", - "ImagekNNClassificationEvaluatorPytorch", "ImageClusteringEvaluator", "ImageTextPairClassificationEvaluator", "VisualSTSEvaluator", diff --git a/mteb/evaluation/evaluators/__init__.py b/mteb/evaluation/evaluators/__init__.py index cbd2e00e2c..72b9c2f102 100644 --- a/mteb/evaluation/evaluators/__init__.py +++ b/mteb/evaluation/evaluators/__init__.py @@ -1,20 +1,13 @@ from __future__ import annotations from .BitextMiningEvaluator import BitextMiningEvaluator -from .ClassificationEvaluator import ( - dot_distance, - kNNClassificationEvaluator, - logRegClassificationEvaluator, -) +from .ClassificationEvaluator import ClassificationEvaluator from .ClusteringEvaluator import ClusteringEvaluator from .Evaluator import Evaluator from .Image import ( Any2AnyMultiChoiceEvaluator, Any2AnyRetrievalEvaluator, ImageClusteringEvaluator, - ImagekNNClassificationEvaluator, - ImagekNNClassificationEvaluatorPytorch, - ImagelogRegClassificationEvaluator, ImageTextPairClassificationEvaluator, VisualSTSEvaluator, ZeroShotClassificationEvaluator, @@ -38,16 +31,11 @@ "ClusteringEvaluator", "BitextMiningEvaluator", "PairClassificationEvaluator", - "kNNClassificationEvaluator", - "logRegClassificationEvaluator", - "dot_distance", "Any2AnyMultiChoiceEvaluator", "Any2AnyRetrievalEvaluator", - "ImagekNNClassificationEvaluator", - "ImagelogRegClassificationEvaluator", - "ImagekNNClassificationEvaluatorPytorch", "ImageClusteringEvaluator", "ImageTextPairClassificationEvaluator", "VisualSTSEvaluator", "ZeroShotClassificationEvaluator", + "ClassificationEvaluator", ] diff --git a/mteb/overview.py b/mteb/overview.py index aab9198c46..9c5e42219d 100644 --- a/mteb/overview.py +++ b/mteb/overview.py @@ -8,7 +8,10 @@ import pandas as pd -from mteb.abstasks import AbsTask, AbsTaskMultilabelClassification +from mteb.abstasks import ( + AbsTask, + AbsTaskMultilabelClassification, +) from mteb.abstasks.AbsTaskReranking import AbsTaskReranking from mteb.abstasks.TaskMetadata import TASK_CATEGORY, TASK_DOMAIN, TASK_TYPE from mteb.custom_validators import MODALITIES diff --git a/mteb/tasks/Classification/ara/AJGT.py b/mteb/tasks/Classification/ara/AJGT.py index b10602d801..9000cd9f63 100644 --- a/mteb/tasks/Classification/ara/AJGT.py +++ b/mteb/tasks/Classification/ara/AJGT.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class AJGT(AbsTaskClassification): +class AJGT(AbsTaskAnyClassification): metadata = TaskMetadata( name="AJGT", dataset={ diff --git a/mteb/tasks/Classification/ara/HotelReviewSentimentClassification.py b/mteb/tasks/Classification/ara/HotelReviewSentimentClassification.py index 92509f2d7e..8632bd9650 100644 --- a/mteb/tasks/Classification/ara/HotelReviewSentimentClassification.py +++ b/mteb/tasks/Classification/ara/HotelReviewSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class HotelReviewSentimentClassification(AbsTaskClassification): +class HotelReviewSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="HotelReviewSentimentClassification", dataset={ diff --git a/mteb/tasks/Classification/ara/OnlineStoreReviewSentimentClassification.py b/mteb/tasks/Classification/ara/OnlineStoreReviewSentimentClassification.py index c0c4429a1d..1e4854b39f 100644 --- a/mteb/tasks/Classification/ara/OnlineStoreReviewSentimentClassification.py +++ b/mteb/tasks/Classification/ara/OnlineStoreReviewSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class OnlineStoreReviewSentimentClassification(AbsTaskClassification): +class OnlineStoreReviewSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="OnlineStoreReviewSentimentClassification", dataset={ diff --git a/mteb/tasks/Classification/ara/RestaurantReviewSentimentClassification.py b/mteb/tasks/Classification/ara/RestaurantReviewSentimentClassification.py index e6d8732909..11206683d9 100644 --- a/mteb/tasks/Classification/ara/RestaurantReviewSentimentClassification.py +++ b/mteb/tasks/Classification/ara/RestaurantReviewSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class RestaurantReviewSentimentClassification(AbsTaskClassification): +class RestaurantReviewSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="RestaurantReviewSentimentClassification", dataset={ diff --git a/mteb/tasks/Classification/ara/TweetEmotionClassification.py b/mteb/tasks/Classification/ara/TweetEmotionClassification.py index 3d50e6a456..a1d93f8cc8 100644 --- a/mteb/tasks/Classification/ara/TweetEmotionClassification.py +++ b/mteb/tasks/Classification/ara/TweetEmotionClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class TweetEmotionClassification(AbsTaskClassification): +class TweetEmotionClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="TweetEmotionClassification", dataset={ diff --git a/mteb/tasks/Classification/ara/TweetSarcasmClassification.py b/mteb/tasks/Classification/ara/TweetSarcasmClassification.py index 928cf3bf9a..a21da63b6c 100644 --- a/mteb/tasks/Classification/ara/TweetSarcasmClassification.py +++ b/mteb/tasks/Classification/ara/TweetSarcasmClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class TweetSarcasmClassification(AbsTaskClassification): +class TweetSarcasmClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="TweetSarcasmClassification", dataset={ diff --git a/mteb/tasks/Classification/ben/BengaliDocumentClassification.py b/mteb/tasks/Classification/ben/BengaliDocumentClassification.py index 76c846b075..be8987402b 100644 --- a/mteb/tasks/Classification/ben/BengaliDocumentClassification.py +++ b/mteb/tasks/Classification/ben/BengaliDocumentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class BengaliDocumentClassification(AbsTaskClassification): +class BengaliDocumentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="BengaliDocumentClassification", description="Dataset for News Classification, categorized with 13 domains.", diff --git a/mteb/tasks/Classification/ben/BengaliHateSpeechClassification.py b/mteb/tasks/Classification/ben/BengaliHateSpeechClassification.py index 69df9689d6..d7b4cecf9a 100644 --- a/mteb/tasks/Classification/ben/BengaliHateSpeechClassification.py +++ b/mteb/tasks/Classification/ben/BengaliHateSpeechClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class BengaliHateSpeechClassification(AbsTaskClassification): +class BengaliHateSpeechClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="BengaliHateSpeechClassification", description="The Bengali Hate Speech Dataset is a Bengali-language dataset of news articles collected from various Bengali media sources and categorized based on the type of hate in the text.", diff --git a/mteb/tasks/Classification/ben/BengaliSentimentAnalysis.py b/mteb/tasks/Classification/ben/BengaliSentimentAnalysis.py index 91c489cfcd..5c32013e68 100644 --- a/mteb/tasks/Classification/ben/BengaliSentimentAnalysis.py +++ b/mteb/tasks/Classification/ben/BengaliSentimentAnalysis.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class BengaliSentimentAnalysis(AbsTaskClassification): +class BengaliSentimentAnalysis(AbsTaskAnyClassification): metadata = TaskMetadata( name="BengaliSentimentAnalysis", description="dataset contains 3307 Negative reviews and 8500 Positive reviews collected and manually annotated from Youtube Bengali drama.", diff --git a/mteb/tasks/Classification/bul/BulgarianStoreReviewSentimentClassfication.py b/mteb/tasks/Classification/bul/BulgarianStoreReviewSentimentClassfication.py index 80493e9e70..86f5f2dd8b 100644 --- a/mteb/tasks/Classification/bul/BulgarianStoreReviewSentimentClassfication.py +++ b/mteb/tasks/Classification/bul/BulgarianStoreReviewSentimentClassfication.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class BulgarianStoreReviewSentimentClassfication(AbsTaskClassification): +class BulgarianStoreReviewSentimentClassfication(AbsTaskAnyClassification): metadata = TaskMetadata( name="BulgarianStoreReviewSentimentClassfication", description="Bulgarian online store review dataset for sentiment classification.", diff --git a/mteb/tasks/Classification/ces/CSFDCZMovieReviewSentimentClassification.py b/mteb/tasks/Classification/ces/CSFDCZMovieReviewSentimentClassification.py index 1fe962b32b..2463df1759 100644 --- a/mteb/tasks/Classification/ces/CSFDCZMovieReviewSentimentClassification.py +++ b/mteb/tasks/Classification/ces/CSFDCZMovieReviewSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class CSFDCZMovieReviewSentimentClassification(AbsTaskClassification): +class CSFDCZMovieReviewSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CSFDCZMovieReviewSentimentClassification", description="The dataset contains 30k user reviews from csfd.cz in Czech.", diff --git a/mteb/tasks/Classification/ces/CzechProductReviewSentimentClassification.py b/mteb/tasks/Classification/ces/CzechProductReviewSentimentClassification.py index b591d5f354..51d2f7e70c 100644 --- a/mteb/tasks/Classification/ces/CzechProductReviewSentimentClassification.py +++ b/mteb/tasks/Classification/ces/CzechProductReviewSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class CzechProductReviewSentimentClassification(AbsTaskClassification): +class CzechProductReviewSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CzechProductReviewSentimentClassification", description="User reviews of products on Czech e-shop Mall.cz with 3 sentiment classes (positive, neutral, negative)", diff --git a/mteb/tasks/Classification/ces/CzechSoMeSentimentClassification.py b/mteb/tasks/Classification/ces/CzechSoMeSentimentClassification.py index b84de224c0..41c95d7009 100644 --- a/mteb/tasks/Classification/ces/CzechSoMeSentimentClassification.py +++ b/mteb/tasks/Classification/ces/CzechSoMeSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class CzechSoMeSentimentClassification(AbsTaskClassification): +class CzechSoMeSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CzechSoMeSentimentClassification", description="User comments on Facebook", diff --git a/mteb/tasks/Classification/ces/CzechSubjectivityClassification.py b/mteb/tasks/Classification/ces/CzechSubjectivityClassification.py index 25437b1b23..2a4c31ed6c 100644 --- a/mteb/tasks/Classification/ces/CzechSubjectivityClassification.py +++ b/mteb/tasks/Classification/ces/CzechSubjectivityClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class CzechSubjectivityClassification(AbsTaskClassification): +class CzechSubjectivityClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CzechSubjectivityClassification", description="An Czech dataset for subjectivity classification.", diff --git a/mteb/tasks/Classification/dan/AngryTweetsClassification.py b/mteb/tasks/Classification/dan/AngryTweetsClassification.py index 61430f8570..32cadbde42 100644 --- a/mteb/tasks/Classification/dan/AngryTweetsClassification.py +++ b/mteb/tasks/Classification/dan/AngryTweetsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class AngryTweetsClassification(AbsTaskClassification): +class AngryTweetsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="AngryTweetsClassification", dataset={ diff --git a/mteb/tasks/Classification/dan/DKHateClassification.py b/mteb/tasks/Classification/dan/DKHateClassification.py index 51e1b70d7c..0c8b200160 100644 --- a/mteb/tasks/Classification/dan/DKHateClassification.py +++ b/mteb/tasks/Classification/dan/DKHateClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class DKHateClassification(AbsTaskClassification): +class DKHateClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="DKHateClassification", dataset={ diff --git a/mteb/tasks/Classification/dan/DanishPoliticalCommentsClassification.py b/mteb/tasks/Classification/dan/DanishPoliticalCommentsClassification.py index 097e5bc1d3..7cc5f5ffe7 100644 --- a/mteb/tasks/Classification/dan/DanishPoliticalCommentsClassification.py +++ b/mteb/tasks/Classification/dan/DanishPoliticalCommentsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class DanishPoliticalCommentsClassification(AbsTaskClassification): +class DanishPoliticalCommentsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="DanishPoliticalCommentsClassification", dataset={ diff --git a/mteb/tasks/Classification/dan/DdiscoCohesionClassification.py b/mteb/tasks/Classification/dan/DdiscoCohesionClassification.py index 5ed1d9bfc1..c6228d93c4 100644 --- a/mteb/tasks/Classification/dan/DdiscoCohesionClassification.py +++ b/mteb/tasks/Classification/dan/DdiscoCohesionClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class DdiscoCohesionClassification(AbsTaskClassification): +class DdiscoCohesionClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="Ddisco", dataset={ diff --git a/mteb/tasks/Classification/dan/LccSentimentClassification.py b/mteb/tasks/Classification/dan/LccSentimentClassification.py index cb84c2e658..4cf49a6036 100644 --- a/mteb/tasks/Classification/dan/LccSentimentClassification.py +++ b/mteb/tasks/Classification/dan/LccSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class LccSentimentClassification(AbsTaskClassification): +class LccSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LccSentimentClassification", dataset={ diff --git a/mteb/tasks/Classification/deu/GermanPoliticiansTwitterSentimentClassification.py b/mteb/tasks/Classification/deu/GermanPoliticiansTwitterSentimentClassification.py index d43b1f93fa..1dd0dc860e 100644 --- a/mteb/tasks/Classification/deu/GermanPoliticiansTwitterSentimentClassification.py +++ b/mteb/tasks/Classification/deu/GermanPoliticiansTwitterSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class GermanPoliticiansTwitterSentimentClassification(AbsTaskClassification): +class GermanPoliticiansTwitterSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="GermanPoliticiansTwitterSentimentClassification", description="GermanPoliticiansTwitterSentiment is a dataset of German tweets categorized with their sentiment (3 classes).", diff --git a/mteb/tasks/Classification/deu/TenKGnadClassification.py b/mteb/tasks/Classification/deu/TenKGnadClassification.py index f2f993888a..8cef9331cf 100644 --- a/mteb/tasks/Classification/deu/TenKGnadClassification.py +++ b/mteb/tasks/Classification/deu/TenKGnadClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class TenKGnadClassification(AbsTaskClassification): +class TenKGnadClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="TenKGnadClassification", description="10k German News Articles Dataset (10kGNAD) contains news articles from the online Austrian newspaper website DER Standard with their topic classification (9 classes).", diff --git a/mteb/tasks/Classification/ell/GreekLegalCodeClassification.py b/mteb/tasks/Classification/ell/GreekLegalCodeClassification.py index de9ce6240b..d922e2d50d 100644 --- a/mteb/tasks/Classification/ell/GreekLegalCodeClassification.py +++ b/mteb/tasks/Classification/ell/GreekLegalCodeClassification.py @@ -1,12 +1,12 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata TEST_SAMPLES = 2048 -class GreekLegalCodeClassification(AbsTaskClassification): +class GreekLegalCodeClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="GreekLegalCodeClassification", description="Greek Legal Code Dataset for Classification. (subset = chapter)", diff --git a/mteb/tasks/Classification/eng/AmazonPolarityClassification.py b/mteb/tasks/Classification/eng/AmazonPolarityClassification.py index 7ed07bddbc..26050d29a5 100644 --- a/mteb/tasks/Classification/eng/AmazonPolarityClassification.py +++ b/mteb/tasks/Classification/eng/AmazonPolarityClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class AmazonPolarityClassification(AbsTaskClassification): +class AmazonPolarityClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="AmazonPolarityClassification", description="Amazon Polarity Classification Dataset.", diff --git a/mteb/tasks/Classification/eng/ArxivClassification.py b/mteb/tasks/Classification/eng/ArxivClassification.py index f108025e0a..5fb52e58c7 100644 --- a/mteb/tasks/Classification/eng/ArxivClassification.py +++ b/mteb/tasks/Classification/eng/ArxivClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class ArxivClassification(AbsTaskClassification): +class ArxivClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="ArxivClassification", description="Classification Dataset of Arxiv Papers", diff --git a/mteb/tasks/Classification/eng/Banking77Classification.py b/mteb/tasks/Classification/eng/Banking77Classification.py index 1a822b20a1..e9e3b789fe 100644 --- a/mteb/tasks/Classification/eng/Banking77Classification.py +++ b/mteb/tasks/Classification/eng/Banking77Classification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class Banking77Classification(AbsTaskClassification): +class Banking77Classification(AbsTaskAnyClassification): metadata = TaskMetadata( name="Banking77Classification", description="Dataset composed of online banking queries annotated with their corresponding intents.", diff --git a/mteb/tasks/Classification/eng/DBpediaClassification.py b/mteb/tasks/Classification/eng/DBpediaClassification.py index f2cb8a17f7..bafb534ee9 100644 --- a/mteb/tasks/Classification/eng/DBpediaClassification.py +++ b/mteb/tasks/Classification/eng/DBpediaClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class DBpediaClassification(AbsTaskClassification): +class DBpediaClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="DBpediaClassification", description="DBpedia14 is a dataset of English texts from Wikipedia articles, categorized into 14 non-overlapping classes based on their DBpedia ontology.", diff --git a/mteb/tasks/Classification/eng/EmotionClassification.py b/mteb/tasks/Classification/eng/EmotionClassification.py index 888707a111..aaf0fb5e83 100644 --- a/mteb/tasks/Classification/eng/EmotionClassification.py +++ b/mteb/tasks/Classification/eng/EmotionClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class EmotionClassification(AbsTaskClassification): +class EmotionClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="EmotionClassification", description="Emotion is a dataset of English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise.", diff --git a/mteb/tasks/Classification/eng/FinancialPhrasebankClassification.py b/mteb/tasks/Classification/eng/FinancialPhrasebankClassification.py index 3736629af2..cd1108456a 100644 --- a/mteb/tasks/Classification/eng/FinancialPhrasebankClassification.py +++ b/mteb/tasks/Classification/eng/FinancialPhrasebankClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class FinancialPhrasebankClassification(AbsTaskClassification): +class FinancialPhrasebankClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="FinancialPhrasebankClassification", description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral.", diff --git a/mteb/tasks/Classification/eng/FrenkEnClassification.py b/mteb/tasks/Classification/eng/FrenkEnClassification.py index ed23de297b..34d94f7c05 100644 --- a/mteb/tasks/Classification/eng/FrenkEnClassification.py +++ b/mteb/tasks/Classification/eng/FrenkEnClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class FrenkEnClassification(AbsTaskClassification): +class FrenkEnClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="FrenkEnClassification", description="English subset of the FRENK dataset", diff --git a/mteb/tasks/Classification/eng/ImdbClassification.py b/mteb/tasks/Classification/eng/ImdbClassification.py index f08b1c52d3..2f8f0f5347 100644 --- a/mteb/tasks/Classification/eng/ImdbClassification.py +++ b/mteb/tasks/Classification/eng/ImdbClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class ImdbClassification(AbsTaskClassification): +class ImdbClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="ImdbClassification", description="Large Movie Review Dataset", diff --git a/mteb/tasks/Classification/eng/LegalBenchClassification.py b/mteb/tasks/Classification/eng/LegalBenchClassification.py index 2f95bdc7f9..52d31f60c4 100644 --- a/mteb/tasks/Classification/eng/LegalBenchClassification.py +++ b/mteb/tasks/Classification/eng/LegalBenchClassification.py @@ -5,11 +5,11 @@ import datasets from datasets import concatenate_datasets -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class CanadaTaxCourtOutcomesLegalBenchClassification(AbsTaskClassification): +class CanadaTaxCourtOutcomesLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CanadaTaxCourtOutcomesLegalBenchClassification", description="The input is an excerpt of text from Tax Court of Canada decisions involving appeals of tax related matters. The task is to classify whether the excerpt includes the outcome of the appeal, and if so, to specify whether the appeal was allowed or dismissed. Partial success (e.g. appeal granted on one tax year but dismissed on another) counts as allowed (with the exception of costs orders which are disregarded). Where the excerpt does not clearly articulate an outcome, the system should indicate other as the outcome. Categorizing case outcomes is a common task that legal researchers complete in order to gather datasets involving outcomes in legal processes for the purposes of quantitative empirical legal research.", @@ -48,7 +48,7 @@ def dataset_transform(self): class ContractNLIConfidentialityOfAgreementLegalBenchClassification( - AbsTaskClassification + AbsTaskAnyClassification ): metadata = TaskMetadata( name="ContractNLIConfidentialityOfAgreementLegalBenchClassification", @@ -99,7 +99,9 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class ContractNLIExplicitIdentificationLegalBenchClassification(AbsTaskClassification): +class ContractNLIExplicitIdentificationLegalBenchClassification( + AbsTaskAnyClassification +): metadata = TaskMetadata( name="ContractNLIExplicitIdentificationLegalBenchClassification", description="This task is a subset of ContractNLI, and consists of determining whether a clause from an NDA clause provides that all Confidential Information shall be expressly identified by the Disclosing Party.", @@ -150,7 +152,7 @@ def dataset_transform(self): class ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification( - AbsTaskClassification + AbsTaskAnyClassification ): metadata = TaskMetadata( name="ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification", @@ -201,7 +203,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class ContractNLILimitedUseLegalBenchClassification(AbsTaskClassification): +class ContractNLILimitedUseLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="ContractNLILimitedUseLegalBenchClassification", description="This task is a subset of ContractNLI, and consists of determining whether a clause from an NDA clause provides that the Receiving Party shall not use any Confidential Information for any purpose other than the purposes stated in Agreement.", @@ -251,7 +253,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class ContractNLINoLicensingLegalBenchClassification(AbsTaskClassification): +class ContractNLINoLicensingLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="ContractNLINoLicensingLegalBenchClassification", description="This task is a subset of ContractNLI, and consists of determining whether a clause from an NDA clause provides that the Agreement shall not grant Receiving Party any right to Confidential Information.", @@ -302,7 +304,7 @@ def dataset_transform(self): class ContractNLINoticeOnCompelledDisclosureLegalBenchClassification( - AbsTaskClassification + AbsTaskAnyClassification ): metadata = TaskMetadata( name="ContractNLINoticeOnCompelledDisclosureLegalBenchClassification", @@ -354,7 +356,7 @@ def dataset_transform(self): class ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification( - AbsTaskClassification + AbsTaskAnyClassification ): metadata = TaskMetadata( name="ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification", @@ -405,7 +407,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class ContractNLIPermissibleCopyLegalBenchClassification(AbsTaskClassification): +class ContractNLIPermissibleCopyLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="ContractNLIPermissibleCopyLegalBenchClassification", description="This task is a subset of ContractNLI, and consists of determining whether a clause from an NDA clause provides that the Receiving Party may create a copy of some Confidential Information in some circumstances.", @@ -456,7 +458,7 @@ def dataset_transform(self): class ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification( - AbsTaskClassification + AbsTaskAnyClassification ): metadata = TaskMetadata( name="ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification", @@ -508,7 +510,7 @@ def dataset_transform(self): class ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification( - AbsTaskClassification + AbsTaskAnyClassification ): metadata = TaskMetadata( name="ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification", @@ -560,7 +562,7 @@ def dataset_transform(self): class ContractNLIReturnOfConfidentialInformationLegalBenchClassification( - AbsTaskClassification + AbsTaskAnyClassification ): metadata = TaskMetadata( name="ContractNLIReturnOfConfidentialInformationLegalBenchClassification", @@ -611,7 +613,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class ContractNLISharingWithEmployeesLegalBenchClassification(AbsTaskClassification): +class ContractNLISharingWithEmployeesLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="ContractNLISharingWithEmployeesLegalBenchClassification", description="This task is a subset of ContractNLI, and consists of determining whether a clause from an NDA clause provides that the Receiving Party may share some Confidential Information with some of Receiving Party's employees.", @@ -661,7 +663,9 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class ContractNLISharingWithThirdPartiesLegalBenchClassification(AbsTaskClassification): +class ContractNLISharingWithThirdPartiesLegalBenchClassification( + AbsTaskAnyClassification +): metadata = TaskMetadata( name="ContractNLISharingWithThirdPartiesLegalBenchClassification", description="This task is a subset of ContractNLI, and consists of determining whether a clause from an NDA clause provides that the Receiving Party may share some Confidential Information with some third-parties (including consultants, agents and professional advisors).", @@ -711,7 +715,9 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class ContractNLISurvivalOfObligationsLegalBenchClassification(AbsTaskClassification): +class ContractNLISurvivalOfObligationsLegalBenchClassification( + AbsTaskAnyClassification +): metadata = TaskMetadata( name="ContractNLISurvivalOfObligationsLegalBenchClassification", description="This task is a subset of ContractNLI, and consists of determining whether a clause from an NDA clause provides that some obligations of Agreement may survive termination of Agreement.", @@ -761,7 +767,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CorporateLobbyingLegalBenchClassification(AbsTaskClassification): +class CorporateLobbyingLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CorporateLobbyingLegalBenchClassification", description="The Corporate Lobbying task consists of determining whether a proposed Congressional bill may be relevant to a company based on a company's self-description in its SEC 10K filing.", @@ -821,7 +827,7 @@ def dataset_transform(self): ) -class CUADAffiliateLicenseLicenseeLegalBenchClassification(AbsTaskClassification): +class CUADAffiliateLicenseLicenseeLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADAffiliateLicenseLicenseeLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if a clause describes a license grant to a licensee (incl. sublicensor) and the affiliates of such licensee/sublicensor.", @@ -872,7 +878,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADAffiliateLicenseLicensorLegalBenchClassification(AbsTaskClassification): +class CUADAffiliateLicenseLicensorLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADAffiliateLicenseLicensorLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause describes a license grant by affiliates of the licensor or that includes intellectual property of affiliates of the licensor.", @@ -923,7 +929,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADAntiAssignmentLegalBenchClassification(AbsTaskClassification): +class CUADAntiAssignmentLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADAntiAssignmentLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause requires consent or notice of a party if the contract is assigned to a third party.", @@ -974,7 +980,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADAuditRightsLegalBenchClassification(AbsTaskClassification): +class CUADAuditRightsLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADAuditRightsLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause gives a party the right to audit the books, records, or physical locations of the counterparty to ensure compliance with the contract.", @@ -1025,7 +1031,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADCapOnLiabilityLegalBenchClassification(AbsTaskClassification): +class CUADCapOnLiabilityLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADCapOnLiabilityLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies a cap on liability upon the breach of a party's obligation. This includes time limitation for the counterparty to bring claims or maximum amount for recovery.", @@ -1076,7 +1082,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADChangeOfControlLegalBenchClassification(AbsTaskClassification): +class CUADChangeOfControlLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADChangeOfControlLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause gives one party the right to terminate or is consent or notice required of the counterparty if such party undergoes a change of control, such as a merger, stock sale, transfer of all or substantially all of its assets or business, or assignment by operation of law.", @@ -1128,7 +1134,7 @@ def dataset_transform(self): class CUADCompetitiveRestrictionExceptionLegalBenchClassification( - AbsTaskClassification + AbsTaskAnyClassification ): metadata = TaskMetadata( name="CUADCompetitiveRestrictionExceptionLegalBenchClassification", @@ -1180,7 +1186,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADCovenantNotToSueLegalBenchClassification(AbsTaskClassification): +class CUADCovenantNotToSueLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADCovenantNotToSueLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies that a party is restricted from contesting the validity of the counterparty's ownership of intellectual property or otherwise bringing a claim against the counterparty for matters unrelated to the contract.", @@ -1231,7 +1237,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADEffectiveDateLegalBenchClassification(AbsTaskClassification): +class CUADEffectiveDateLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADEffectiveDateLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies the date upon which the agreement becomes effective.", @@ -1282,7 +1288,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADExclusivityLegalBenchClassification(AbsTaskClassification): +class CUADExclusivityLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADExclusivityLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies exclusive dealing commitment with the counterparty. This includes a commitment to procure all 'requirements' from one party of certain technology, goods, or services or a prohibition on licensing or selling technology, goods or services to third parties, or a prohibition on collaborating or working with other parties), whether during the contract or after the contract ends (or both).", @@ -1333,7 +1339,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADExpirationDateLegalBenchClassification(AbsTaskClassification): +class CUADExpirationDateLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADExpirationDateLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies the date upon which the initial term expires.", @@ -1384,7 +1390,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADGoverningLawLegalBenchClassification(AbsTaskClassification): +class CUADGoverningLawLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADGoverningLawLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies which state/country’s law governs the contract.", @@ -1435,7 +1441,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADInsuranceLegalBenchClassification(AbsTaskClassification): +class CUADInsuranceLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADInsuranceLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if clause creates a requirement for insurance that must be maintained by one party for the benefit of the counterparty.", @@ -1486,7 +1492,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADIPOwnershipAssignmentLegalBenchClassification(AbsTaskClassification): +class CUADIPOwnershipAssignmentLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADIPOwnershipAssignmentLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies that intellectual property created by one party become the property of the counterparty, either per the terms of the contract or upon the occurrence of certain events.", @@ -1537,7 +1543,9 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADIrrevocableOrPerpetualLicenseLegalBenchClassification(AbsTaskClassification): +class CUADIrrevocableOrPerpetualLicenseLegalBenchClassification( + AbsTaskAnyClassification +): metadata = TaskMetadata( name="CUADIrrevocableOrPerpetualLicenseLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies a license grant that is irrevocable or perpetual.", @@ -1588,7 +1596,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADJointIPOwnershipLegalBenchClassification(AbsTaskClassification): +class CUADJointIPOwnershipLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADJointIPOwnershipLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause provides for joint or shared ownership of intellectual property between the parties to the contract.", @@ -1639,7 +1647,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADLicenseGrantLegalBenchClassification(AbsTaskClassification): +class CUADLicenseGrantLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADLicenseGrantLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause contains a license granted by one party to its counterparty.", @@ -1690,7 +1698,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADLiquidatedDamagesLegalBenchClassification(AbsTaskClassification): +class CUADLiquidatedDamagesLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADLiquidatedDamagesLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause awards either party liquidated damages for breach or a fee upon the termination of a contract (termination fee).", @@ -1741,7 +1749,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADMinimumCommitmentLegalBenchClassification(AbsTaskClassification): +class CUADMinimumCommitmentLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADMinimumCommitmentLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies a minimum order size or minimum amount or units per time period that one party must buy from the counterparty.", @@ -1792,7 +1800,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADMostFavoredNationLegalBenchClassification(AbsTaskClassification): +class CUADMostFavoredNationLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADMostFavoredNationLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if a third party gets better terms on the licensing or sale of technology/goods/services described in the contract, the buyer of such technology/goods/services under the contract shall be entitled to those better terms.", @@ -1843,7 +1851,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADNoSolicitOfCustomersLegalBenchClassification(AbsTaskClassification): +class CUADNoSolicitOfCustomersLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADNoSolicitOfCustomersLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause restricts a party from contracting or soliciting customers or partners of the counterparty, whether during the contract or after the contract ends (or both).", @@ -1894,7 +1902,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADNoSolicitOfEmployeesLegalBenchClassification(AbsTaskClassification): +class CUADNoSolicitOfEmployeesLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADNoSolicitOfEmployeesLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause restricts a party's soliciting or hiring employees and/or contractors from the counterparty, whether during the contract or after the contract ends (or both).", @@ -1945,7 +1953,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADNonCompeteLegalBenchClassification(AbsTaskClassification): +class CUADNonCompeteLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADNonCompeteLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause restricts the ability of a party to compete with the counterparty or operate in a certain geography or business or technology sector.", @@ -1996,7 +2004,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADNonDisparagementLegalBenchClassification(AbsTaskClassification): +class CUADNonDisparagementLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADNonDisparagementLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause requires a party not to disparage the counterparty.", @@ -2047,7 +2055,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADNonTransferableLicenseLegalBenchClassification(AbsTaskClassification): +class CUADNonTransferableLicenseLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADNonTransferableLicenseLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause limits the ability of a party to transfer the license being granted to a third party.", @@ -2098,7 +2106,9 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADNoticePeriodToTerminateRenewalLegalBenchClassification(AbsTaskClassification): +class CUADNoticePeriodToTerminateRenewalLegalBenchClassification( + AbsTaskAnyClassification +): metadata = TaskMetadata( name="CUADNoticePeriodToTerminateRenewalLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies a notice period required to terminate renewal.", @@ -2149,7 +2159,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADPostTerminationServicesLegalBenchClassification(AbsTaskClassification): +class CUADPostTerminationServicesLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADPostTerminationServicesLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause subjects a party to obligations after the termination or expiration of a contract, including any post-termination transition, payment, transfer of IP, wind-down, last-buy, or similar commitments.", @@ -2200,7 +2210,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADPriceRestrictionsLegalBenchClassification(AbsTaskClassification): +class CUADPriceRestrictionsLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADPriceRestrictionsLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause places a restriction on the ability of a party to raise or reduce prices of technology, goods, or services provided.", @@ -2251,7 +2261,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADRenewalTermLegalBenchClassification(AbsTaskClassification): +class CUADRenewalTermLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADRenewalTermLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies a renewal term.", @@ -2302,7 +2312,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADRevenueProfitSharingLegalBenchClassification(AbsTaskClassification): +class CUADRevenueProfitSharingLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADRevenueProfitSharingLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause require a party to share revenue or profit with the counterparty for any technology, goods, or services.", @@ -2353,7 +2363,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADRofrRofoRofnLegalBenchClassification(AbsTaskClassification): +class CUADRofrRofoRofnLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADRofrRofoRofnLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause grant one party a right of first refusal, right of first offer or right of first negotiation to purchase, license, market, or distribute equity interest, technology, assets, products or services.", @@ -2404,7 +2414,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADSourceCodeEscrowLegalBenchClassification(AbsTaskClassification): +class CUADSourceCodeEscrowLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADSourceCodeEscrowLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause requires one party to deposit its source code into escrow with a third party, which can be released to the counterparty upon the occurrence of certain events (bankruptcy, insolvency, etc.).", @@ -2455,7 +2465,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADTerminationForConvenienceLegalBenchClassification(AbsTaskClassification): +class CUADTerminationForConvenienceLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADTerminationForConvenienceLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies that one party can terminate this contract without cause (solely by giving a notice and allowing a waiting period to expire).", @@ -2506,7 +2516,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADThirdPartyBeneficiaryLegalBenchClassification(AbsTaskClassification): +class CUADThirdPartyBeneficiaryLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADThirdPartyBeneficiaryLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies that that there a non-contracting party who is a beneficiary to some or all of the clauses in the contract and therefore can enforce its rights against a contracting party.", @@ -2557,7 +2567,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADUncappedLiabilityLegalBenchClassification(AbsTaskClassification): +class CUADUncappedLiabilityLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADUncappedLiabilityLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies that a party's liability is uncapped upon the breach of its obligation in the contract. This also includes uncap liability for a particular type of breach such as IP infringement or breach of confidentiality obligation.", @@ -2608,7 +2618,9 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification(AbsTaskClassification): +class CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification( + AbsTaskAnyClassification +): metadata = TaskMetadata( name="CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause grants one party an “enterprise,” “all you can eat” or unlimited usage license.", @@ -2659,7 +2671,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADVolumeRestrictionLegalBenchClassification(AbsTaskClassification): +class CUADVolumeRestrictionLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADVolumeRestrictionLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies a fee increase or consent requirement, etc. if one party's use of the product/services exceeds certain threshold.", @@ -2710,7 +2722,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class CUADWarrantyDurationLegalBenchClassification(AbsTaskClassification): +class CUADWarrantyDurationLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CUADWarrantyDurationLegalBenchClassification", description="This task was constructed from the CUAD dataset. It consists of determining if the clause specifies a duration of any warranty against defects or errors in technology, products, or services provided under the contract.", @@ -2761,7 +2773,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class DefinitionClassificationLegalBenchClassification(AbsTaskClassification): +class DefinitionClassificationLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="DefinitionClassificationLegalBenchClassification", description="This task consists of determining whether or not a sentence from a Supreme Court opinion offers a definition of a term.", @@ -2806,7 +2818,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class Diversity1LegalBenchClassification(AbsTaskClassification): +class Diversity1LegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="Diversity1LegalBenchClassification", description="Given a set of facts about the citizenships of plaintiffs and defendants and the amounts associated with claims, determine if the criteria for diversity jurisdiction have been met (variant 1).", @@ -2875,7 +2887,7 @@ def dataset_transform(self): ) -class Diversity2LegalBenchClassification(AbsTaskClassification): +class Diversity2LegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="Diversity2LegalBenchClassification", description="Given a set of facts about the citizenships of plaintiffs and defendants and the amounts associated with claims, determine if the criteria for diversity jurisdiction have been met (variant 2).", @@ -2944,7 +2956,7 @@ def dataset_transform(self): ) -class Diversity3LegalBenchClassification(AbsTaskClassification): +class Diversity3LegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="Diversity3LegalBenchClassification", description="Given a set of facts about the citizenships of plaintiffs and defendants and the amounts associated with claims, determine if the criteria for diversity jurisdiction have been met (variant 3).", @@ -3013,7 +3025,7 @@ def dataset_transform(self): ) -class Diversity4LegalBenchClassification(AbsTaskClassification): +class Diversity4LegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="Diversity4LegalBenchClassification", description="Given a set of facts about the citizenships of plaintiffs and defendants and the amounts associated with claims, determine if the criteria for diversity jurisdiction have been met (variant 4).", @@ -3082,7 +3094,7 @@ def dataset_transform(self): ) -class Diversity5LegalBenchClassification(AbsTaskClassification): +class Diversity5LegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="Diversity5LegalBenchClassification", description="Given a set of facts about the citizenships of plaintiffs and defendants and the amounts associated with claims, determine if the criteria for diversity jurisdiction have been met (variant 5).", @@ -3151,7 +3163,7 @@ def dataset_transform(self): ) -class Diversity6LegalBenchClassification(AbsTaskClassification): +class Diversity6LegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="Diversity6LegalBenchClassification", description="Given a set of facts about the citizenships of plaintiffs and defendants and the amounts associated with claims, determine if the criteria for diversity jurisdiction have been met (variant 6).", @@ -3220,7 +3232,7 @@ def dataset_transform(self): ) -class FunctionOfDecisionSectionLegalBenchClassification(AbsTaskClassification): +class FunctionOfDecisionSectionLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="FunctionOfDecisionSectionLegalBenchClassification", description="""The task is to classify a paragraph extracted from a written court decision into one of seven possible categories: @@ -3276,7 +3288,7 @@ def dataset_transform(self): ) -class InsurancePolicyInterpretationLegalBenchClassification(AbsTaskClassification): +class InsurancePolicyInterpretationLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="InsurancePolicyInterpretationLegalBenchClassification", description="Given an insurance claim and policy, determine whether the claim is covered by the policy.", @@ -3321,7 +3333,9 @@ def dataset_transform(self): ) -class InternationalCitizenshipQuestionsLegalBenchClassification(AbsTaskClassification): +class InternationalCitizenshipQuestionsLegalBenchClassification( + AbsTaskAnyClassification +): metadata = TaskMetadata( name="InternationalCitizenshipQuestionsLegalBenchClassification", description="Answer questions about citizenship law from across the world. Dataset was made using the GLOBALCIT citizenship law dataset, by constructing questions about citizenship law as Yes or No questions.", @@ -3378,7 +3392,7 @@ def dataset_transform(self): ) -class JCrewBlockerLegalBenchClassification(AbsTaskClassification): +class JCrewBlockerLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="JCrewBlockerLegalBenchClassification", description="The J.Crew Blocker, also known as the J.Crew Protection, is a provision included in leveraged loan documents to prevent companies from removing security by transferring intellectual property (IP) into new subsidiaries and raising additional debt. The task consists of detemining whether the J.Crew Blocker is present in the document.", @@ -3423,7 +3437,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LearnedHandsBenefitsLegalBenchClassification(AbsTaskClassification): +class LearnedHandsBenefitsLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsBenefitsLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's legal post discusses public benefits and social services that people can get from the government, like for food, disability, old age, housing, medical help, unemployment, child care, or other social needs.", @@ -3476,7 +3490,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LearnedHandsBusinessLegalBenchClassification(AbsTaskClassification): +class LearnedHandsBusinessLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsBusinessLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's legal question discusses issues faced by people who run small businesses or nonprofits, including around incorporation, licenses, taxes, regulations, and other concerns. It also includes options when there are disasters, bankruptcies, or other problems.", @@ -3529,7 +3543,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LearnedHandsConsumerLegalBenchClassification(AbsTaskClassification): +class LearnedHandsConsumerLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsConsumerLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's post discusses issues people face regarding money, insurance, consumer goods and contracts, taxes, and small claims about quality of service.", @@ -3582,7 +3596,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LearnedHandsCourtsLegalBenchClassification(AbsTaskClassification): +class LearnedHandsCourtsLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsCourtsLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's post discusses the logistics of how a person can interact with a lawyer or the court system. It applies to situations about procedure, rules, how to file lawsuits, how to hire lawyers, how to represent oneself, and other practical matters about dealing with these systems.", @@ -3635,7 +3649,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LearnedHandsCrimeLegalBenchClassification(AbsTaskClassification): +class LearnedHandsCrimeLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsCrimeLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's post discusses issues in the criminal system including when people are charged with crimes, go to a criminal trial, go to prison, or are a victim of a crime.", @@ -3688,7 +3702,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LearnedHandsDivorceLegalBenchClassification(AbsTaskClassification): +class LearnedHandsDivorceLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsDivorceLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's post discusses issues around filing for divorce, separation, or annulment, getting spousal support, splitting money and property, and following the court processes.", @@ -3741,7 +3755,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LearnedHandsDomesticViolenceLegalBenchClassification(AbsTaskClassification): +class LearnedHandsDomesticViolenceLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsDomesticViolenceLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's post discusses dealing with domestic violence and abuse, including getting protective orders, enforcing them, understanding abuse, reporting abuse, and getting resources and status if there is abuse.", @@ -3794,7 +3808,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LearnedHandsEducationLegalBenchClassification(AbsTaskClassification): +class LearnedHandsEducationLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsEducationLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's post discusses issues around school, including accommodations for special needs, discrimination, student debt, discipline, and other issues in education.", @@ -3847,7 +3861,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LearnedHandsEmploymentLegalBenchClassification(AbsTaskClassification): +class LearnedHandsEmploymentLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsEmploymentLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's post discusses issues related to working at a job, including discrimination and harassment, worker's compensation, workers rights, unions, getting paid, pensions, being fired, and more.", @@ -3900,7 +3914,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LearnedHandsEstatesLegalBenchClassification(AbsTaskClassification): +class LearnedHandsEstatesLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsEstatesLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's post discusses planning for end-of-life, possible incapacitation, and other special circumstances that would prevent a person from making decisions about their own well-being, finances, and property. This includes issues around wills, powers of attorney, advance directives, trusts, guardianships, conservatorships, and other estate issues that people and families deal with.", @@ -3953,7 +3967,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LearnedHandsFamilyLegalBenchClassification(AbsTaskClassification): +class LearnedHandsFamilyLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsFamilyLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's post discusses issues that arise within a family, like divorce, adoption, name change, guardianship, domestic violence, child custody, and other issues.", @@ -4009,7 +4023,7 @@ def dataset_transform(self): ) -class LearnedHandsHealthLegalBenchClassification(AbsTaskClassification): +class LearnedHandsHealthLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsHealthLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's post discusses issues with accessing health services, paying for medical care, getting public benefits for health care, protecting one's rights in medical settings, and other issues related to health.", @@ -4062,7 +4076,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LearnedHandsHousingLegalBenchClassification(AbsTaskClassification): +class LearnedHandsHousingLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsHousingLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's post discusses issues with paying your rent or mortgage, landlord-tenant issues, housing subsidies and public housing, eviction, and other problems with your apartment, mobile home, or house.", @@ -4118,7 +4132,7 @@ def dataset_transform(self): ) -class LearnedHandsImmigrationLegalBenchClassification(AbsTaskClassification): +class LearnedHandsImmigrationLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsImmigrationLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's post discusses visas, asylum, green cards, citizenship, migrant work and benefits, and other issues faced by people who are not full citizens in the US.", @@ -4171,7 +4185,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LearnedHandsTortsLegalBenchClassification(AbsTaskClassification): +class LearnedHandsTortsLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsTortsLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's legal question discusses problems that one person has with another person (or animal), like when there is a car accident, a dog bite, bullying or possible harassment, or neighbors treating each other badly.", @@ -4224,7 +4238,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LearnedHandsTrafficLegalBenchClassification(AbsTaskClassification): +class LearnedHandsTrafficLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LearnedHandsTrafficLegalBenchClassification", description="This is a binary classification task in which the model must determine if a user's legal post discusses problems with traffic and parking tickets, fees, driver's licenses, and other issues experienced with the traffic system. It also concerns issues with car accidents and injuries, cars' quality, repairs, purchases, and other contracts.", @@ -4277,7 +4291,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class LegalReasoningCausalityLegalBenchClassification(AbsTaskClassification): +class LegalReasoningCausalityLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LegalReasoningCausalityLegalBenchClassification", description="Given an excerpt from a district court opinion, classify if it relies on statistical evidence in its reasoning.", @@ -4464,7 +4478,7 @@ def dataset_transform(self): ] -class MAUDLegalBenchClassification(AbsTaskClassification): +class MAUDLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="MAUDLegalBenchClassification", description="""This task was constructed from the MAUD dataset, which consists of over 47,000 labels across 152 merger agreements annotated to identify 92 questions in each agreement used by the 2021 American Bar Association (ABA) Public Target Deal Points Study. Each dataset is formatted as a series of multiple-choice questions, where given a segment of the merger agreement and a Deal Point question, the model is to choose the answer that best characterizes the agreement as response. @@ -4607,7 +4621,7 @@ def dataset_transform(self): ) -class NYSJudicialEthicsLegalBenchClassification(AbsTaskClassification): +class NYSJudicialEthicsLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="NYSJudicialEthicsLegalBenchClassification", description="Answer questions on judicial ethics from the New York State Unified Court System Advisory Committee.", @@ -4654,7 +4668,7 @@ def dataset_transform(self): ) -class OPP115DataRetentionLegalBenchClassification(AbsTaskClassification): +class OPP115DataRetentionLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="OPP115DataRetentionLegalBenchClassification", description="Given a clause from a privacy policy, classify if the clause describes how long user information is stored.", @@ -4706,7 +4720,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class OPP115DataSecurityLegalBenchClassification(AbsTaskClassification): +class OPP115DataSecurityLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="OPP115DataSecurityLegalBenchClassification", description="Given a clause from a privacy policy, classify if the clause describes how user information is protected.", @@ -4758,7 +4772,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class OPP115DoNotTrackLegalBenchClassification(AbsTaskClassification): +class OPP115DoNotTrackLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="OPP115DoNotTrackLegalBenchClassification", description="Given a clause from a privacy policy, classify if the clause describes if and how Do Not Track signals for online tracking and advertising are honored.", @@ -4810,7 +4824,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class OPP115FirstPartyCollectionUseLegalBenchClassification(AbsTaskClassification): +class OPP115FirstPartyCollectionUseLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="OPP115FirstPartyCollectionUseLegalBenchClassification", description="Given a clause from a privacy policy, classify if the clause describes how and why a service provider collects user information.", @@ -4863,7 +4877,7 @@ def dataset_transform(self): class OPP115InternationalAndSpecificAudiencesLegalBenchClassification( - AbsTaskClassification + AbsTaskAnyClassification ): metadata = TaskMetadata( name="OPP115InternationalAndSpecificAudiencesLegalBenchClassification", @@ -4916,7 +4930,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class OPP115PolicyChangeLegalBenchClassification(AbsTaskClassification): +class OPP115PolicyChangeLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="OPP115PolicyChangeLegalBenchClassification", description="Given a clause from a privacy policy, classify if the clause describes if and how users will be informed about changes to the privacy policy.", @@ -4968,7 +4982,9 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class OPP115ThirdPartySharingCollectionLegalBenchClassification(AbsTaskClassification): +class OPP115ThirdPartySharingCollectionLegalBenchClassification( + AbsTaskAnyClassification +): metadata = TaskMetadata( name="OPP115ThirdPartySharingCollectionLegalBenchClassification", description="Given a clause from a privacy policy, classify if the clause describe how user information may be shared with or collected by third parties.", @@ -5020,7 +5036,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class OPP115UserAccessEditAndDeletionLegalBenchClassification(AbsTaskClassification): +class OPP115UserAccessEditAndDeletionLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="OPP115UserAccessEditAndDeletionLegalBenchClassification", description="Given a clause from a privacy policy, classify if the clause describes if and how users may access, edit, or delete their information.", @@ -5072,7 +5088,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class OPP115UserChoiceControlLegalBenchClassification(AbsTaskClassification): +class OPP115UserChoiceControlLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="OPP115UserChoiceControlLegalBenchClassification", description="Given a clause fro ma privacy policy, classify if the clause describes the choices and control options available to users.", @@ -5124,7 +5140,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class OralArgumentQuestionPurposeLegalBenchClassification(AbsTaskClassification): +class OralArgumentQuestionPurposeLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="OralArgumentQuestionPurposeLegalBenchClassification", description="""This task classifies questions asked by Supreme Court justices at oral argument into seven categories: @@ -5173,7 +5189,7 @@ def dataset_transform(self): ) -class OverrulingLegalBenchClassification(AbsTaskClassification): +class OverrulingLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="OverrulingLegalBenchClassification", description="""This task consists of classifying whether or not a particular sentence of case law overturns the decision of a previous case.""", @@ -5228,7 +5244,7 @@ def dataset_transform(self): ) -class PersonalJurisdictionLegalBenchClassification(AbsTaskClassification): +class PersonalJurisdictionLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="PersonalJurisdictionLegalBenchClassification", description="""Given a fact pattern describing the set of contacts between a plaintiff, defendant, and forum, determine if a court in that forum could excercise personal jurisdiction over the defendant.""", @@ -5273,7 +5289,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class PROALegalBenchClassification(AbsTaskClassification): +class PROALegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="PROALegalBenchClassification", description="""Given a statute, determine if the text contains an explicit private right of action. Given a privacy policy clause and a description of the clause, determine if the description is correct. A private right of action (PROA) exists when a statute empowers an ordinary individual (i.e., a private person) to legally enforce their rights by bringing an action in court. In short, a PROA creates the ability for an individual to sue someone in order to recover damages or halt some offending conduct. PROAs are ubiquitous in antitrust law (in which individuals harmed by anti-competitive behavior can sue offending firms for compensation) and environmental law (in which individuals can sue entities which release hazardous substances for damages).""", @@ -5318,7 +5334,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class SCDBPAccountabilityLegalBenchClassification(AbsTaskClassification): +class SCDBPAccountabilityLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SCDBPAccountabilityLegalBenchClassification", description="This is a binary classification task in which the LLM must determine if a supply chain disclosure meets the following coding criteria: 'Does the above statement disclose whether the retail seller or manufacturer maintains internal compliance procedures on company standards regarding human trafficking and slavery? This includes any type of internal accountability mechanism. Requiring independently of the supply to comply with laws does not qualify or asking for documentary evidence of compliance does not count either.'", @@ -5372,7 +5388,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class SCDBPAuditsLegalBenchClassification(AbsTaskClassification): +class SCDBPAuditsLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SCDBPAuditsLegalBenchClassification", description="This is a binary classification task in which the LLM must determine if a supply chain disclosure meets the following coding criteria: 'Does the above statement disclose whether the retail seller or manufacturer performs any type of audit, or reserves the right to audit?'", @@ -5426,7 +5442,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class SCDBPCertificationLegalBenchClassification(AbsTaskClassification): +class SCDBPCertificationLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SCDBPCertificationLegalBenchClassification", description="This is a binary classification task in which the LLM must determine if a supply chain disclosure meets the following coding criteria: 'Does the above statement disclose whether the retail seller or manufacturer performs any type of audit, or reserves the right to audit?'", @@ -5480,7 +5496,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class SCDBPTrainingLegalBenchClassification(AbsTaskClassification): +class SCDBPTrainingLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SCDBPTrainingLegalBenchClassification", description="This is a binary classification task in which the LLM must determine if a supply chain disclosure meets the following coding criteria: 'Does the above statement disclose whether the retail seller or manufacturer provides training to employees on human trafficking and slavery? Broad policies such as ongoing dialogue on mitigating risks of human trafficking and slavery or increasing managers and purchasers knowledge about health, safety and labor practices qualify as training. Providing training to contractors who failed to comply with human trafficking laws counts as training.'", @@ -5534,7 +5550,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class SCDBPVerificationLegalBenchClassification(AbsTaskClassification): +class SCDBPVerificationLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SCDBPVerificationLegalBenchClassification", description="This is a binary classification task in which the LLM must determine if a supply chain disclosure meets the following coding criteria: 'Does the above statement disclose whether the retail seller or manufacturer engages in verification and auditing as one practice, expresses that it may conduct an audit, or expressess that it is assessing supplier risks through a review of the US Dept. of Labor's List?'", @@ -5588,7 +5604,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class SCDDAccountabilityLegalBenchClassification(AbsTaskClassification): +class SCDDAccountabilityLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SCDDAccountabilityLegalBenchClassification", description="This is a binary classification task in which the LLM must determine if a supply chain disclosure meets the following coding criteria: 'Does the above statement disclose to what extent, if any, that the retail seller or manufacturer maintains internal accountability standards and procedures for employees or contractors failing to meet company standards regarding slavery and trafficking?'", @@ -5642,7 +5658,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class SCDDAuditsLegalBenchClassification(AbsTaskClassification): +class SCDDAuditsLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SCDDAuditsLegalBenchClassification", description="This is a binary classification task in which the LLM must determine if a supply chain disclosure meets the following coding criteria: 'Does the above statement disclose to what extent, if any, that the retail seller or manufacturer conducts audits of suppliers to evaluate supplier compliance with company standards for trafficking and slavery in supply chains? The disclosure shall specify if the verification was not an independent, unannounced audit.'", @@ -5696,7 +5712,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class SCDDCertificationLegalBenchClassification(AbsTaskClassification): +class SCDDCertificationLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SCDDCertificationLegalBenchClassification", description="This is a binary classification task in which the LLM must determine if a supply chain disclosure meets the following coding criteria: 'Does the above statement disclose to what extent, if any, that the retail seller or manufacturer requires direct suppliers to certify that materials incorporated into the product comply with the laws regarding slavery and human trafficking of the country or countries in which they are doing business?'", @@ -5750,7 +5766,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class SCDDTrainingLegalBenchClassification(AbsTaskClassification): +class SCDDTrainingLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SCDDTrainingLegalBenchClassification", description="This is a binary classification task in which the LLM must determine if a supply chain disclosure meets the following coding criteria: 'Does the above statement disclose to what extent, if any, that the retail seller or manufacturer provides company employees and management, who have direct responsibility for supply chain management, training on human trafficking and slavery, particularly with respect to mitigating risks within the supply chains of products?'", @@ -5804,7 +5820,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class SCDDVerificationLegalBenchClassification(AbsTaskClassification): +class SCDDVerificationLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SCDDVerificationLegalBenchClassification", description="This is a binary classification task in which the LLM must determine if a supply chain disclosure meets the following coding criteria: 'Does the above statement disclose to what extent, if any, that the retail seller or manufacturer engages in verification of product supply chains to evaluate and address risks of human trafficking and slavery? If the company conducts verification], the disclosure shall specify if the verification was not conducted by a third party.'", @@ -5858,7 +5874,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class TelemarketingSalesRuleLegalBenchClassification(AbsTaskClassification): +class TelemarketingSalesRuleLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="TelemarketingSalesRuleLegalBenchClassification", description="Determine how 16 C.F.R. § 310.3(a)(1) and 16 C.F.R. § 310.3(a)(2) (governing deceptive practices) apply to different fact patterns. This dataset is designed to test a model’s ability to apply 16 C.F.R. § 310.3(a)(1) and 16 C.F.R. § 310.3(a)(2) of the Telemarketing Sales Rule to a simple fact pattern with a clear outcome. Each fact pattern ends with the question: “Is this a violation of the Telemarketing Sales Rule?” Each fact pattern is paired with the answer “Yes” or the answer “No.” Fact patterns are listed in the column “text,” and answers are listed in the column “label.”", @@ -5903,7 +5919,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class TextualismToolDictionariesLegalBenchClassification(AbsTaskClassification): +class TextualismToolDictionariesLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="TextualismToolDictionariesLegalBenchClassification", description="Determine if a paragraph from a judicial opinion is applying a form textualism that relies on the dictionary meaning of terms.", @@ -5948,7 +5964,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class TextualismToolPlainLegalBenchClassification(AbsTaskClassification): +class TextualismToolPlainLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="TextualismToolPlainLegalBenchClassification", description="Determine if a paragraph from a judicial opinion is applying a form textualism that relies on the ordinary (“plain”) meaning of terms.", @@ -5993,7 +6009,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("answer", "label") -class UCCVCommonLawLegalBenchClassification(AbsTaskClassification): +class UCCVCommonLawLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="UCCVCommonLawLegalBenchClassification", description="Determine if a contract is governed by the Uniform Commercial Code (UCC) or the common law of contracts.", @@ -6040,7 +6056,7 @@ def dataset_transform(self): ) -class UnfairTOSLegalBenchClassification(AbsTaskClassification): +class UnfairTOSLegalBenchClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="UnfairTOSLegalBenchClassification", description="Given a clause from a terms-of-service contract, determine the category the clause belongs to. The purpose of this task is classifying clauses in Terms of Service agreements. Clauses have been annotated by into nine categories: ['Arbitration', 'Unilateral change', 'Content removal', 'Jurisdiction', 'Choice of law', 'Limitation of liability', 'Unilateral termination', 'Contract by using', 'Other']. The first eight categories correspond to clauses that would potentially be deemed potentially unfair. The last category (Other) corresponds to clauses in agreements which don’t fit into these categories.", diff --git a/mteb/tasks/Classification/eng/NewsClassification.py b/mteb/tasks/Classification/eng/NewsClassification.py index 84ac1f31a5..50985878ae 100644 --- a/mteb/tasks/Classification/eng/NewsClassification.py +++ b/mteb/tasks/Classification/eng/NewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class NewsClassification(AbsTaskClassification): +class NewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="NewsClassification", description="Large News Classification Dataset", diff --git a/mteb/tasks/Classification/eng/PatentClassification.py b/mteb/tasks/Classification/eng/PatentClassification.py index 17b09f3b36..a69cdb13d1 100644 --- a/mteb/tasks/Classification/eng/PatentClassification.py +++ b/mteb/tasks/Classification/eng/PatentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class PatentClassification(AbsTaskClassification): +class PatentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="PatentClassification", description="Classification Dataset of Patents and Abstract", diff --git a/mteb/tasks/Classification/eng/PoemSentimentClassification.py b/mteb/tasks/Classification/eng/PoemSentimentClassification.py index 44ad69846d..7a1addbaa3 100644 --- a/mteb/tasks/Classification/eng/PoemSentimentClassification.py +++ b/mteb/tasks/Classification/eng/PoemSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class PoemSentimentClassification(AbsTaskClassification): +class PoemSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="PoemSentimentClassification", description="Poem Sentiment is a sentiment dataset of poem verses from Project Gutenberg.", diff --git a/mteb/tasks/Classification/eng/SDSEyeProtectionClassification.py b/mteb/tasks/Classification/eng/SDSEyeProtectionClassification.py index 019cddb598..7cd63cc38e 100644 --- a/mteb/tasks/Classification/eng/SDSEyeProtectionClassification.py +++ b/mteb/tasks/Classification/eng/SDSEyeProtectionClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SDSEyeProtectionClassification(AbsTaskClassification): +class SDSEyeProtectionClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SDSEyeProtectionClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/SDSGlovesClassification.py b/mteb/tasks/Classification/eng/SDSGlovesClassification.py index 72e9d3bc7e..1ad19a1fea 100644 --- a/mteb/tasks/Classification/eng/SDSGlovesClassification.py +++ b/mteb/tasks/Classification/eng/SDSGlovesClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SDSGlovesClassification(AbsTaskClassification): +class SDSGlovesClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SDSGlovesClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/ToxicChatClassification.py b/mteb/tasks/Classification/eng/ToxicChatClassification.py index afa21ae76d..14fc7325d6 100644 --- a/mteb/tasks/Classification/eng/ToxicChatClassification.py +++ b/mteb/tasks/Classification/eng/ToxicChatClassification.py @@ -1,12 +1,12 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _EVAL_SPLITS = ["test"] -class ToxicChatClassification(AbsTaskClassification): +class ToxicChatClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="ToxicChatClassification", description="""This dataset contains toxicity annotations on 10K user diff --git a/mteb/tasks/Classification/eng/ToxicConversationsClassification.py b/mteb/tasks/Classification/eng/ToxicConversationsClassification.py index 5736eced54..c80cb0e66c 100644 --- a/mteb/tasks/Classification/eng/ToxicConversationsClassification.py +++ b/mteb/tasks/Classification/eng/ToxicConversationsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class ToxicConversationsClassification(AbsTaskClassification): +class ToxicConversationsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="ToxicConversationsClassification", description="Collection of comments from the Civil Comments platform together with annotations if the comment is toxic or not.", diff --git a/mteb/tasks/Classification/eng/TweetSentimentExtractionClassification.py b/mteb/tasks/Classification/eng/TweetSentimentExtractionClassification.py index dc8d443178..db55a98996 100644 --- a/mteb/tasks/Classification/eng/TweetSentimentExtractionClassification.py +++ b/mteb/tasks/Classification/eng/TweetSentimentExtractionClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class TweetSentimentExtractionClassification(AbsTaskClassification): +class TweetSentimentExtractionClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="TweetSentimentExtractionClassification", description="", diff --git a/mteb/tasks/Classification/eng/TweetTopicSingleClassification.py b/mteb/tasks/Classification/eng/TweetTopicSingleClassification.py index 726b24ace9..6cf9df99c2 100644 --- a/mteb/tasks/Classification/eng/TweetTopicSingleClassification.py +++ b/mteb/tasks/Classification/eng/TweetTopicSingleClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class TweetTopicSingleClassification(AbsTaskClassification): +class TweetTopicSingleClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="TweetTopicSingleClassification", description="""Topic classification dataset on Twitter with 6 labels. Each instance of diff --git a/mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py b/mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py index 4030976e4c..a41b61f06d 100644 --- a/mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaBioMetChemClassification(AbsTaskClassification): +class WikipediaBioMetChemClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaBioMetChemClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/WikipediaBiolumNeurochemClassification.py b/mteb/tasks/Classification/eng/WikipediaBiolumNeurochemClassification.py index 036ea1d04e..fb4b0065c4 100644 --- a/mteb/tasks/Classification/eng/WikipediaBiolumNeurochemClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaBiolumNeurochemClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaBiolumNeurochemClassification(AbsTaskClassification): +class WikipediaBiolumNeurochemClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaBiolumNeurochemClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/WikipediaChemEngSpecialtiesClassification.py b/mteb/tasks/Classification/eng/WikipediaChemEngSpecialtiesClassification.py index 11ba8ac25e..4040d49126 100644 --- a/mteb/tasks/Classification/eng/WikipediaChemEngSpecialtiesClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaChemEngSpecialtiesClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaChemEngSpecialtiesClassification(AbsTaskClassification): +class WikipediaChemEngSpecialtiesClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaChemEngSpecialtiesClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/WikipediaChemFieldsClassification.py b/mteb/tasks/Classification/eng/WikipediaChemFieldsClassification.py index f2d0bb906d..399becd325 100644 --- a/mteb/tasks/Classification/eng/WikipediaChemFieldsClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaChemFieldsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaChemFieldsClassification(AbsTaskClassification): +class WikipediaChemFieldsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaChemFieldsClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/WikipediaChemistryTopicsClassification.py b/mteb/tasks/Classification/eng/WikipediaChemistryTopicsClassification.py index 70d74afda2..d9df199b73 100644 --- a/mteb/tasks/Classification/eng/WikipediaChemistryTopicsClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaChemistryTopicsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaChemistryTopicsClassification(AbsTaskClassification): +class WikipediaChemistryTopicsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaChemistryTopicsClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/WikipediaCompChemSpectroscopyClassification.py b/mteb/tasks/Classification/eng/WikipediaCompChemSpectroscopyClassification.py index 717f846983..6deb2d1618 100644 --- a/mteb/tasks/Classification/eng/WikipediaCompChemSpectroscopyClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaCompChemSpectroscopyClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaCompChemSpectroscopyClassification(AbsTaskClassification): +class WikipediaCompChemSpectroscopyClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaCompChemSpectroscopyClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/WikipediaCryobiologySeparationClassification.py b/mteb/tasks/Classification/eng/WikipediaCryobiologySeparationClassification.py index c255bf4d64..02d9e7c3c4 100644 --- a/mteb/tasks/Classification/eng/WikipediaCryobiologySeparationClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaCryobiologySeparationClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaCryobiologySeparationClassification(AbsTaskClassification): +class WikipediaCryobiologySeparationClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaCryobiologySeparationClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/WikipediaCrystallographyAnalyticalClassification.py b/mteb/tasks/Classification/eng/WikipediaCrystallographyAnalyticalClassification.py index a0defb6033..de177c2df4 100644 --- a/mteb/tasks/Classification/eng/WikipediaCrystallographyAnalyticalClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaCrystallographyAnalyticalClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaCrystallographyAnalyticalClassification(AbsTaskClassification): +class WikipediaCrystallographyAnalyticalClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaCrystallographyAnalyticalClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/WikipediaGreenhouseEnantiopureClassification.py b/mteb/tasks/Classification/eng/WikipediaGreenhouseEnantiopureClassification.py index bc2d1345d9..94aa91fb39 100644 --- a/mteb/tasks/Classification/eng/WikipediaGreenhouseEnantiopureClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaGreenhouseEnantiopureClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaGreenhouseEnantiopureClassification(AbsTaskClassification): +class WikipediaGreenhouseEnantiopureClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaGreenhouseEnantiopureClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/WikipediaIsotopesFissionClassification.py b/mteb/tasks/Classification/eng/WikipediaIsotopesFissionClassification.py index 6ffe6794fb..17c5f6603d 100644 --- a/mteb/tasks/Classification/eng/WikipediaIsotopesFissionClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaIsotopesFissionClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaIsotopesFissionClassification(AbsTaskClassification): +class WikipediaIsotopesFissionClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaIsotopesFissionClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/WikipediaLuminescenceClassification.py b/mteb/tasks/Classification/eng/WikipediaLuminescenceClassification.py index 89e963dd1c..b320e50560 100644 --- a/mteb/tasks/Classification/eng/WikipediaLuminescenceClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaLuminescenceClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaLuminescenceClassification(AbsTaskClassification): +class WikipediaLuminescenceClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaLuminescenceClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/WikipediaOrganicInorganicClassification.py b/mteb/tasks/Classification/eng/WikipediaOrganicInorganicClassification.py index 59b1e775e3..3a003a541b 100644 --- a/mteb/tasks/Classification/eng/WikipediaOrganicInorganicClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaOrganicInorganicClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaOrganicInorganicClassification(AbsTaskClassification): +class WikipediaOrganicInorganicClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaOrganicInorganicClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/WikipediaSaltsSemiconductorsClassification.py b/mteb/tasks/Classification/eng/WikipediaSaltsSemiconductorsClassification.py index dbd96fb194..219856c593 100644 --- a/mteb/tasks/Classification/eng/WikipediaSaltsSemiconductorsClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaSaltsSemiconductorsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaSaltsSemiconductorsClassification(AbsTaskClassification): +class WikipediaSaltsSemiconductorsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaSaltsSemiconductorsClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/WikipediaSolidStateColloidalClassification.py b/mteb/tasks/Classification/eng/WikipediaSolidStateColloidalClassification.py index 998e7ccc6e..ae3492157a 100644 --- a/mteb/tasks/Classification/eng/WikipediaSolidStateColloidalClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaSolidStateColloidalClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaSolidStateColloidalClassification(AbsTaskClassification): +class WikipediaSolidStateColloidalClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaSolidStateColloidalClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/WikipediaTheoreticalAppliedClassification.py b/mteb/tasks/Classification/eng/WikipediaTheoreticalAppliedClassification.py index ce1c321d06..08c0a2ed36 100644 --- a/mteb/tasks/Classification/eng/WikipediaTheoreticalAppliedClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaTheoreticalAppliedClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WikipediaTheoreticalAppliedClassification(AbsTaskClassification): +class WikipediaTheoreticalAppliedClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WikipediaTheoreticalAppliedClassification", description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", diff --git a/mteb/tasks/Classification/eng/YahooAnswersTopicsClassification.py b/mteb/tasks/Classification/eng/YahooAnswersTopicsClassification.py index d81330887d..cb9330b8e2 100644 --- a/mteb/tasks/Classification/eng/YahooAnswersTopicsClassification.py +++ b/mteb/tasks/Classification/eng/YahooAnswersTopicsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class YahooAnswersTopicsClassification(AbsTaskClassification): +class YahooAnswersTopicsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="YahooAnswersTopicsClassification", description="Dataset composed of questions and answers from Yahoo Answers, categorized into topics.", diff --git a/mteb/tasks/Classification/eng/YelpReviewFullClassification.py b/mteb/tasks/Classification/eng/YelpReviewFullClassification.py index 901502874e..99b09b741e 100644 --- a/mteb/tasks/Classification/eng/YelpReviewFullClassification.py +++ b/mteb/tasks/Classification/eng/YelpReviewFullClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class YelpReviewFullClassification(AbsTaskClassification): +class YelpReviewFullClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="YelpReviewFullClassification", description="Yelp Review Full is a dataset for sentiment analysis, containing 5 classes corresponding to ratings 1-5.", diff --git a/mteb/tasks/Classification/est/estonian_valence.py b/mteb/tasks/Classification/est/estonian_valence.py index d50b5a45ef..ecc83f4375 100644 --- a/mteb/tasks/Classification/est/estonian_valence.py +++ b/mteb/tasks/Classification/est/estonian_valence.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class EstonianValenceClassification(AbsTaskClassification): +class EstonianValenceClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="EstonianValenceClassification", dataset={ diff --git a/mteb/tasks/Classification/fas/FaMTEBClassification.py b/mteb/tasks/Classification/fas/FaMTEBClassification.py index 6930c201d9..462c6a6084 100644 --- a/mteb/tasks/Classification/fas/FaMTEBClassification.py +++ b/mteb/tasks/Classification/fas/FaMTEBClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SynPerChatbotConvSAAnger(AbsTaskClassification): +class SynPerChatbotConvSAAnger(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotConvSAAnger", description="Synthetic Persian Chatbot Conversational Sentiment Analysis Anger", @@ -31,7 +31,7 @@ class SynPerChatbotConvSAAnger(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotConvSASatisfaction(AbsTaskClassification): +class SynPerChatbotConvSASatisfaction(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotConvSASatisfaction", description="Synthetic Persian Chatbot Conversational Sentiment Analysis Satisfaction", @@ -58,7 +58,7 @@ class SynPerChatbotConvSASatisfaction(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotConvSAFriendship(AbsTaskClassification): +class SynPerChatbotConvSAFriendship(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotConvSAFriendship", description="Synthetic Persian Chatbot Conversational Sentiment Analysis Friendship", @@ -85,7 +85,7 @@ class SynPerChatbotConvSAFriendship(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotConvSAFear(AbsTaskClassification): +class SynPerChatbotConvSAFear(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotConvSAFear", description="Synthetic Persian Chatbot Conversational Sentiment Analysis Fear", @@ -112,7 +112,7 @@ class SynPerChatbotConvSAFear(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotConvSAJealousy(AbsTaskClassification): +class SynPerChatbotConvSAJealousy(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotConvSAJealousy", description="Synthetic Persian Chatbot Conversational Sentiment Analysis Jealousy", @@ -139,7 +139,7 @@ class SynPerChatbotConvSAJealousy(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotConvSASurprise(AbsTaskClassification): +class SynPerChatbotConvSASurprise(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotConvSASurprise", description="Synthetic Persian Chatbot Conversational Sentiment Analysis Surprise", @@ -166,7 +166,7 @@ class SynPerChatbotConvSASurprise(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotConvSALove(AbsTaskClassification): +class SynPerChatbotConvSALove(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotConvSALove", description="Synthetic Persian Chatbot Conversational Sentiment Analysis Love", @@ -193,7 +193,7 @@ class SynPerChatbotConvSALove(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotConvSASadness(AbsTaskClassification): +class SynPerChatbotConvSASadness(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotConvSASadness", description="Synthetic Persian Chatbot Conversational Sentiment Analysis Sadness", @@ -220,7 +220,7 @@ class SynPerChatbotConvSASadness(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotConvSAHappiness(AbsTaskClassification): +class SynPerChatbotConvSAHappiness(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotConvSAHappiness", description="Synthetic Persian Chatbot Conversational Sentiment Analysis Happiness", @@ -247,7 +247,7 @@ class SynPerChatbotConvSAHappiness(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotConvSAToneChatbotClassification(AbsTaskClassification): +class SynPerChatbotConvSAToneChatbotClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotConvSAToneChatbotClassification", description="Synthetic Persian Chatbot Conversational Sentiment Analysis Tone Chatbot Classification", @@ -274,7 +274,7 @@ class SynPerChatbotConvSAToneChatbotClassification(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotConvSAToneUserClassification(AbsTaskClassification): +class SynPerChatbotConvSAToneUserClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotConvSAToneUserClassification", description="Synthetic Persian Chatbot Conversational Sentiment Analysis Tone User", @@ -301,7 +301,7 @@ class SynPerChatbotConvSAToneUserClassification(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotSatisfactionLevelClassification(AbsTaskClassification): +class SynPerChatbotSatisfactionLevelClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotSatisfactionLevelClassification", description="Synthetic Persian Chatbot Satisfaction Level Classification", @@ -328,7 +328,7 @@ class SynPerChatbotSatisfactionLevelClassification(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotRAGToneChatbotClassification(AbsTaskClassification): +class SynPerChatbotRAGToneChatbotClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotRAGToneChatbotClassification", description="Synthetic Persian Chatbot RAG Tone Chatbot Classification", @@ -355,7 +355,7 @@ class SynPerChatbotRAGToneChatbotClassification(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotRAGToneUserClassification(AbsTaskClassification): +class SynPerChatbotRAGToneUserClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotRAGToneUserClassification", description="Synthetic Persian Chatbot RAG Tone User Classification", @@ -382,7 +382,7 @@ class SynPerChatbotRAGToneUserClassification(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotToneChatbotClassification(AbsTaskClassification): +class SynPerChatbotToneChatbotClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotToneChatbotClassification", description="Synthetic Persian Chatbot Tone Chatbot Classification", @@ -409,7 +409,7 @@ class SynPerChatbotToneChatbotClassification(AbsTaskClassification): samples_per_label = 32 -class SynPerChatbotToneUserClassification(AbsTaskClassification): +class SynPerChatbotToneUserClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SynPerChatbotToneUserClassification", description="Synthetic Persian Chatbot Tone User Classification", @@ -436,7 +436,7 @@ class SynPerChatbotToneUserClassification(AbsTaskClassification): samples_per_label = 32 -class PersianTextTone(AbsTaskClassification): +class PersianTextTone(AbsTaskAnyClassification): metadata = TaskMetadata( name="PersianTextTone", description="Persian Text Tone", @@ -463,7 +463,7 @@ class PersianTextTone(AbsTaskClassification): samples_per_label = 32 -class SIDClassification(AbsTaskClassification): +class SIDClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SIDClassification", description="SID Classification", @@ -490,7 +490,7 @@ class SIDClassification(AbsTaskClassification): samples_per_label = 32 -class DeepSentiPers(AbsTaskClassification): +class DeepSentiPers(AbsTaskAnyClassification): metadata = TaskMetadata( name="DeepSentiPers", description="Persian Sentiment Analysis Dataset", @@ -520,7 +520,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("review", "text") -class PersianTextEmotion(AbsTaskClassification): +class PersianTextEmotion(AbsTaskAnyClassification): metadata = TaskMetadata( name="PersianTextEmotion", description="Emotion is a Persian dataset with six basic emotions: anger, fear, joy, love, sadness, and surprise.", @@ -547,7 +547,7 @@ class PersianTextEmotion(AbsTaskClassification): samples_per_label = 32 -class SentimentDKSF(AbsTaskClassification): +class SentimentDKSF(AbsTaskAnyClassification): metadata = TaskMetadata( name="SentimentDKSF", description="The Sentiment DKSF (Digikala/Snappfood comments) is a dataset for sentiment analysis.", @@ -574,7 +574,7 @@ class SentimentDKSF(AbsTaskClassification): samples_per_label = 32 -class NLPTwitterAnalysisClassification(AbsTaskClassification): +class NLPTwitterAnalysisClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="NLPTwitterAnalysisClassification", description="Twitter Analysis Classification", @@ -604,7 +604,7 @@ def dataset_transform(self): self.dataset = self.dataset.rename_column("tweet", "text") -class DigikalamagClassification(AbsTaskClassification): +class DigikalamagClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="DigikalamagClassification", description="A total of 8,515 articles scraped from Digikala Online Magazine. This dataset includes seven different classes.", diff --git a/mteb/tasks/Classification/fas/PersianFoodSentimentClassification.py b/mteb/tasks/Classification/fas/PersianFoodSentimentClassification.py index 91ba97733a..a783c1ddb7 100644 --- a/mteb/tasks/Classification/fas/PersianFoodSentimentClassification.py +++ b/mteb/tasks/Classification/fas/PersianFoodSentimentClassification.py @@ -1,12 +1,12 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata TEST_SAMPLES = 2048 -class PersianFoodSentimentClassification(AbsTaskClassification): +class PersianFoodSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="PersianFoodSentimentClassification", description="Persian Food Review Dataset", diff --git a/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py b/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py index 7436ea2c25..95fdd29766 100644 --- a/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py +++ b/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py @@ -1,12 +1,12 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata TEST_SAMPLES = 2048 -class FilipinoHateSpeechClassification(AbsTaskClassification): +class FilipinoHateSpeechClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="FilipinoHateSpeechClassification", description="Filipino Twitter dataset for sentiment classification.", diff --git a/mteb/tasks/Classification/fil/FilipinoShopeeReviewsClassification.py b/mteb/tasks/Classification/fil/FilipinoShopeeReviewsClassification.py index d9559be769..9dacc6efdc 100644 --- a/mteb/tasks/Classification/fil/FilipinoShopeeReviewsClassification.py +++ b/mteb/tasks/Classification/fil/FilipinoShopeeReviewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class FilipinoShopeeReviewsClassification(AbsTaskClassification): +class FilipinoShopeeReviewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="FilipinoShopeeReviewsClassification", description="The Shopee reviews tl 15 dataset is constructed by randomly taking 2100 training samples and 450 samples for testing and validation for each review star from 1 to 5. In total, there are 10500 training samples and 2250 each in validation and testing samples.", diff --git a/mteb/tasks/Classification/fin/FinToxicityClassification.py b/mteb/tasks/Classification/fin/FinToxicityClassification.py index d497899a7c..2e753d9746 100644 --- a/mteb/tasks/Classification/fin/FinToxicityClassification.py +++ b/mteb/tasks/Classification/fin/FinToxicityClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class FinToxicityClassification(AbsTaskClassification): +class FinToxicityClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="FinToxicityClassification", description=""" diff --git a/mteb/tasks/Classification/fra/FrenchBookReviews.py b/mteb/tasks/Classification/fra/FrenchBookReviews.py index 51e672be20..d0bbbcc1c4 100644 --- a/mteb/tasks/Classification/fra/FrenchBookReviews.py +++ b/mteb/tasks/Classification/fra/FrenchBookReviews.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class FrenchBookReviews(AbsTaskClassification): +class FrenchBookReviews(AbsTaskAnyClassification): metadata = TaskMetadata( name="FrenchBookReviews", dataset={ diff --git a/mteb/tasks/Classification/fra/MovieReviewSentimentClassification.py b/mteb/tasks/Classification/fra/MovieReviewSentimentClassification.py index 452039e5d1..7fbf8a4552 100644 --- a/mteb/tasks/Classification/fra/MovieReviewSentimentClassification.py +++ b/mteb/tasks/Classification/fra/MovieReviewSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class MovieReviewSentimentClassification(AbsTaskClassification): +class MovieReviewSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="MovieReviewSentimentClassification", dataset={ diff --git a/mteb/tasks/Classification/guj/GujaratiNewsClassification.py b/mteb/tasks/Classification/guj/GujaratiNewsClassification.py index 8cc5cecb5a..13d2cfe1f4 100644 --- a/mteb/tasks/Classification/guj/GujaratiNewsClassification.py +++ b/mteb/tasks/Classification/guj/GujaratiNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class GujaratiNewsClassification(AbsTaskClassification): +class GujaratiNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="GujaratiNewsClassification", description="A Gujarati dataset for 3-class classification of Gujarati news articles", diff --git a/mteb/tasks/Classification/heb/HebrewSentimentAnalysis.py b/mteb/tasks/Classification/heb/HebrewSentimentAnalysis.py index 8d331b047d..801c843111 100644 --- a/mteb/tasks/Classification/heb/HebrewSentimentAnalysis.py +++ b/mteb/tasks/Classification/heb/HebrewSentimentAnalysis.py @@ -1,12 +1,12 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification # type: ignore from mteb.abstasks.TaskMetadata import TaskMetadata # type: ignore -class HebrewSentimentAnalysis(AbsTaskClassification): +class HebrewSentimentAnalysis(AbsTaskAnyClassification): metadata = TaskMetadata( name="HebrewSentimentAnalysis", dataset={ diff --git a/mteb/tasks/Classification/hin/HindiDiscourseClassification.py b/mteb/tasks/Classification/hin/HindiDiscourseClassification.py index 0ecaa7aa19..f224bf77e9 100644 --- a/mteb/tasks/Classification/hin/HindiDiscourseClassification.py +++ b/mteb/tasks/Classification/hin/HindiDiscourseClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class HindiDiscourseClassification(AbsTaskClassification): +class HindiDiscourseClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="HindiDiscourseClassification", dataset={ diff --git a/mteb/tasks/Classification/hin/SentimentAnalysisHindi.py b/mteb/tasks/Classification/hin/SentimentAnalysisHindi.py index d973196fea..2a7851f593 100644 --- a/mteb/tasks/Classification/hin/SentimentAnalysisHindi.py +++ b/mteb/tasks/Classification/hin/SentimentAnalysisHindi.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SentimentAnalysisHindi(AbsTaskClassification): +class SentimentAnalysisHindi(AbsTaskAnyClassification): metadata = TaskMetadata( name="SentimentAnalysisHindi", description="Hindi Sentiment Analysis Dataset", diff --git a/mteb/tasks/Classification/hrv/FrenkHrClassification.py b/mteb/tasks/Classification/hrv/FrenkHrClassification.py index bf0a98e565..490be57b26 100644 --- a/mteb/tasks/Classification/hrv/FrenkHrClassification.py +++ b/mteb/tasks/Classification/hrv/FrenkHrClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class FrenkHrClassification(AbsTaskClassification): +class FrenkHrClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="FrenkHrClassification", description="Croatian subset of the FRENK dataset", diff --git a/mteb/tasks/Classification/ind/IndonesianIdClickbaitClassification.py b/mteb/tasks/Classification/ind/IndonesianIdClickbaitClassification.py index f7b9bcd44d..85fb2b5432 100644 --- a/mteb/tasks/Classification/ind/IndonesianIdClickbaitClassification.py +++ b/mteb/tasks/Classification/ind/IndonesianIdClickbaitClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class IndonesianIdClickbaitClassification(AbsTaskClassification): +class IndonesianIdClickbaitClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="IndonesianIdClickbaitClassification", dataset={ diff --git a/mteb/tasks/Classification/ind/IndonesianMongabayConservationClassification.py b/mteb/tasks/Classification/ind/IndonesianMongabayConservationClassification.py index 73f47808cc..1164ae58cb 100644 --- a/mteb/tasks/Classification/ind/IndonesianMongabayConservationClassification.py +++ b/mteb/tasks/Classification/ind/IndonesianMongabayConservationClassification.py @@ -5,11 +5,11 @@ import datasets import numpy as np -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class IndonesianMongabayConservationClassification(AbsTaskClassification): +class IndonesianMongabayConservationClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="IndonesianMongabayConservationClassification", description="Conservation dataset that was collected from mongabay.co.id contains topic-classification task (multi-label format) and sentiment classification. This task only covers sentiment analysis (positive, neutral negative)", diff --git a/mteb/tasks/Classification/ita/ItaCaseholdClassification.py b/mteb/tasks/Classification/ita/ItaCaseholdClassification.py index 2c506a5d1b..da78b93eb8 100644 --- a/mteb/tasks/Classification/ita/ItaCaseholdClassification.py +++ b/mteb/tasks/Classification/ita/ItaCaseholdClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class ItaCaseholdClassification(AbsTaskClassification): +class ItaCaseholdClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="ItaCaseholdClassification", dataset={ diff --git a/mteb/tasks/Classification/ita/ItalianLinguistAcceptabilityClassification.py b/mteb/tasks/Classification/ita/ItalianLinguistAcceptabilityClassification.py index 339062e0db..a832e7c4be 100644 --- a/mteb/tasks/Classification/ita/ItalianLinguistAcceptabilityClassification.py +++ b/mteb/tasks/Classification/ita/ItalianLinguistAcceptabilityClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class ItalianLinguisticAcceptabilityClassification(AbsTaskClassification): +class ItalianLinguisticAcceptabilityClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="Itacola", dataset={ diff --git a/mteb/tasks/Classification/jav/JavaneseIMDBClassification.py b/mteb/tasks/Classification/jav/JavaneseIMDBClassification.py index 7781d23087..49a4044f04 100644 --- a/mteb/tasks/Classification/jav/JavaneseIMDBClassification.py +++ b/mteb/tasks/Classification/jav/JavaneseIMDBClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class JavaneseIMDBClassification(AbsTaskClassification): +class JavaneseIMDBClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="JavaneseIMDBClassification", description="Large Movie Review Dataset translated to Javanese. This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets.", diff --git a/mteb/tasks/Classification/jpn/WRIMEClassification.py b/mteb/tasks/Classification/jpn/WRIMEClassification.py index d8cec64b68..7c5f5f2dec 100644 --- a/mteb/tasks/Classification/jpn/WRIMEClassification.py +++ b/mteb/tasks/Classification/jpn/WRIMEClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WRIMEClassification(AbsTaskClassification): +class WRIMEClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WRIMEClassification", dataset={ diff --git a/mteb/tasks/Classification/kan/KannadaNewsClassification.py b/mteb/tasks/Classification/kan/KannadaNewsClassification.py index 1aae24ecb2..5fc54d88d7 100644 --- a/mteb/tasks/Classification/kan/KannadaNewsClassification.py +++ b/mteb/tasks/Classification/kan/KannadaNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class KannadaNewsClassification(AbsTaskClassification): +class KannadaNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="KannadaNewsClassification", description="The Kannada news dataset contains only the headlines of news article in three categories: Entertainment, Tech, and Sports. The data set contains around 6300 news article headlines which are collected from Kannada news websites. The data set has been cleaned and contains train and test set using which can be used to benchmark topic classification models in Kannada.", diff --git a/mteb/tasks/Classification/kat/GeorgianSentimentClassification.py b/mteb/tasks/Classification/kat/GeorgianSentimentClassification.py index c74ac21dfc..9109aa4b86 100644 --- a/mteb/tasks/Classification/kat/GeorgianSentimentClassification.py +++ b/mteb/tasks/Classification/kat/GeorgianSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class GeorgianSentimentClassification(AbsTaskClassification): +class GeorgianSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="GeorgianSentimentClassification", description="Goergian Sentiment Dataset", diff --git a/mteb/tasks/Classification/kor/KlueTC.py b/mteb/tasks/Classification/kor/KlueTC.py index a20ab92658..5c48e7a434 100644 --- a/mteb/tasks/Classification/kor/KlueTC.py +++ b/mteb/tasks/Classification/kor/KlueTC.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class KlueTC(AbsTaskClassification): +class KlueTC(AbsTaskAnyClassification): metadata = TaskMetadata( name="KLUE-TC", dataset={ diff --git a/mteb/tasks/Classification/kor/KorFin.py b/mteb/tasks/Classification/kor/KorFin.py index 8080bb5635..5a500728d2 100644 --- a/mteb/tasks/Classification/kor/KorFin.py +++ b/mteb/tasks/Classification/kor/KorFin.py @@ -1,12 +1,12 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification # type: ignore from mteb.abstasks.TaskMetadata import TaskMetadata # type: ignore -class KorFin(AbsTaskClassification): +class KorFin(AbsTaskAnyClassification): metadata = TaskMetadata( name="KorFin", dataset={ diff --git a/mteb/tasks/Classification/kor/KorHateClassification.py b/mteb/tasks/Classification/kor/KorHateClassification.py index f4079e6175..f9a5b8e182 100644 --- a/mteb/tasks/Classification/kor/KorHateClassification.py +++ b/mteb/tasks/Classification/kor/KorHateClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class KorHateClassification(AbsTaskClassification): +class KorHateClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="KorHateClassification", description="""The dataset was created to provide the first human-labeled Korean corpus for diff --git a/mteb/tasks/Classification/kor/KorSarcasmClassification.py b/mteb/tasks/Classification/kor/KorSarcasmClassification.py index 3471f5dd56..f7cbc99ecb 100644 --- a/mteb/tasks/Classification/kor/KorSarcasmClassification.py +++ b/mteb/tasks/Classification/kor/KorSarcasmClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class KorSarcasmClassification(AbsTaskClassification): +class KorSarcasmClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="KorSarcasmClassification", description=""" diff --git a/mteb/tasks/Classification/kur/KurdishSentimentClassification.py b/mteb/tasks/Classification/kur/KurdishSentimentClassification.py index b12f7c918a..11eeefdcf6 100644 --- a/mteb/tasks/Classification/kur/KurdishSentimentClassification.py +++ b/mteb/tasks/Classification/kur/KurdishSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class KurdishSentimentClassification(AbsTaskClassification): +class KurdishSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="KurdishSentimentClassification", description="Kurdish Sentiment Dataset", diff --git a/mteb/tasks/Classification/mal/MalayalamNewsClassification.py b/mteb/tasks/Classification/mal/MalayalamNewsClassification.py index d3caa44694..dff91e0b75 100644 --- a/mteb/tasks/Classification/mal/MalayalamNewsClassification.py +++ b/mteb/tasks/Classification/mal/MalayalamNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class MalayalamNewsClassification(AbsTaskClassification): +class MalayalamNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="MalayalamNewsClassification", description="A Malayalam dataset for 3-class classification of Malayalam news articles", diff --git a/mteb/tasks/Classification/mar/MarathiNewsClassification.py b/mteb/tasks/Classification/mar/MarathiNewsClassification.py index b5f0a1cae6..8202a12050 100644 --- a/mteb/tasks/Classification/mar/MarathiNewsClassification.py +++ b/mteb/tasks/Classification/mar/MarathiNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class MarathiNewsClassification(AbsTaskClassification): +class MarathiNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="MarathiNewsClassification", description="A Marathi dataset for 3-class classification of Marathi news articles", diff --git a/mteb/tasks/Classification/mkd/MacedonianTweetSentimentClassification.py b/mteb/tasks/Classification/mkd/MacedonianTweetSentimentClassification.py index 621ee8e9fb..bb5f3dd762 100644 --- a/mteb/tasks/Classification/mkd/MacedonianTweetSentimentClassification.py +++ b/mteb/tasks/Classification/mkd/MacedonianTweetSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class MacedonianTweetSentimentClassification(AbsTaskClassification): +class MacedonianTweetSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="MacedonianTweetSentimentClassification", description="An Macedonian dataset for tweet sentiment classification.", diff --git a/mteb/tasks/Classification/multilingual/AfriSentiClassification.py b/mteb/tasks/Classification/multilingual/AfriSentiClassification.py index 28cb09f244..e027c34005 100644 --- a/mteb/tasks/Classification/multilingual/AfriSentiClassification.py +++ b/mteb/tasks/Classification/multilingual/AfriSentiClassification.py @@ -2,7 +2,7 @@ import datasets -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata @@ -13,7 +13,7 @@ def _transform(dataset, lang): return dataset -class AfriSentiClassification(AbsTaskClassification): +class AfriSentiClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="AfriSentiClassification", description="AfriSenti is the largest sentiment analysis dataset for under-represented African languages.", diff --git a/mteb/tasks/Classification/multilingual/AfriSentiLangClassification.py b/mteb/tasks/Classification/multilingual/AfriSentiLangClassification.py index 5192376e43..135c001156 100644 --- a/mteb/tasks/Classification/multilingual/AfriSentiLangClassification.py +++ b/mteb/tasks/Classification/multilingual/AfriSentiLangClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class AfriSentiLangClassification(AbsTaskClassification): +class AfriSentiLangClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="AfriSentiLangClassification", description="AfriSentiLID is the largest LID classification dataset for African Languages.", diff --git a/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py b/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py index ab5a3e21db..bae919e240 100644 --- a/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py +++ b/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class AmazonCounterfactualClassification(AbsTaskClassification): +class AmazonCounterfactualClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="AmazonCounterfactualClassification", dataset={ diff --git a/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py b/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py index 4b4a6b120f..82b88ec3cc 100644 --- a/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py +++ b/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class AmazonReviewsClassification(AbsTaskClassification): +class AmazonReviewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="AmazonReviewsClassification", dataset={ diff --git a/mteb/tasks/Classification/multilingual/CataloniaTweetClassification.py b/mteb/tasks/Classification/multilingual/CataloniaTweetClassification.py index 95e16bb80e..c4bf8eb14b 100644 --- a/mteb/tasks/Classification/multilingual/CataloniaTweetClassification.py +++ b/mteb/tasks/Classification/multilingual/CataloniaTweetClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGS = { @@ -9,7 +9,7 @@ } -class CataloniaTweetClassification(AbsTaskClassification): +class CataloniaTweetClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CataloniaTweetClassification", description="""This dataset contains two corpora in Spanish and Catalan that consist of annotated Twitter diff --git a/mteb/tasks/Classification/multilingual/CyrillicTurkicLangClassification.py b/mteb/tasks/Classification/multilingual/CyrillicTurkicLangClassification.py index 7d110bc2eb..c1074341cc 100644 --- a/mteb/tasks/Classification/multilingual/CyrillicTurkicLangClassification.py +++ b/mteb/tasks/Classification/multilingual/CyrillicTurkicLangClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class CyrillicTurkicLangClassification(AbsTaskClassification): +class CyrillicTurkicLangClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CyrillicTurkicLangClassification", description="Cyrillic dataset of 8 Turkic languages spoken in Russia and former USSR", diff --git a/mteb/tasks/Classification/multilingual/HinDialectClassification.py b/mteb/tasks/Classification/multilingual/HinDialectClassification.py index df8cbf390e..77cd93201f 100644 --- a/mteb/tasks/Classification/multilingual/HinDialectClassification.py +++ b/mteb/tasks/Classification/multilingual/HinDialectClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = [ @@ -28,7 +28,7 @@ ] -class HinDialectClassification(AbsTaskClassification): +class HinDialectClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="HinDialectClassification", dataset={ diff --git a/mteb/tasks/Classification/multilingual/IndicLangClassification.py b/mteb/tasks/Classification/multilingual/IndicLangClassification.py index 1bf990fd2f..f4b0e16bd5 100644 --- a/mteb/tasks/Classification/multilingual/IndicLangClassification.py +++ b/mteb/tasks/Classification/multilingual/IndicLangClassification.py @@ -4,7 +4,7 @@ import datasets -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { @@ -62,7 +62,7 @@ } -class IndicLangClassification(AbsTaskClassification): +class IndicLangClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="IndicLangClassification", dataset={ diff --git a/mteb/tasks/Classification/multilingual/IndicNLPNewsClassification.py b/mteb/tasks/Classification/multilingual/IndicNLPNewsClassification.py index 0e62e6c215..2345909d58 100644 --- a/mteb/tasks/Classification/multilingual/IndicNLPNewsClassification.py +++ b/mteb/tasks/Classification/multilingual/IndicNLPNewsClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { @@ -15,7 +15,7 @@ } -class IndicNLPNewsClassification(AbsTaskClassification): +class IndicNLPNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="IndicNLPNewsClassification", dataset={ diff --git a/mteb/tasks/Classification/multilingual/IndicSentimentClassification.py b/mteb/tasks/Classification/multilingual/IndicSentimentClassification.py index e315cab0be..eb470f1eaa 100644 --- a/mteb/tasks/Classification/multilingual/IndicSentimentClassification.py +++ b/mteb/tasks/Classification/multilingual/IndicSentimentClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { @@ -20,7 +20,7 @@ } -class IndicSentimentClassification(AbsTaskClassification): +class IndicSentimentClassification(AbsTaskAnyClassification): fast_loading = True metadata = TaskMetadata( name="IndicSentimentClassification", diff --git a/mteb/tasks/Classification/multilingual/LanguageClassification.py b/mteb/tasks/Classification/multilingual/LanguageClassification.py index 47ae8c68b6..1f8a2165de 100644 --- a/mteb/tasks/Classification/multilingual/LanguageClassification.py +++ b/mteb/tasks/Classification/multilingual/LanguageClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = [ @@ -27,7 +27,7 @@ ] -class LanguageClassification(AbsTaskClassification): +class LanguageClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="LanguageClassification", dataset={ diff --git a/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py b/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py index 5f7aa6c7e1..9b3d1af326 100644 --- a/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py +++ b/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { @@ -13,7 +13,7 @@ } -class MTOPDomainClassification(AbsTaskClassification): +class MTOPDomainClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="MTOPDomainClassification", dataset={ diff --git a/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py b/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py index 38eaecb0e4..315837c84c 100644 --- a/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py +++ b/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { @@ -13,7 +13,7 @@ } -class MTOPIntentClassification(AbsTaskClassification): +class MTOPIntentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="MTOPIntentClassification", dataset={ diff --git a/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py b/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py index e8f4a1304e..ea7a7a210e 100644 --- a/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py +++ b/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { @@ -23,7 +23,7 @@ } -class MasakhaNEWSClassification(AbsTaskClassification): +class MasakhaNEWSClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="MasakhaNEWSClassification", dataset={ diff --git a/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py b/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py index f6dd769363..af87ca4d63 100644 --- a/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py +++ b/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { @@ -58,7 +58,7 @@ } -class MassiveIntentClassification(AbsTaskClassification): +class MassiveIntentClassification(AbsTaskAnyClassification): fast_loading = True metadata = TaskMetadata( name="MassiveIntentClassification", diff --git a/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py b/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py index 5072af2592..7e214336ed 100644 --- a/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py +++ b/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { @@ -58,7 +58,7 @@ } -class MassiveScenarioClassification(AbsTaskClassification): +class MassiveScenarioClassification(AbsTaskAnyClassification): fast_loading = True metadata = TaskMetadata( name="MassiveScenarioClassification", diff --git a/mteb/tasks/Classification/multilingual/MultiHateClassification.py b/mteb/tasks/Classification/multilingual/MultiHateClassification.py index 96318c9331..779d85eff6 100644 --- a/mteb/tasks/Classification/multilingual/MultiHateClassification.py +++ b/mteb/tasks/Classification/multilingual/MultiHateClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { @@ -18,7 +18,7 @@ } -class MultiHateClassification(AbsTaskClassification): +class MultiHateClassification(AbsTaskAnyClassification): fast_loading = True metadata = TaskMetadata( name="MultiHateClassification", diff --git a/mteb/tasks/Classification/multilingual/MultilingualSentimentClassification.py b/mteb/tasks/Classification/multilingual/MultilingualSentimentClassification.py index 7a3e530509..085570e839 100644 --- a/mteb/tasks/Classification/multilingual/MultilingualSentimentClassification.py +++ b/mteb/tasks/Classification/multilingual/MultilingualSentimentClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { @@ -38,7 +38,7 @@ } -class MultilingualSentimentClassification(AbsTaskClassification): +class MultilingualSentimentClassification(AbsTaskAnyClassification): fast_loading = True metadata = TaskMetadata( name="MultilingualSentimentClassification", diff --git a/mteb/tasks/Classification/multilingual/NaijaSenti.py b/mteb/tasks/Classification/multilingual/NaijaSenti.py index 7f67d7fce5..6166be2ee9 100644 --- a/mteb/tasks/Classification/multilingual/NaijaSenti.py +++ b/mteb/tasks/Classification/multilingual/NaijaSenti.py @@ -4,11 +4,11 @@ import datasets -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class NaijaSenti(AbsTaskClassification): +class NaijaSenti(AbsTaskAnyClassification): metadata = TaskMetadata( name="NaijaSenti", description="NaijaSenti is the first large-scale human-annotated Twitter sentiment dataset for the four most widely spoken languages in Nigeria — Hausa, Igbo, Nigerian-Pidgin, and Yorùbá — consisting of around 30,000 annotated tweets per language, including a significant fraction of code-mixed tweets.", diff --git a/mteb/tasks/Classification/multilingual/NordicLangClassification.py b/mteb/tasks/Classification/multilingual/NordicLangClassification.py index 935f1a4ef7..a68f170a23 100644 --- a/mteb/tasks/Classification/multilingual/NordicLangClassification.py +++ b/mteb/tasks/Classification/multilingual/NordicLangClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class NordicLangClassification(AbsTaskClassification): +class NordicLangClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="NordicLangClassification", description="A dataset for Nordic language identification.", diff --git a/mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py b/mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py index d8dbb8512d..1cba488c50 100644 --- a/mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py +++ b/mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { @@ -17,7 +17,7 @@ } -class NusaParagraphEmotionClassification(AbsTaskClassification): +class NusaParagraphEmotionClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="NusaParagraphEmotionClassification", dataset={ diff --git a/mteb/tasks/Classification/multilingual/NusaParagraphTopicClassification.py b/mteb/tasks/Classification/multilingual/NusaParagraphTopicClassification.py index 4d25e4ff62..07dd28423e 100644 --- a/mteb/tasks/Classification/multilingual/NusaParagraphTopicClassification.py +++ b/mteb/tasks/Classification/multilingual/NusaParagraphTopicClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { @@ -17,7 +17,7 @@ } -class NusaParagraphTopicClassification(AbsTaskClassification): +class NusaParagraphTopicClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="NusaParagraphTopicClassification", dataset={ diff --git a/mteb/tasks/Classification/multilingual/NusaXSenti.py b/mteb/tasks/Classification/multilingual/NusaXSenti.py index 99bbda9a3b..20bc48d71f 100644 --- a/mteb/tasks/Classification/multilingual/NusaXSenti.py +++ b/mteb/tasks/Classification/multilingual/NusaXSenti.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class NusaXSentiClassification(AbsTaskClassification): +class NusaXSentiClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="NusaX-senti", description="NusaX is a high-quality multilingual parallel corpus that covers 12 languages, Indonesian, English, and 10 Indonesian local languages, namely Acehnese, Balinese, Banjarese, Buginese, Madurese, Minangkabau, Javanese, Ngaju, Sundanese, and Toba Batak. NusaX-Senti is a 3-labels (positive, neutral, negative) sentiment analysis dataset for 10 Indonesian local languages + Indonesian and English.", diff --git a/mteb/tasks/Classification/multilingual/SIB200Classification.py b/mteb/tasks/Classification/multilingual/SIB200Classification.py index eaa78b84b4..ab829922ab 100644 --- a/mteb/tasks/Classification/multilingual/SIB200Classification.py +++ b/mteb/tasks/Classification/multilingual/SIB200Classification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGS = { @@ -204,7 +204,7 @@ } -class SIB200Classification(AbsTaskClassification): +class SIB200Classification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SIB200Classification", description="""SIB-200 is the largest publicly available topic classification diff --git a/mteb/tasks/Classification/multilingual/ScalaClassification.py b/mteb/tasks/Classification/multilingual/ScalaClassification.py index ffb95e733d..fe362f1a67 100644 --- a/mteb/tasks/Classification/multilingual/ScalaClassification.py +++ b/mteb/tasks/Classification/multilingual/ScalaClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGS = { @@ -11,7 +11,7 @@ } -class ScalaClassification(AbsTaskClassification): +class ScalaClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="ScalaClassification", description="""ScaLa a linguistic acceptability dataset for the mainland Scandinavian languages automatically constructed from dependency annotations in Universal Dependencies Treebanks. diff --git a/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py b/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py index 1b5ff77a2b..7ff83b5943 100644 --- a/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py +++ b/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = [ @@ -18,7 +18,7 @@ ] -class SouthAfricanLangClassification(AbsTaskClassification): +class SouthAfricanLangClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SouthAfricanLangClassification", dataset={ diff --git a/mteb/tasks/Classification/multilingual/SwissJudgementClassification.py b/mteb/tasks/Classification/multilingual/SwissJudgementClassification.py index 11c45e7213..b0972d3978 100644 --- a/mteb/tasks/Classification/multilingual/SwissJudgementClassification.py +++ b/mteb/tasks/Classification/multilingual/SwissJudgementClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SwissJudgementClassification(AbsTaskClassification): +class SwissJudgementClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SwissJudgementClassification", description="Multilingual, diachronic dataset of Swiss Federal Supreme Court cases annotated with the respective binarized judgment outcome (approval/dismissal)", diff --git a/mteb/tasks/Classification/multilingual/TurkicClassification.py b/mteb/tasks/Classification/multilingual/TurkicClassification.py index c9f5680478..e0eaf49ee7 100644 --- a/mteb/tasks/Classification/multilingual/TurkicClassification.py +++ b/mteb/tasks/Classification/multilingual/TurkicClassification.py @@ -5,11 +5,11 @@ import datasets from datasets import DatasetDict -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class TurkicClassification(AbsTaskClassification): +class TurkicClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="TurkicClassification", description="A dataset of news classification in three Turkic languages.", diff --git a/mteb/tasks/Classification/multilingual/TweetSentimentClassification.py b/mteb/tasks/Classification/multilingual/TweetSentimentClassification.py index 2d6ead9505..d0e3ee94fa 100644 --- a/mteb/tasks/Classification/multilingual/TweetSentimentClassification.py +++ b/mteb/tasks/Classification/multilingual/TweetSentimentClassification.py @@ -1,6 +1,6 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata _LANGUAGES = { @@ -15,7 +15,7 @@ } -class TweetSentimentClassification(AbsTaskClassification): +class TweetSentimentClassification(AbsTaskAnyClassification): fast_loading = True metadata = TaskMetadata( name="TweetSentimentClassification", diff --git a/mteb/tasks/Classification/mya/MyanmarNews.py b/mteb/tasks/Classification/mya/MyanmarNews.py index 7f89747821..1f100335cb 100644 --- a/mteb/tasks/Classification/mya/MyanmarNews.py +++ b/mteb/tasks/Classification/mya/MyanmarNews.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class MyanmarNews(AbsTaskClassification): +class MyanmarNews(AbsTaskAnyClassification): metadata = TaskMetadata( name="MyanmarNews", dataset={ diff --git a/mteb/tasks/Classification/nep/NepaliNewsClassification.py b/mteb/tasks/Classification/nep/NepaliNewsClassification.py index 0c4e9427e1..faea86b71c 100644 --- a/mteb/tasks/Classification/nep/NepaliNewsClassification.py +++ b/mteb/tasks/Classification/nep/NepaliNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class NepaliNewsClassification(AbsTaskClassification): +class NepaliNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="NepaliNewsClassification", description="A Nepali dataset for 7500 news articles ", diff --git a/mteb/tasks/Classification/nld/DutchBookReviewSentimentClassification.py b/mteb/tasks/Classification/nld/DutchBookReviewSentimentClassification.py index c5fb70dcfe..1dcb2993dd 100644 --- a/mteb/tasks/Classification/nld/DutchBookReviewSentimentClassification.py +++ b/mteb/tasks/Classification/nld/DutchBookReviewSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class DutchBookReviewSentimentClassification(AbsTaskClassification): +class DutchBookReviewSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="DutchBookReviewSentimentClassification", description="A Dutch book review for sentiment classification.", diff --git a/mteb/tasks/Classification/nob/NoRecClassification.py b/mteb/tasks/Classification/nob/NoRecClassification.py index 886c5a794d..d95c8fee68 100644 --- a/mteb/tasks/Classification/nob/NoRecClassification.py +++ b/mteb/tasks/Classification/nob/NoRecClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class NoRecClassification(AbsTaskClassification): +class NoRecClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="NoRecClassification", description="A Norwegian dataset for sentiment classification on review", diff --git a/mteb/tasks/Classification/nob/NorwegianParliamentClassification.py b/mteb/tasks/Classification/nob/NorwegianParliamentClassification.py index 79b7531bce..836fb4e278 100644 --- a/mteb/tasks/Classification/nob/NorwegianParliamentClassification.py +++ b/mteb/tasks/Classification/nob/NorwegianParliamentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class NorwegianParliamentClassification(AbsTaskClassification): +class NorwegianParliamentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="NorwegianParliamentClassification", description="Norwegian parliament speeches annotated for sentiment", diff --git a/mteb/tasks/Classification/ory/OdiaNewsClassification.py b/mteb/tasks/Classification/ory/OdiaNewsClassification.py index 60b155bf23..b581f49343 100644 --- a/mteb/tasks/Classification/ory/OdiaNewsClassification.py +++ b/mteb/tasks/Classification/ory/OdiaNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class OdiaNewsClassification(AbsTaskClassification): +class OdiaNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="OdiaNewsClassification", description="A Odia dataset for 3-class classification of Odia news articles", diff --git a/mteb/tasks/Classification/pan/PunjabiNewsClassification.py b/mteb/tasks/Classification/pan/PunjabiNewsClassification.py index 4443072be7..0327c78982 100644 --- a/mteb/tasks/Classification/pan/PunjabiNewsClassification.py +++ b/mteb/tasks/Classification/pan/PunjabiNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class PunjabiNewsClassification(AbsTaskClassification): +class PunjabiNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="PunjabiNewsClassification", description="A Punjabi dataset for 2-class classification of Punjabi news articles", diff --git a/mteb/tasks/Classification/pol/PolishClassification.py b/mteb/tasks/Classification/pol/PolishClassification.py index 34d745e5c8..798beae7ad 100644 --- a/mteb/tasks/Classification/pol/PolishClassification.py +++ b/mteb/tasks/Classification/pol/PolishClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class CbdClassification(AbsTaskClassification): +class CbdClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CBD", description="Polish Tweets annotated for cyberbullying detection.", @@ -38,7 +38,7 @@ class CbdClassification(AbsTaskClassification): ) -class PolEmo2InClassification(AbsTaskClassification): +class PolEmo2InClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="PolEmo2.0-IN", description="A collection of Polish online reviews from four domains: medicine, hotels, products and " @@ -79,7 +79,7 @@ class PolEmo2InClassification(AbsTaskClassification): ) -class PolEmo2OutClassification(AbsTaskClassification): +class PolEmo2OutClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="PolEmo2.0-OUT", description="A collection of Polish online reviews from four domains: medicine, hotels, products and " @@ -107,7 +107,7 @@ class PolEmo2OutClassification(AbsTaskClassification): ) -class AllegroReviewsClassification(AbsTaskClassification): +class AllegroReviewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="AllegroReviews", description="A Polish dataset for sentiment classification on reviews from e-commerce marketplace Allegro.", @@ -133,7 +133,7 @@ class AllegroReviewsClassification(AbsTaskClassification): ) -class PacClassification(AbsTaskClassification): +class PacClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="PAC", description="Polish Paraphrase Corpus", diff --git a/mteb/tasks/Classification/por/HateSpeechPortugueseClassification.py b/mteb/tasks/Classification/por/HateSpeechPortugueseClassification.py index c3845bb21c..022d471de5 100644 --- a/mteb/tasks/Classification/por/HateSpeechPortugueseClassification.py +++ b/mteb/tasks/Classification/por/HateSpeechPortugueseClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class HateSpeechPortugueseClassification(AbsTaskClassification): +class HateSpeechPortugueseClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="HateSpeechPortugueseClassification", description="HateSpeechPortugueseClassification is a dataset of Portuguese tweets categorized with their sentiment (2 classes).", diff --git a/mteb/tasks/Classification/ron/Moroco.py b/mteb/tasks/Classification/ron/Moroco.py index 25d62e5fa9..9cc4cc4e03 100644 --- a/mteb/tasks/Classification/ron/Moroco.py +++ b/mteb/tasks/Classification/ron/Moroco.py @@ -1,12 +1,12 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification # type: ignore from mteb.abstasks.TaskMetadata import TaskMetadata # type: ignore -class Moroco(AbsTaskClassification): +class Moroco(AbsTaskAnyClassification): metadata = TaskMetadata( name="Moroco", dataset={ diff --git a/mteb/tasks/Classification/ron/RomanianReviewsSentiment.py b/mteb/tasks/Classification/ron/RomanianReviewsSentiment.py index 1e40bcb16c..f5894c46fb 100644 --- a/mteb/tasks/Classification/ron/RomanianReviewsSentiment.py +++ b/mteb/tasks/Classification/ron/RomanianReviewsSentiment.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class RomanianReviewsSentiment(AbsTaskClassification): +class RomanianReviewsSentiment(AbsTaskAnyClassification): metadata = TaskMetadata( name="RomanianReviewsSentiment", description="LaRoSeDa (A Large Romanian Sentiment Data Set) contains 15,000 reviews written in Romanian", diff --git a/mteb/tasks/Classification/ron/RomanianSentimentClassification.py b/mteb/tasks/Classification/ron/RomanianSentimentClassification.py index 6ca4636d31..fb7dc910a8 100644 --- a/mteb/tasks/Classification/ron/RomanianSentimentClassification.py +++ b/mteb/tasks/Classification/ron/RomanianSentimentClassification.py @@ -1,12 +1,12 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata TEST_SAMPLES = 2048 -class RomanianSentimentClassification(AbsTaskClassification): +class RomanianSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="RomanianSentimentClassification", description="An Romanian dataset for sentiment classification.", diff --git a/mteb/tasks/Classification/rus/GeoreviewClassification.py b/mteb/tasks/Classification/rus/GeoreviewClassification.py index f150e3be06..460d04b569 100644 --- a/mteb/tasks/Classification/rus/GeoreviewClassification.py +++ b/mteb/tasks/Classification/rus/GeoreviewClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class GeoreviewClassification(AbsTaskClassification): +class GeoreviewClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="GeoreviewClassification", dataset={ diff --git a/mteb/tasks/Classification/rus/HeadlineClassification.py b/mteb/tasks/Classification/rus/HeadlineClassification.py index 29882675bf..d3ff500760 100644 --- a/mteb/tasks/Classification/rus/HeadlineClassification.py +++ b/mteb/tasks/Classification/rus/HeadlineClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class HeadlineClassification(AbsTaskClassification): +class HeadlineClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="HeadlineClassification", dataset={ diff --git a/mteb/tasks/Classification/rus/InappropriatenessClassification.py b/mteb/tasks/Classification/rus/InappropriatenessClassification.py index 683e370903..b2211d83a3 100644 --- a/mteb/tasks/Classification/rus/InappropriatenessClassification.py +++ b/mteb/tasks/Classification/rus/InappropriatenessClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class InappropriatenessClassification(AbsTaskClassification): +class InappropriatenessClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="InappropriatenessClassification", dataset={ diff --git a/mteb/tasks/Classification/rus/KinopoiskClassification.py b/mteb/tasks/Classification/rus/KinopoiskClassification.py index b26f73e2ea..ee1a52a3b9 100644 --- a/mteb/tasks/Classification/rus/KinopoiskClassification.py +++ b/mteb/tasks/Classification/rus/KinopoiskClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class KinopoiskClassification(AbsTaskClassification): +class KinopoiskClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="KinopoiskClassification", dataset={ diff --git a/mteb/tasks/Classification/rus/RuReviewsClassification.py b/mteb/tasks/Classification/rus/RuReviewsClassification.py index 4841f0d9f3..7158a4828d 100644 --- a/mteb/tasks/Classification/rus/RuReviewsClassification.py +++ b/mteb/tasks/Classification/rus/RuReviewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class RuReviewsClassification(AbsTaskClassification): +class RuReviewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="RuReviewsClassification", dataset={ diff --git a/mteb/tasks/Classification/rus/RuSciBenchGRNTIClassification.py b/mteb/tasks/Classification/rus/RuSciBenchGRNTIClassification.py index 3e9e45c7ad..96abe481f0 100644 --- a/mteb/tasks/Classification/rus/RuSciBenchGRNTIClassification.py +++ b/mteb/tasks/Classification/rus/RuSciBenchGRNTIClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class RuSciBenchGRNTIClassification(AbsTaskClassification): +class RuSciBenchGRNTIClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="RuSciBenchGRNTIClassification", dataset={ diff --git a/mteb/tasks/Classification/rus/RuSciBenchOECDClassification.py b/mteb/tasks/Classification/rus/RuSciBenchOECDClassification.py index f7a9b43bc4..f9e62bb37f 100644 --- a/mteb/tasks/Classification/rus/RuSciBenchOECDClassification.py +++ b/mteb/tasks/Classification/rus/RuSciBenchOECDClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class RuSciBenchOECDClassification(AbsTaskClassification): +class RuSciBenchOECDClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="RuSciBenchOECDClassification", dataset={ diff --git a/mteb/tasks/Classification/san/SanskritShlokasClassification.py b/mteb/tasks/Classification/san/SanskritShlokasClassification.py index 2081050408..a2a66faf07 100644 --- a/mteb/tasks/Classification/san/SanskritShlokasClassification.py +++ b/mteb/tasks/Classification/san/SanskritShlokasClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SanskritShlokasClassification(AbsTaskClassification): +class SanskritShlokasClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SanskritShlokasClassification", description="This data set contains ~500 Shlokas ", diff --git a/mteb/tasks/Classification/sin/SinhalaNewsClassification.py b/mteb/tasks/Classification/sin/SinhalaNewsClassification.py index 8871d2f17b..a2e01860f0 100644 --- a/mteb/tasks/Classification/sin/SinhalaNewsClassification.py +++ b/mteb/tasks/Classification/sin/SinhalaNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SinhalaNewsClassification(AbsTaskClassification): +class SinhalaNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SinhalaNewsClassification", description="This file contains news texts (sentences) belonging to 5 different news categories (political, business, technology, sports and Entertainment). The original dataset was released by Nisansa de Silva (Sinhala Text Classification: Observations from the Perspective of a Resource Poor Language, 2015).", diff --git a/mteb/tasks/Classification/sin/SinhalaNewsSourceClassification.py b/mteb/tasks/Classification/sin/SinhalaNewsSourceClassification.py index 13e92fad82..87ebd0a892 100644 --- a/mteb/tasks/Classification/sin/SinhalaNewsSourceClassification.py +++ b/mteb/tasks/Classification/sin/SinhalaNewsSourceClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SinhalaNewsSourceClassification(AbsTaskClassification): +class SinhalaNewsSourceClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SinhalaNewsSourceClassification", description="This dataset contains Sinhala news headlines extracted from 9 news sources (websites) (Sri Lanka Army, Dinamina, GossipLanka, Hiru, ITN, Lankapuwath, NewsLK, Newsfirst, World Socialist Web Site-Sinhala).", diff --git a/mteb/tasks/Classification/slk/CSFDSKMovieReviewSentimentClassification.py b/mteb/tasks/Classification/slk/CSFDSKMovieReviewSentimentClassification.py index 244a2a6499..f93f4af9ac 100644 --- a/mteb/tasks/Classification/slk/CSFDSKMovieReviewSentimentClassification.py +++ b/mteb/tasks/Classification/slk/CSFDSKMovieReviewSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class CSFDSKMovieReviewSentimentClassification(AbsTaskClassification): +class CSFDSKMovieReviewSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="CSFDSKMovieReviewSentimentClassification", description="The dataset contains 30k user reviews from csfd.cz in Slovak.", diff --git a/mteb/tasks/Classification/slk/SlovakHateSpeechClassification.py b/mteb/tasks/Classification/slk/SlovakHateSpeechClassification.py index 23bfa41f3e..bbd6e26ac7 100644 --- a/mteb/tasks/Classification/slk/SlovakHateSpeechClassification.py +++ b/mteb/tasks/Classification/slk/SlovakHateSpeechClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SlovakHateSpeechClassification(AbsTaskClassification): +class SlovakHateSpeechClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SlovakHateSpeechClassification", description="The dataset contains posts from a social network with human annotations for hateful or offensive language in Slovak.", diff --git a/mteb/tasks/Classification/slv/FrenkSlClassification.py b/mteb/tasks/Classification/slv/FrenkSlClassification.py index a5ee3950d2..73ed5ef483 100644 --- a/mteb/tasks/Classification/slv/FrenkSlClassification.py +++ b/mteb/tasks/Classification/slv/FrenkSlClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class FrenkSlClassification(AbsTaskClassification): +class FrenkSlClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="FrenkSlClassification", description="Slovenian subset of the FRENK dataset. Also available on HuggingFace dataset hub: English subset, Croatian subset.", diff --git a/mteb/tasks/Classification/spa/SpanishNewsClassification.py b/mteb/tasks/Classification/spa/SpanishNewsClassification.py index 63aa8833e4..3d138213b3 100644 --- a/mteb/tasks/Classification/spa/SpanishNewsClassification.py +++ b/mteb/tasks/Classification/spa/SpanishNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SpanishNewsClassification(AbsTaskClassification): +class SpanishNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SpanishNewsClassification", description="A Spanish dataset for news classification. The dataset includes articles from reputable Spanish news sources spanning 12 different categories.", diff --git a/mteb/tasks/Classification/spa/SpanishSentimentClassification.py b/mteb/tasks/Classification/spa/SpanishSentimentClassification.py index f02bff1f1b..0b9e31b8f1 100644 --- a/mteb/tasks/Classification/spa/SpanishSentimentClassification.py +++ b/mteb/tasks/Classification/spa/SpanishSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SpanishSentimentClassification(AbsTaskClassification): +class SpanishSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SpanishSentimentClassification", description="A Spanish dataset for sentiment classification.", diff --git a/mteb/tasks/Classification/ssw/SiswatiNewsClassification.py b/mteb/tasks/Classification/ssw/SiswatiNewsClassification.py index ca6c801c7a..88650fc28e 100644 --- a/mteb/tasks/Classification/ssw/SiswatiNewsClassification.py +++ b/mteb/tasks/Classification/ssw/SiswatiNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SiswatiNewsClassification(AbsTaskClassification): +class SiswatiNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SiswatiNewsClassification", description="Siswati News Classification Dataset", diff --git a/mteb/tasks/Classification/svk/SlovakMovieReviewSentimentClassification.py b/mteb/tasks/Classification/svk/SlovakMovieReviewSentimentClassification.py index ddfc110e99..f1d7b9ce7e 100644 --- a/mteb/tasks/Classification/svk/SlovakMovieReviewSentimentClassification.py +++ b/mteb/tasks/Classification/svk/SlovakMovieReviewSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SlovakMovieReviewSentimentClassification(AbsTaskClassification): +class SlovakMovieReviewSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SlovakMovieReviewSentimentClassification", description="User reviews of movies on the CSFD movie database, with 2 sentiment classes (positive, negative)", diff --git a/mteb/tasks/Classification/swa/SwahiliNewsClassification.py b/mteb/tasks/Classification/swa/SwahiliNewsClassification.py index 385acb051f..7799216607 100644 --- a/mteb/tasks/Classification/swa/SwahiliNewsClassification.py +++ b/mteb/tasks/Classification/swa/SwahiliNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SwahiliNewsClassification(AbsTaskClassification): +class SwahiliNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SwahiliNewsClassification", description="Dataset for Swahili News Classification, categorized with 6 domains (Local News (Kitaifa), International News (Kimataifa), Finance News (Uchumi), Health News (Afya), Sports News (Michezo), and Entertainment News (Burudani)). Building and Optimizing Swahili Language Models: Techniques, Embeddings, and Datasets", diff --git a/mteb/tasks/Classification/swe/DalajClassification.py b/mteb/tasks/Classification/swe/DalajClassification.py index 7e640083b4..1162d6f9dc 100644 --- a/mteb/tasks/Classification/swe/DalajClassification.py +++ b/mteb/tasks/Classification/swe/DalajClassification.py @@ -1,11 +1,11 @@ # SuperLIM tasks from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class DalajClassification(AbsTaskClassification): +class DalajClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="DalajClassification", dataset={ diff --git a/mteb/tasks/Classification/swe/SweRecClassification.py b/mteb/tasks/Classification/swe/SweRecClassification.py index 4fa33e4682..3afb6dffdc 100644 --- a/mteb/tasks/Classification/swe/SweRecClassification.py +++ b/mteb/tasks/Classification/swe/SweRecClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SweRecClassification(AbsTaskClassification): +class SweRecClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SweRecClassification", description="A Swedish dataset for sentiment classification on review", diff --git a/mteb/tasks/Classification/swe/SwedishSentimentClassification.py b/mteb/tasks/Classification/swe/SwedishSentimentClassification.py index 44ea89228c..f7d460996e 100644 --- a/mteb/tasks/Classification/swe/SwedishSentimentClassification.py +++ b/mteb/tasks/Classification/swe/SwedishSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SwedishSentimentClassification(AbsTaskClassification): +class SwedishSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="SwedishSentimentClassification", description="Dataset of Swedish reviews scarped from various public available websites", diff --git a/mteb/tasks/Classification/tam/TamilNewsClassification.py b/mteb/tasks/Classification/tam/TamilNewsClassification.py index a0145927ef..9d9222e58b 100644 --- a/mteb/tasks/Classification/tam/TamilNewsClassification.py +++ b/mteb/tasks/Classification/tam/TamilNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class TamilNewsClassification(AbsTaskClassification): +class TamilNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="TamilNewsClassification", description="A Tamil dataset for 6-class classification of Tamil news articles", diff --git a/mteb/tasks/Classification/tel/TeluguAndhraJyotiNewsClassification.py b/mteb/tasks/Classification/tel/TeluguAndhraJyotiNewsClassification.py index 41cd90fe22..d82de35907 100644 --- a/mteb/tasks/Classification/tel/TeluguAndhraJyotiNewsClassification.py +++ b/mteb/tasks/Classification/tel/TeluguAndhraJyotiNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class TeluguAndhraJyotiNewsClassification(AbsTaskClassification): +class TeluguAndhraJyotiNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="TeluguAndhraJyotiNewsClassification", description="A Telugu dataset for 5-class classification of Telugu news articles", diff --git a/mteb/tasks/Classification/tha/WisesightSentimentClassification.py b/mteb/tasks/Classification/tha/WisesightSentimentClassification.py index c6a041eae9..399d6a2511 100644 --- a/mteb/tasks/Classification/tha/WisesightSentimentClassification.py +++ b/mteb/tasks/Classification/tha/WisesightSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WisesightSentimentClassification(AbsTaskClassification): +class WisesightSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WisesightSentimentClassification", description="Wisesight Sentiment Corpus: Social media messages in Thai language with sentiment label (positive, neutral, negative, question)", diff --git a/mteb/tasks/Classification/tha/WongnaiReviewsClassification.py b/mteb/tasks/Classification/tha/WongnaiReviewsClassification.py index 0153392c0d..4b722b8d03 100644 --- a/mteb/tasks/Classification/tha/WongnaiReviewsClassification.py +++ b/mteb/tasks/Classification/tha/WongnaiReviewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class WongnaiReviewsClassification(AbsTaskClassification): +class WongnaiReviewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="WongnaiReviewsClassification", description="Wongnai features over 200,000 restaurants, beauty salons, and spas across Thailand on its platform, with detailed information about each merchant and user reviews. In this dataset there are 5 classes corressponding each star rating", diff --git a/mteb/tasks/Classification/tsn/TswanaNewsClassification.py b/mteb/tasks/Classification/tsn/TswanaNewsClassification.py index 647d40568a..0a9e017d7d 100644 --- a/mteb/tasks/Classification/tsn/TswanaNewsClassification.py +++ b/mteb/tasks/Classification/tsn/TswanaNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class TswanaNewsClassification(AbsTaskClassification): +class TswanaNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="TswanaNewsClassification", description="Tswana News Classification Dataset", diff --git a/mteb/tasks/Classification/tur/TurkishMovieSentimentClassification.py b/mteb/tasks/Classification/tur/TurkishMovieSentimentClassification.py index 28ee632f2c..069e3dedec 100644 --- a/mteb/tasks/Classification/tur/TurkishMovieSentimentClassification.py +++ b/mteb/tasks/Classification/tur/TurkishMovieSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class TurkishMovieSentimentClassification(AbsTaskClassification): +class TurkishMovieSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="TurkishMovieSentimentClassification", description="Turkish Movie Review Dataset", diff --git a/mteb/tasks/Classification/tur/TurkishProductSentimentClassification.py b/mteb/tasks/Classification/tur/TurkishProductSentimentClassification.py index 823e27410b..81a50ef045 100644 --- a/mteb/tasks/Classification/tur/TurkishProductSentimentClassification.py +++ b/mteb/tasks/Classification/tur/TurkishProductSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class TurkishProductSentimentClassification(AbsTaskClassification): +class TurkishProductSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="TurkishProductSentimentClassification", description="Turkish Product Review Dataset", diff --git a/mteb/tasks/Classification/ukr/UkrFormalityClassification.py b/mteb/tasks/Classification/ukr/UkrFormalityClassification.py index 974c5603b3..460894af74 100644 --- a/mteb/tasks/Classification/ukr/UkrFormalityClassification.py +++ b/mteb/tasks/Classification/ukr/UkrFormalityClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class UkrFormalityClassification(AbsTaskClassification): +class UkrFormalityClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="UkrFormalityClassification", description=""" diff --git a/mteb/tasks/Classification/urd/UrduRomanSentimentClassification.py b/mteb/tasks/Classification/urd/UrduRomanSentimentClassification.py index c7ce05a81f..055b2f91aa 100644 --- a/mteb/tasks/Classification/urd/UrduRomanSentimentClassification.py +++ b/mteb/tasks/Classification/urd/UrduRomanSentimentClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class UrduRomanSentimentClassification(AbsTaskClassification): +class UrduRomanSentimentClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="UrduRomanSentimentClassification", description="The Roman Urdu dataset is a data corpus comprising of more than 20000 records tagged for sentiment (Positive, Negative, Neutral)", diff --git a/mteb/tasks/Classification/vie/VieStudentFeedbackClassification.py b/mteb/tasks/Classification/vie/VieStudentFeedbackClassification.py index b6eda24ec4..a58c61daac 100644 --- a/mteb/tasks/Classification/vie/VieStudentFeedbackClassification.py +++ b/mteb/tasks/Classification/vie/VieStudentFeedbackClassification.py @@ -1,12 +1,12 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata TEST_SAMPLES = 2048 -class VieStudentFeedbackClassification(AbsTaskClassification): +class VieStudentFeedbackClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="VieStudentFeedbackClassification", description="A Vietnamese dataset for classification of student feedback", diff --git a/mteb/tasks/Classification/zho/CMTEBClassification.py b/mteb/tasks/Classification/zho/CMTEBClassification.py index 6fddb21f36..1a285b161d 100644 --- a/mteb/tasks/Classification/zho/CMTEBClassification.py +++ b/mteb/tasks/Classification/zho/CMTEBClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class TNews(AbsTaskClassification): +class TNews(AbsTaskAnyClassification): metadata = TaskMetadata( name="TNews", description="Short Text Classification for News", @@ -75,7 +75,7 @@ class TNews(AbsTaskClassification): samples_per_label = 32 -class IFlyTek(AbsTaskClassification): +class IFlyTek(AbsTaskAnyClassification): metadata = TaskMetadata( name="IFlyTek", description="Long Text classification for the description of Apps", @@ -148,7 +148,7 @@ class IFlyTek(AbsTaskClassification): n_experiments = 5 -class MultilingualSentiment(AbsTaskClassification): +class MultilingualSentiment(AbsTaskAnyClassification): metadata = TaskMetadata( name="MultilingualSentiment", description="A collection of multilingual sentiments datasets grouped into 3 classes -- positive, neutral, negative", @@ -177,7 +177,7 @@ class MultilingualSentiment(AbsTaskClassification): samples_per_label = 32 -class JDReview(AbsTaskClassification): +class JDReview(AbsTaskAnyClassification): metadata = TaskMetadata( name="JDReview", description="review for iphone", @@ -211,7 +211,7 @@ class JDReview(AbsTaskClassification): samples_per_label = 32 -class OnlineShopping(AbsTaskClassification): +class OnlineShopping(AbsTaskAnyClassification): metadata = TaskMetadata( name="OnlineShopping", description="Sentiment Analysis of User Reviews on Online Shopping Websites", @@ -245,7 +245,7 @@ class OnlineShopping(AbsTaskClassification): samples_per_label = 32 -class Waimai(AbsTaskClassification): +class Waimai(AbsTaskAnyClassification): metadata = TaskMetadata( name="Waimai", description="Sentiment Analysis of user reviews on takeaway platforms", diff --git a/mteb/tasks/Classification/zho/YueOpenriceReviewClassification.py b/mteb/tasks/Classification/zho/YueOpenriceReviewClassification.py index 7b719a8804..64079031b9 100644 --- a/mteb/tasks/Classification/zho/YueOpenriceReviewClassification.py +++ b/mteb/tasks/Classification/zho/YueOpenriceReviewClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class YueOpenriceReviewClassification(AbsTaskClassification): +class YueOpenriceReviewClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="YueOpenriceReviewClassification", description="A Cantonese dataset for review classification", diff --git a/mteb/tasks/Classification/zul/IsiZuluNewsClassification.py b/mteb/tasks/Classification/zul/IsiZuluNewsClassification.py index 544c87c0b0..0166be4817 100644 --- a/mteb/tasks/Classification/zul/IsiZuluNewsClassification.py +++ b/mteb/tasks/Classification/zul/IsiZuluNewsClassification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class IsiZuluNewsClassification(AbsTaskClassification): +class IsiZuluNewsClassification(AbsTaskAnyClassification): metadata = TaskMetadata( name="IsiZuluNewsClassification", description="isiZulu News Classification Dataset", diff --git a/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py b/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py index ea0cc5c929..bfe225f612 100644 --- a/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py @@ -1,10 +1,15 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class BirdsnapClassification(AbsTaskImageClassification): +class BirdsnapClassification(AbsTaskAnyClassification): + input_column_name: str = "image" + samples_per_label: int = 16 + n_experiments: int = 5 + label_column_name: str = "common" + metadata = TaskMetadata( name="Birdsnap", description="Classifying bird images from 500 species.", @@ -38,6 +43,3 @@ class BirdsnapClassification(AbsTaskImageClassification): } """, ) - - # Override default column name in the subclass - label_column_name: str = "common" diff --git a/mteb/tasks/Image/ImageClassification/eng/CIFAR.py b/mteb/tasks/Image/ImageClassification/eng/CIFAR.py index f5f4f22666..38433d5282 100644 --- a/mteb/tasks/Image/ImageClassification/eng/CIFAR.py +++ b/mteb/tasks/Image/ImageClassification/eng/CIFAR.py @@ -1,10 +1,14 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class CIFAR10Classification(AbsTaskImageClassification): +class CIFAR10Classification(AbsTaskAnyClassification): + input_column_name: str = "img" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="CIFAR10", description="Classifying images from 10 classes.", @@ -37,10 +41,14 @@ class CIFAR10Classification(AbsTaskImageClassification): } """, ) - image_column_name: str = "img" -class CIFAR100Classification(AbsTaskImageClassification): +class CIFAR100Classification(AbsTaskAnyClassification): + input_column_name: str = "img" + label_column_name: str = "fine_label" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="CIFAR100", description="Classifying images from 100 classes.", @@ -73,5 +81,3 @@ class CIFAR100Classification(AbsTaskImageClassification): } """, ) - image_column_name: str = "img" - label_column_name: str = "fine_label" diff --git a/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py b/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py index c58bd57701..a3acf3406d 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py @@ -1,10 +1,14 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class Caltech101Classification(AbsTaskImageClassification): +class Caltech101Classification(AbsTaskAnyClassification): + input_column_name: str = "image" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="Caltech101", description="Classifying images of 101 widely varied objects.", diff --git a/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py b/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py index f1eaeee66c..ae8d005252 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py @@ -1,10 +1,15 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class Country211Classification(AbsTaskImageClassification): +class Country211Classification(AbsTaskAnyClassification): + input_column_name: str = "jpg" + label_column_name: str = "cls" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="Country211", description="Classifying images of 211 countries.", @@ -36,6 +41,3 @@ class Country211Classification(AbsTaskImageClassification): year={2021} }""", ) - - image_column_name: str = "jpg" - label_column_name: str = "cls" diff --git a/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py b/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py index 3ff81c7a34..dbd55707f7 100644 --- a/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py @@ -1,10 +1,14 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class DTDClassification(AbsTaskImageClassification): +class DTDClassification(AbsTaskAnyClassification): + input_column_name: str = "image" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="DTD", description="Describable Textures Dataset in 47 categories.", diff --git a/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py b/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py index c5d84d3570..d7da427391 100644 --- a/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py @@ -1,10 +1,14 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class EuroSATClassification(AbsTaskImageClassification): +class EuroSATClassification(AbsTaskAnyClassification): + input_column_name: str = "image" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="EuroSAT", description="Classifying satellite images.", diff --git a/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py b/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py index 59ebf69677..1560e502c2 100644 --- a/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py @@ -1,10 +1,15 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class FER2013Classification(AbsTaskImageClassification): +class FER2013Classification(AbsTaskAnyClassification): + input_column_name: str = "jpg" + label_column_name: str = "cls" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="FER2013", description="Classifying facial emotions.", @@ -40,5 +45,3 @@ class FER2013Classification(AbsTaskImageClassification): } """, ) - image_column_name: str = "jpg" - label_column_name: str = "cls" diff --git a/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py b/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py index 6a0f38a34a..22321ced0f 100644 --- a/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py @@ -1,10 +1,16 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class FGVCAircraftClassification(AbsTaskImageClassification): +class FGVCAircraftClassification(AbsTaskAnyClassification): + input_column_name: str = "image" + samples_per_label: int = 16 + n_experiments: int = 5 + # could be family, manufacturer, or variant. Variant has the higher number of classes. + label_column_name: str = "variant" + metadata = TaskMetadata( name="FGVCAircraft", description="Classifying aircraft images from 41 manufacturers and 102 variants.", @@ -41,4 +47,3 @@ class FGVCAircraftClassification(AbsTaskImageClassification): } """, ) - label_column_name: str = "variant" ## could be family, manufacturer, or variant. Variant has the higher number of classes. diff --git a/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py b/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py index b1f0bcb548..86678d4b95 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py @@ -1,10 +1,14 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class Food101Classification(AbsTaskImageClassification): +class Food101Classification(AbsTaskAnyClassification): + input_column_name: str = "image" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="Food101Classification", description="Classifying food.", diff --git a/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py b/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py index ad68e6a605..9b885c9f17 100644 --- a/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py @@ -1,10 +1,15 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class GTSRBClassification(AbsTaskImageClassification): +class GTSRBClassification(AbsTaskAnyClassification): + input_column_name = "webp" + label_column_name = "cls" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="GTSRB", description="""The German Traffic Sign Recognition Benchmark (GTSRB) is a multi-class classification dataset for traffic signs. It consists of dataset of more than 50,000 traffic sign images. The dataset comprises 43 classes with unbalanced class frequencies.""", @@ -41,5 +46,3 @@ class GTSRBClassification(AbsTaskImageClassification): doi={10.1109/IJCNN.2011.6033395}} """, ) - image_column_name = "webp" - label_column_name = "cls" diff --git a/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py b/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py index a08477b2f8..ed47919667 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py +++ b/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py @@ -1,10 +1,15 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class Imagenet1kClassification(AbsTaskImageClassification): +class Imagenet1kClassification(AbsTaskAnyClassification): + input_column_name: str = "jpg" + label_column_name: str = "cls" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="Imagenet1k", description="ImageNet, a large-scale ontology of images built upon the backbone of the WordNet structure.", @@ -38,5 +43,3 @@ class Imagenet1kClassification(AbsTaskImageClassification): organization={Ieee} }""", ) - image_column_name: str = "jpg" - label_column_name: str = "cls" diff --git a/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py b/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py index 17adce79ad..48a89b09cb 100644 --- a/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py @@ -1,10 +1,14 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class MNISTClassification(AbsTaskImageClassification): +class MNISTClassification(AbsTaskAnyClassification): + input_column_name: str = "image" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="MNIST", description="Classifying handwritten digits.", diff --git a/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py b/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py index 1915bbec73..9cc49a286d 100644 --- a/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py @@ -1,10 +1,14 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class OxfordFlowersClassification(AbsTaskImageClassification): +class OxfordFlowersClassification(AbsTaskAnyClassification): + input_column_name: str = "image" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="OxfordFlowersClassification", description="Classifying flowers", diff --git a/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py b/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py index 93d5adf1c9..252879a84e 100644 --- a/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py @@ -1,10 +1,14 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class OxfordPetsClassification(AbsTaskImageClassification): +class OxfordPetsClassification(AbsTaskAnyClassification): + input_column_name: str = "image" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="OxfordPets", description="Classifying animal images.", diff --git a/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py b/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py index b3ee2a0b47..6d4cb71184 100644 --- a/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py @@ -1,10 +1,15 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class PatchCamelyonClassification(AbsTaskImageClassification): +class PatchCamelyonClassification(AbsTaskAnyClassification): + input_column_name = "webp" + label_column_name = "cls" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="PatchCamelyon", description="""Histopathology diagnosis classification dataset.""", @@ -51,5 +56,3 @@ class PatchCamelyonClassification(AbsTaskImageClassification): } """, ) - image_column_name = "webp" - label_column_name = "cls" diff --git a/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py b/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py index e3549d57cf..014326803f 100644 --- a/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py @@ -1,10 +1,14 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class RESISC45Classification(AbsTaskImageClassification): +class RESISC45Classification(AbsTaskAnyClassification): + input_column_name: str = "image" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="RESISC45", description="Remote Sensing Image Scene Classification by Northwestern Polytechnical University (NWPU).", diff --git a/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py b/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py index 5684afdf49..4d290bca76 100644 --- a/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py @@ -1,10 +1,10 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class STL10Classification(AbsTaskImageClassification): +class STL10Classification(AbsTaskAnyClassification): metadata = TaskMetadata( name="STL10", description="Classifying 96x96 images from 10 classes.", diff --git a/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py b/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py index 25c286b85d..3d3c9e39fd 100644 --- a/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py @@ -1,10 +1,14 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class SUN397Classification(AbsTaskImageClassification): +class SUN397Classification(AbsTaskAnyClassification): + input_column_name: str = "image" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="SUN397", description="Large scale scene recognition in 397 categories.", diff --git a/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py b/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py index 3993d6646e..3ff5595902 100644 --- a/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py @@ -1,10 +1,14 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class StanfordCarsClassification(AbsTaskImageClassification): +class StanfordCarsClassification(AbsTaskAnyClassification): + input_column_name: str = "image" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="StanfordCars", description="Classifying car images from 196 makes.", diff --git a/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py b/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py index e2f26c5050..46c4241a73 100644 --- a/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py @@ -1,10 +1,14 @@ from __future__ import annotations -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification +from mteb.abstasks import AbsTaskAnyClassification from mteb.abstasks.TaskMetadata import TaskMetadata -class UCF101Classification(AbsTaskImageClassification): +class UCF101Classification(AbsTaskAnyClassification): + input_column_name: str = "image" + samples_per_label: int = 16 + n_experiments: int = 5 + metadata = TaskMetadata( name="UCF101", description="""UCF101 is an action recognition data set of realistic diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index dfefb1ae62..311c2285d0 100644 --- a/tests/test_benchmark/mock_tasks.py +++ b/tests/test_benchmark/mock_tasks.py @@ -6,8 +6,8 @@ from datasets import Dataset, DatasetDict from PIL import Image +from mteb.abstasks.AbsTaskAnyClassification import AbsTaskAnyClassification from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining -from mteb.abstasks.AbsTaskClassification import AbsTaskClassification from mteb.abstasks.AbsTaskClustering import AbsTaskClustering from mteb.abstasks.AbsTaskClusteringFast import AbsTaskClusteringFast from mteb.abstasks.AbsTaskMultilabelClassification import ( @@ -19,7 +19,6 @@ from mteb.abstasks.AbsTaskSummarization import AbsTaskSummarization from mteb.abstasks.Image.AbsTaskAny2AnyMultiChoice import AbsTaskAny2AnyMultiChoice from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval -from mteb.abstasks.Image.AbsTaskImageClassification import AbsTaskImageClassification from mteb.abstasks.Image.AbsTaskImageClustering import AbsTaskImageClustering from mteb.abstasks.Image.AbsTaskImageMultilabelClassification import ( # noqa AbsTaskImageMultilabelClassification, @@ -85,35 +84,45 @@ def base_retrieval_datasplit(): } -class MockClassificationTask(AbsTaskClassification): +class MockClassificationTask(AbsTaskAnyClassification): expected_stats = { "test": { "num_samples": 2, "number_of_characters": 52, "number_texts_intersect_with_train": 1, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 1, - "average_label_per_text": 1.0, - "max_labels_per_text": 1, - "unique_labels": 2, - "labels": {"0": {"count": 1}, "1": {"count": 1}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, }, "train": { "num_samples": 2, "number_of_characters": 53, "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.5, - "max_text_length": 30, - "unique_texts": 2, - "min_labels_per_text": 1, - "average_label_per_text": 1.0, - "max_labels_per_text": 1, - "unique_labels": 2, - "labels": {"0": {"count": 1}, "1": {"count": 1}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, }, } @@ -149,49 +158,64 @@ def load_data(self, **kwargs): self.data_loaded = True -class MockMultilingualClassificationTask(AbsTaskClassification): +class MockMultilingualClassificationTask(AbsTaskAnyClassification): expected_stats = { "test": { "num_samples": 4, "number_of_characters": 104, "number_texts_intersect_with_train": 1, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 1, - "average_label_per_text": 1.0, - "max_labels_per_text": 1, - "unique_labels": 2, - "labels": {"0": {"count": 2}, "1": {"count": 2}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"0": {"count": 2}, "1": {"count": 2}}, + }, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 52, "number_texts_intersect_with_train": 1, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 1, - "average_label_per_text": 1.0, - "max_labels_per_text": 1, - "unique_labels": 2, - "labels": {"0": {"count": 1}, "1": {"count": 1}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, }, "fra": { "num_samples": 2, "number_of_characters": 52, "number_texts_intersect_with_train": 1, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 1, - "average_label_per_text": 1.0, - "max_labels_per_text": 1, - "unique_labels": 2, - "labels": {"0": {"count": 1}, "1": {"count": 1}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, }, }, }, @@ -199,43 +223,58 @@ class MockMultilingualClassificationTask(AbsTaskClassification): "num_samples": 4, "number_of_characters": 106, "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.5, - "max_text_length": 30, - "unique_texts": 2, - "min_labels_per_text": 1, - "average_label_per_text": 1.0, - "max_labels_per_text": 1, - "unique_labels": 2, - "labels": {"0": {"count": 2}, "1": {"count": 2}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"0": {"count": 2}, "1": {"count": 2}}, + }, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 53, "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.5, - "max_text_length": 30, - "unique_texts": 2, - "min_labels_per_text": 1, - "average_label_per_text": 1.0, - "max_labels_per_text": 1, - "unique_labels": 2, - "labels": {"0": {"count": 1}, "1": {"count": 1}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, }, "fra": { "num_samples": 2, "number_of_characters": 53, "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.5, - "max_text_length": 30, - "unique_texts": 2, - "min_labels_per_text": 1, - "average_label_per_text": 1.0, - "max_labels_per_text": 1, - "unique_labels": 2, - "labels": {"0": {"count": 1}, "1": {"count": 1}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, }, }, }, @@ -1613,29 +1652,39 @@ class MockMultilabelClassification(AbsTaskMultilabelClassification): "num_samples": 6, "number_of_characters": 156, "number_texts_intersect_with_train": 1, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, }, "train": { "num_samples": 6, "number_of_characters": 159, "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.5, - "max_text_length": 30, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, }, } @@ -1676,43 +1725,58 @@ class MockMultilingualMultilabelClassification(AbsTaskMultilabelClassification): "num_samples": 12, "number_of_characters": 312, "number_texts_intersect_with_train": 1, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 12}, "1": {"count": 12}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 12}, "1": {"count": 12}}, + }, "hf_subset_descriptive_stats": { "eng": { "num_samples": 6, "number_of_characters": 156, "number_texts_intersect_with_train": 1, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, }, "fra": { "num_samples": 6, "number_of_characters": 156, "number_texts_intersect_with_train": 1, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, }, }, }, @@ -1720,43 +1784,58 @@ class MockMultilingualMultilabelClassification(AbsTaskMultilabelClassification): "num_samples": 12, "number_of_characters": 318, "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.5, - "max_text_length": 30, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 12}, "1": {"count": 12}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 12}, "1": {"count": 12}}, + }, "hf_subset_descriptive_stats": { "eng": { "num_samples": 6, "number_of_characters": 159, "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.5, - "max_text_length": 30, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, }, "fra": { "num_samples": 6, "number_of_characters": 159, "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.5, - "max_text_length": 30, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, + "text_statistics": { + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + }, + "image_statistics": None, + "label_statistics": { + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, }, }, }, @@ -2117,13 +2196,39 @@ def load_data(self, **kwargs): class MockMultiChoiceTask(AbsTaskAny2AnyMultiChoice): expected_stats = { "test": { - "num_samples": 2, - "average_question_length": 26.0, - "average_choice_length": 30.5, - "unique_labels": 2, - "labels": {"1": {"count": 1}, "0": {"count": 1}}, + "number_of_characters": 60, + "num_samples": 4, + "num_queries": 2, + "num_documents": 2, + "min_document_length": 0, + "average_document_length": 0, + "max_document_length": 0, + "unique_documents": 0, + "min_document_image_width": 100, + "average_document_image_width": 100.0, + "max_document_image_width": 100, + "min_document_image_height": 100, + "average_document_image_height": 100.0, + "max_document_image_height": 100, + "num_document_images": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "num_query_images": 2, + "min_query_image_width": 100, + "average_query_image_width": 100.0, + "max_query_image_width": 100, + "min_query_image_height": 100, + "average_query_image_height": 100.0, + "max_query_image_height": 100, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 2, } } + metadata = TaskMetadata( type="Any2AnyMultiChoice", name="MockMultiChoice", @@ -2263,11 +2368,24 @@ def load_data(self, **kwargs): class MockAny2AnyRetrievalI2TTask(AbsTaskAny2AnyRetrieval): expected_stats = { "test": { - "average_document_length": 30.0, - "average_query_length": 26.0, - "num_documents": 2, + "number_of_characters": 60, + "num_samples": 4, "num_queries": 2, - "average_relevant_docs_per_query": 1.0, + "num_documents": 2, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, + "unique_documents": 2, + "num_document_images": 0, + "min_query_length": 0, + "average_query_length": 0, + "max_query_length": 0, + "unique_queries": 0, + "num_query_images": 2, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, } } @@ -2320,13 +2438,27 @@ def load_data(self, **kwargs): class MockAny2AnyRetrievalT2ITask(AbsTaskAny2AnyRetrieval): expected_stats = { "test": { - "average_document_length": 30.0, - "average_query_length": 26.0, - "num_documents": 2, + "number_of_characters": 60, + "num_samples": 4, "num_queries": 2, - "average_relevant_docs_per_query": 1.0, + "num_documents": 2, + "min_document_length": 0, + "average_document_length": 0, + "max_document_length": 0, + "unique_documents": 0, + "num_document_images": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "num_query_images": 0, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, } } + metadata = TaskMetadata( type="Any2AnyRetrieval", name="MockAny2AnyRetrievalT2I", @@ -2373,89 +2505,63 @@ def load_data(self, **kwargs): self.data_loaded = True -class MockImageClassificationTask(AbsTaskImageClassification): +class MockImageClassificationTask(AbsTaskAnyClassification): expected_stats = { "test": { "num_samples": 2, - "average_image_size": 26.0, - "unique_labels": 2, - "labels": {"1": {"count": 1}, "0": {"count": 1}}, + "number_of_characters": 0, + "number_texts_intersect_with_train": None, + "text_statistics": None, + "image_statistics": { + "min_image_width": 100, + "average_image_width": 100.0, + "max_image_width": 100, + "min_image_height": 100, + "average_image_height": 100.0, + "max_image_height": 100, + }, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"1": {"count": 1}, "0": {"count": 1}}, + }, }, "train": { "num_samples": 10, - "average_image_size": 26.0, - "unique_labels": 2, - "labels": {"1": {"count": 5}, "0": {"count": 5}}, - }, - } - - metadata = TaskMetadata( - type="ImageClassification", - name="MockImageClassification", - main_score="accuracy", - **general_args, # type: ignore - ) - metadata.modalities = ["image"] - metadata.category = "i2i" - - def __init__(self, **kwargs): - super().__init__(n_experiments=1, samples_per_label=5, **kwargs) - - def load_data(self, **kwargs): - images = [np.random.randint(0, 255, (100, 100, 3)) for _ in range(2)] # noqa: NPY002 - images = [ - Image.fromarray(image.astype("uint8")).convert("RGBA") for image in images - ] - labels = [1, 0] - - self.dataset = DatasetDict( - { - "test": Dataset.from_dict( - { - "image": images, - "label": labels, - } - ), - "train": Dataset.from_dict( - { - "image": images * 5, - "label": labels * 5, - } - ), - } - ) - self.data_loaded = True - - -class MockImageClassificationKNNTask(AbsTaskImageClassification): - expected_stats = ( - { - "test": { - "num_samples": 2, - "average_image_size": 26.0, - "unique_labels": 2, - "labels": {"1": {"count": 1}, "0": {"count": 1}}, + "number_of_characters": 0, + "number_texts_intersect_with_train": None, + "text_statistics": None, + "image_statistics": { + "min_image_width": 100, + "average_image_width": 100.0, + "max_image_width": 100, + "min_image_height": 100, + "average_image_height": 100.0, + "max_image_height": 100, }, - "train": { - "num_samples": 10, - "average_image_size": 26.0, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": {"1": {"count": 5}, "0": {"count": 5}}, }, }, - ) + } metadata = TaskMetadata( type="ImageClassification", - name="MockImageClassificationKNN", + name="MockImageClassification", main_score="accuracy", **general_args, # type: ignore ) metadata.modalities = ["image"] - metadata.category = "i2i" - - def __init__(self, **kwargs): - super().__init__(method="kNN", n_experiments=1, samples_per_label=5, **kwargs) + metadata.category = "i2c" + n_experiments = 1 + samples_per_label = 5 + input_column_name = "image" def load_data(self, **kwargs): images = [np.random.randint(0, 255, (100, 100, 3)) for _ in range(2)] # noqa: NPY002 @@ -2483,51 +2589,142 @@ def load_data(self, **kwargs): self.data_loaded = True -class MockMultilingualImageClassificationTask(AbsTaskImageClassification): +class MockMultilingualImageClassificationTask(AbsTaskAnyClassification): n_experiments = 1 samples_per_label = 5 expected_stats = { "test": { "num_samples": 4, - "average_image_size": 26.0, - "unique_labels": 2, - "labels": {"1": {"count": 2}, "0": {"count": 2}}, + "number_of_characters": 0, + "number_texts_intersect_with_train": None, + "text_statistics": None, + "image_statistics": { + "min_image_width": 100, + "average_image_width": 100.0, + "max_image_width": 100, + "min_image_height": 100, + "average_image_height": 100.0, + "max_image_height": 100, + }, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"1": {"count": 2}, "0": {"count": 2}}, + }, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, - "average_image_size": 26.0, - "unique_labels": 2, - "labels": {"1": {"count": 1}, "0": {"count": 1}}, + "number_of_characters": 0, + "number_texts_intersect_with_train": None, + "text_statistics": None, + "image_statistics": { + "min_image_width": 100, + "average_image_width": 100.0, + "max_image_width": 100, + "min_image_height": 100, + "average_image_height": 100.0, + "max_image_height": 100, + }, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"1": {"count": 1}, "0": {"count": 1}}, + }, }, "fra": { "num_samples": 2, - "average_image_size": 26.0, - "unique_labels": 2, - "labels": {"1": {"count": 1}, "0": {"count": 1}}, + "number_of_characters": 0, + "number_texts_intersect_with_train": None, + "text_statistics": None, + "image_statistics": { + "min_image_width": 100, + "average_image_width": 100.0, + "max_image_width": 100, + "min_image_height": 100, + "average_image_height": 100.0, + "max_image_height": 100, + }, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"1": {"count": 1}, "0": {"count": 1}}, + }, }, }, }, "train": { "num_samples": 20, - "average_image_size": 26.0, - "unique_labels": 2, - "labels": {"1": {"count": 10}, "0": {"count": 10}}, + "number_of_characters": 0, + "number_texts_intersect_with_train": None, + "text_statistics": None, + "image_statistics": { + "min_image_width": 100, + "average_image_width": 100.0, + "max_image_width": 100, + "min_image_height": 100, + "average_image_height": 100.0, + "max_image_height": 100, + }, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"1": {"count": 10}, "0": {"count": 10}}, + }, "hf_subset_descriptive_stats": { "eng": { "num_samples": 10, - "average_image_size": 26.0, - "unique_labels": 2, - "labels": {"1": {"count": 5}, "0": {"count": 5}}, + "number_of_characters": 0, + "number_texts_intersect_with_train": None, + "text_statistics": None, + "image_statistics": { + "min_image_width": 100, + "average_image_width": 100.0, + "max_image_width": 100, + "min_image_height": 100, + "average_image_height": 100.0, + "max_image_height": 100, + }, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"1": {"count": 5}, "0": {"count": 5}}, + }, }, "fra": { "num_samples": 10, - "average_image_size": 26.0, - "unique_labels": 2, - "labels": {"1": {"count": 5}, "0": {"count": 5}}, + "number_of_characters": 0, + "number_texts_intersect_with_train": None, + "text_statistics": None, + "image_statistics": { + "min_image_width": 100, + "average_image_width": 100.0, + "max_image_width": 100, + "min_image_height": 100, + "average_image_height": 100.0, + "max_image_height": 100, + }, + "label_statistics": { + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, + "unique_labels": 2, + "labels": {"1": {"count": 5}, "0": {"count": 5}}, + }, }, }, }, } + metadata = TaskMetadata( type="ImageClassification", name="MockMultilingualImageClassification", @@ -2535,8 +2732,9 @@ class MockMultilingualImageClassificationTask(AbsTaskImageClassification): **general_args, # type: ignore ) metadata.modalities = ["image"] - metadata.category = "i2i" + metadata.category = "i2c" metadata.eval_langs = multilingual_eval_langs + input_column_name = "image" def load_data(self, **kwargs): images = [np.random.randint(0, 255, (100, 100, 3)) for _ in range(2)] # noqa: NPY002 @@ -2572,8 +2770,13 @@ class MockImageClusteringTask(AbsTaskImageClustering): expected_stats = { "test": { "num_samples": 2, - "average_image_size": 26.0, - "unique_labels": 2, + "unique_num_labels": 2, + "min_image_width": 100, + "average_image_width": 100.0, + "max_image_width": 100, + "min_image_height": 100, + "average_image_height": 100.0, + "max_image_height": 100, "labels": {"1": {"count": 1}, "0": {"count": 1}}, } } @@ -2609,11 +2812,23 @@ def load_data(self, **kwargs): class MockImageMultilabelClassificationTask(AbsTaskImageMultilabelClassification): expected_stats = { "test": { - "average_image_size": 26.0, - "average_label_per_image": 2.0, - "num_samples": 6, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, + "num_samples": 4, + "min_image_width": 100, + "average_image_width": 100.0, + "max_image_width": 100, + "min_image_height": 100, + "average_image_height": 100.0, + "max_image_height": 100, + "min_labels_per_sample": 2, + "average_label_per_sample": 2.0, + "max_labels_per_sample": 2, + "unique_num_labels": 4, + "labels": { + "0": {"count": 2}, + "3": {"count": 2}, + "1": {"count": 2}, + "2": {"count": 2}, + }, } } @@ -2659,29 +2874,66 @@ class MockMultilingualImageMultilabelClassificationTask( ): expected_stats = { "test": { - "average_image_size": 26.0, - "average_label_per_image": 2.0, - "num_samples": 12, - "unique_labels": 2, - "labels": {"0": {"count": 12}, "1": {"count": 12}}, + "num_samples": 8, + "min_image_width": 100, + "average_image_width": 100.0, + "max_image_width": 100, + "min_image_height": 100, + "average_image_height": 100.0, + "max_image_height": 100, + "min_labels_per_sample": 2, + "average_label_per_sample": 2.0, + "max_labels_per_sample": 2, + "unique_num_labels": 4, + "labels": { + "0": {"count": 4}, + "3": {"count": 4}, + "1": {"count": 4}, + "2": {"count": 4}, + }, "hf_subset_descriptive_stats": { "eng": { - "average_image_size": 26.0, - "average_label_per_image": 2.0, - "num_samples": 6, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, + "num_samples": 4, + "min_image_width": 100, + "average_image_width": 100.0, + "max_image_width": 100, + "min_image_height": 100, + "average_image_height": 100.0, + "max_image_height": 100, + "min_labels_per_sample": 2, + "average_label_per_sample": 2.0, + "max_labels_per_sample": 2, + "unique_num_labels": 4, + "labels": { + "0": {"count": 2}, + "3": {"count": 2}, + "1": {"count": 2}, + "2": {"count": 2}, + }, }, "fra": { - "average_image_size": 26.0, - "average_label_per_image": 2.0, - "num_samples": 6, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, + "num_samples": 4, + "min_image_width": 100, + "average_image_width": 100.0, + "max_image_width": 100, + "min_image_height": 100, + "average_image_height": 100.0, + "max_image_height": 100, + "min_labels_per_sample": 2, + "average_label_per_sample": 2.0, + "max_labels_per_sample": 2, + "unique_num_labels": 4, + "labels": { + "0": {"count": 2}, + "3": {"count": 2}, + "1": {"count": 2}, + "2": {"count": 2}, + }, }, }, } } + metadata = TaskMetadata( type="ImageMultilabelClassification", name="MockMultilingualImageMultilabelClassification", @@ -2725,11 +2977,13 @@ def load_data(self, **kwargs): class MockImageTextPairClassificationTask(AbsTaskImageTextPairClassification): expected_stats = { "test": { - "average_image_size": 26.0, - "average_text_length": 30.0, "num_samples": 2, - "unique_labels": 2, - "labels": {"1": {"count": 1}, "0": {"count": 1}}, + "num_images": 2, + "num_texts": 2, + "num_unique_texts": 2, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, } } @@ -2767,27 +3021,26 @@ class MockMultilingualImageTextPairClassificationTask( ): expected_stats = { "test": { - "average_image_size": 26.0, - "average_text_length": 30.0, - "num_samples": 4, - "unique_labels": 2, - "labels": {"1": {"count": 2}, "0": {"count": 2}}, "hf_subset_descriptive_stats": { "eng": { - "average_image_size": 26.0, - "average_text_length": 30.0, "num_samples": 2, - "unique_labels": 2, - "labels": {"1": {"count": 1}, "0": {"count": 1}}, + "num_images": 2, + "num_texts": 2, + "num_unique_texts": 2, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, }, "fra": { - "average_image_size": 26.0, - "average_text_length": 30.0, "num_samples": 2, - "unique_labels": 2, - "labels": {"1": {"count": 1}, "0": {"count": 1}}, + "num_images": 2, + "num_texts": 2, + "num_unique_texts": 2, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, }, - }, + } } } @@ -2814,7 +3067,7 @@ def load_data(self, **kwargs): "image": images, "caption": texts, } - ), + ) } self.dataset = DatasetDict( @@ -2829,10 +3082,22 @@ def load_data(self, **kwargs): class MockVisualSTSTask(AbsTaskVisualSTS): expected_stats = { "test": { - "average_image_size": 26.0, - "average_text_length": 30.0, "num_samples": 2, - "average_score": 0.5, + "min_image1_width": 100, + "average_image1_width": 100.0, + "max_image1_width": 100, + "min_image1_height": 100, + "average_image1_height": 100.0, + "max_image1_height": 100, + "min_image2_width": 100, + "average_image2_width": 100.0, + "max_image2_width": 100, + "min_image2_height": 100, + "average_image2_height": 100.0, + "max_image2_height": 100, + "min_score": 0.5, + "avg_score": 0.5, + "max_score": 0.5, } } @@ -2870,10 +3135,18 @@ def load_data(self, **kwargs): class MockZeroShotClassificationTask(AbsTaskZeroShotClassification): expected_stats = { "test": { - "average_text_length": 26.0, "num_samples": 2, - "unique_labels": 2, - "labels": {"1": {"count": 1}, "0": {"count": 1}}, + "unique_num_labels": 2, + "min_image_width": 100, + "average_image_width": 100.0, + "max_image_width": 100, + "min_image_height": 100, + "average_image_height": 100.0, + "max_image_height": 100, + "min_label_text_length": 23, + "average_label_text_length": 26.0, + "max_label_text_length": 29, + "labels": {"label1": {"count": 1}, "label2": {"count": 1}}, } } diff --git a/tests/test_benchmark/task_grid.py b/tests/test_benchmark/task_grid.py index 3105a280ef..95f41282e8 100644 --- a/tests/test_benchmark/task_grid.py +++ b/tests/test_benchmark/task_grid.py @@ -12,7 +12,6 @@ MockClassificationTask, MockClusteringFastTask, MockClusteringTask, - MockImageClassificationKNNTask, MockImageClassificationTask, MockImageClusteringTask, MockImageMultilabelClassificationTask, @@ -113,7 +112,6 @@ MockAny2AnyRetrievalT2ITask(), MockMultiChoiceTask(), MockImageClassificationTask(), - MockImageClassificationKNNTask(), MockImageClusteringTask(), MockImageTextPairClassificationTask(), MockVisualSTSTask(), @@ -132,3 +130,11 @@ MOCK_MIEB_TASK_REGISTRY = { task.metadata.name: type(task) for task in MOCK_MIEB_TASK_GRID } + +ALL_TASK_TEST_GRID = MOCK_TASK_TEST_GRID + MOCK_MIEB_TASK_GRID + +ALL_TASK_TEST_GRID_AS_STRING = [ + t.metadata.name if isinstance(t, AbsTask) else t for t in ALL_TASK_TEST_GRID +] + +ALL_MOCK_TASK_REGISTRY = {task.metadata.name: type(task) for task in ALL_TASK_TEST_GRID} diff --git a/tests/test_tasks/test_metadata.py b/tests/test_tasks/test_metadata.py index a8d15ee887..69bfb5b04d 100644 --- a/tests/test_tasks/test_metadata.py +++ b/tests/test_tasks/test_metadata.py @@ -2,12 +2,16 @@ import pytest -from tests.test_benchmark.task_grid import MOCK_TASK_TEST_GRID +from tests.test_benchmark.task_grid import ALL_TASK_TEST_GRID -@pytest.mark.parametrize("task", MOCK_TASK_TEST_GRID) +@pytest.mark.parametrize( + "task", + ALL_TASK_TEST_GRID, +) def test_descriptive_stats(task): - # todo integrate MIEB + if task.metadata.name == "MockMultilingualMultiChoice": + pytest.skip("Task computed stats are not implemented correctly yet") result_stat = task.calculate_metadata_metrics() # remove descriptive task file task.metadata.descriptive_stat_path.unlink()