Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion mteb/tasks/Classification/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .ces.CzechSubjectivityClassification import *
from .dan.AngryTweetsClassification import *
from .dan.DanishPoliticalCommentsClassification import *
from .dan.DdiscoCohesionClassification import *
from .dan.DKHateClassification import *
from .dan.LccSentimentClassification import *
from .deu.GermanPoliticiansTwitterSentimentClassification import *
Expand Down Expand Up @@ -109,6 +110,7 @@
from .multilingual.NusaParagraphEmotionClassification import *
from .multilingual.NusaParagraphTopicClassification import *
from .multilingual.NusaXSenti import *
from .multilingual.ru_nlu_intent_classification import *
from .multilingual.ScalaClassification import *
from .multilingual.ScandiSentClassification import *
from .multilingual.SIB200Classification import *
Expand All @@ -132,7 +134,6 @@
from .rus.HeadlineClassification import *
from .rus.InappropriatenessClassification import *
from .rus.KinopoiskClassification import *
from .rus.ru_nlu_intent_classification import *
from .rus.ru_toixic_classification_okmlcup import *
from .rus.RuReviewsClassification import *
from .rus.RuSciBenchGRNTIClassification import *
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


class CSFDCZMovieReviewSentimentClassification(AbsTaskClassification):
superseded_by = "CSFDCZMovieReviewSentimentClassification.v2"
metadata = TaskMetadata(
name="CSFDCZMovieReviewSentimentClassification",
description="The dataset contains 30k user reviews from csfd.cz in Czech.",
Expand Down Expand Up @@ -49,3 +50,49 @@ def dataset_transform(self):
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["test"], n_samples=N_SAMPLES
)


# Second version ("...v2") of the CSFD Czech movie-review sentiment task.
# Its metadata lists the original task in `adapted_from`, and the description
# states that this version corrects errors found in the original data.
class CSFDCZMovieReviewSentimentClassificationV2(AbsTaskClassification):
metadata = TaskMetadata(
# The ".v2" suffix distinguishes this task from the original task name.
name="CSFDCZMovieReviewSentimentClassification.v2",
description="""The dataset contains 30k user reviews from csfd.cz in Czech.
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
reference="https://arxiv.org/abs/2304.01922",
# Dataset location pinned to a fixed revision
# (presumably a Hugging Face Hub repo id -- TODO confirm).
dataset={
"path": "mteb/csfdcz_movie_review_sentiment",
"revision": "bda232f79c949fd881572f7e1b9ad59fd04a6c7c",
},
type="Classification",
category="s2s",
modalities=["text"],
date=("2002-06-28", "2020-03-13"),
# Only the "test" split is evaluated, scored by accuracy.
eval_splits=["test"],
eval_langs=["ces-Latn"],
main_score="accuracy",
domains=["Reviews", "Written"],
task_subtypes=["Sentiment/Hate speech"],
license="cc-by-sa-4.0",
annotations_creators="derived",
dialect=[],
sample_creation="found",
bibtex_citation=r"""
@misc{štefánik2023resources,
archiveprefix = {arXiv},
author = {Michal Štefánik and Marek Kadlčík and Piotr Gramacki and Petr Sojka},
eprint = {2304.01922},
primaryclass = {cs.CL},
title = {Resources and Few-shot Learners for In-context Learning in Slavic Languages},
year = {2023},
}
""",
# Name of the task this version was derived from.
adapted_from=["CSFDCZMovieReviewSentimentClassification"],
)
# Increase the samples_per_label in order to improve baseline performance
samples_per_label = 20

# Replaces self.dataset with the result of stratified_subsampling applied to
# the "test" split, requesting 2048 samples and using the task's own seed.
# NOTE(review): the exact sampling semantics live in stratified_subsampling,
# which is not visible here -- confirm against its definition.
def dataset_transform(self) -> None:
N_SAMPLES = 2048

self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["test"], n_samples=N_SAMPLES
)
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


class CzechProductReviewSentimentClassification(AbsTaskClassification):
superseded_by = "CzechProductReviewSentimentClassification.v2"
metadata = TaskMetadata(
name="CzechProductReviewSentimentClassification",
description="User reviews of products on Czech e-shop Mall.cz with 3 sentiment classes (positive, neutral, negative)",
Expand Down Expand Up @@ -54,3 +55,54 @@ def dataset_transform(self) -> None:
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["test"]
)


# Second version ("...v2") of the Czech product-review sentiment task.
# Its metadata lists the original task in `adapted_from`, and the description
# states that this version corrects errors found in the original data.
class CzechProductReviewSentimentClassificationV2(AbsTaskClassification):
metadata = TaskMetadata(
# The ".v2" suffix distinguishes this task from the original task name.
name="CzechProductReviewSentimentClassification.v2",
description="""User reviews of products on Czech e-shop Mall.cz with 3 sentiment classes (positive, neutral, negative)
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
reference="https://aclanthology.org/W13-1609/",
# Dataset location pinned to a fixed revision
# (presumably a Hugging Face Hub repo id -- TODO confirm).
dataset={
"path": "mteb/czech_product_review_sentiment",
"revision": "1a3fb305bde30eec7067ab15ad2db9f61b115ca2",
},
type="Classification",
category="s2s",
modalities=["text"],
# Only the "test" split is evaluated, scored by accuracy.
eval_splits=["test"],
eval_langs=["ces-Latn"],
main_score="accuracy",
date=("2013-01-01", "2013-06-01"),
dialect=[],
domains=["Reviews", "Written"],
task_subtypes=["Sentiment/Hate speech"],
license="cc-by-nc-sa-4.0",
annotations_creators="derived",
sample_creation="found",
bibtex_citation=r"""
@inproceedings{habernal-etal-2013-sentiment,
address = {Atlanta, Georgia},
author = {Habernal, Ivan and
Pt{\'a}{\v{c}}ek, Tom{\'a}{\v{s}} and
Steinberger, Josef},
booktitle = {Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
editor = {Balahur, Alexandra and
van der Goot, Erik and
Montoyo, Andres},
month = jun,
pages = {65--74},
publisher = {Association for Computational Linguistics},
title = {Sentiment Analysis in {C}zech Social Media Using Supervised Machine Learning},
url = {https://aclanthology.org/W13-1609},
year = {2013},
}
""",
# Name of the task this version was derived from.
adapted_from=["CzechProductReviewSentimentClassification"],
)
# Class-level setting read by the base task.
# NOTE(review): presumably the number of training examples drawn per label --
# confirm against AbsTaskClassification.
samples_per_label = 16

# Replaces self.dataset with the result of stratified_subsampling applied to
# the "test" split, using the task's own seed. No n_samples is passed, so the
# helper's default sample count applies (value not visible here).
def dataset_transform(self) -> None:
self.dataset = self.stratified_subsampling(
self.dataset, seed=self.seed, splits=["test"]
)
47 changes: 47 additions & 0 deletions mteb/tasks/Classification/ces/CzechSoMeSentimentClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


class CzechSoMeSentimentClassification(AbsTaskClassification):
superseded_by = "CzechSoMeSentimentClassification.v2"
metadata = TaskMetadata(
name="CzechSoMeSentimentClassification",
description="User comments on Facebook",
Expand Down Expand Up @@ -51,3 +52,49 @@ def dataset_transform(self) -> None:
self.dataset = self.dataset.rename_columns(
{"comment": "text", "sentiment_int": "label"}
)


# Second version ("...v2") of the Czech social-media sentiment task.
# Its metadata lists the original task in `adapted_from`, and the description
# states that this version corrects errors found in the original data.
# NOTE(review): the original task renames the "comment"/"sentiment_int" columns
# in dataset_transform, while this class defines no transform -- presumably the
# dataset referenced here already uses the expected column names; confirm.
class CzechSoMeSentimentClassificationV2(AbsTaskClassification):
metadata = TaskMetadata(
# The ".v2" suffix distinguishes this task from the original task name.
name="CzechSoMeSentimentClassification.v2",
description="""User comments on Facebook
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
reference="https://aclanthology.org/W13-1609/",
# Dataset location pinned to a fixed revision
# (presumably a Hugging Face Hub repo id -- TODO confirm).
dataset={
"path": "mteb/czech_so_me_sentiment",
"revision": "a12152e40ff9857bf3c83694528f40ec5c02aafc",
},
type="Classification",
category="s2s",
modalities=["text"],
# Only the "test" split is evaluated, scored by accuracy.
eval_splits=["test"],
eval_langs=["ces-Latn"],
main_score="accuracy",
date=("2013-01-01", "2013-06-01"),
dialect=[],
# NOTE(review): "Reviews" is listed although the description says the texts
# are Facebook comments -- confirm whether a social-media domain fits better.
domains=["Reviews", "Written"],
task_subtypes=["Sentiment/Hate speech"],
license="cc-by-nc-sa-4.0",
annotations_creators="derived",
sample_creation="found",
bibtex_citation=r"""
@inproceedings{habernal-etal-2013-sentiment,
address = {Atlanta, Georgia},
author = {Habernal, Ivan and
Pt{\'a}{\v{c}}ek, Tom{\'a}{\v{s}} and
Steinberger, Josef},
booktitle = {Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
editor = {Balahur, Alexandra and
van der Goot, Erik and
Montoyo, Andres},
month = jun,
pages = {65--74},
publisher = {Association for Computational Linguistics},
title = {Sentiment Analysis in {C}zech Social Media Using Supervised Machine Learning},
url = {https://aclanthology.org/W13-1609},
year = {2013},
}
""",
# Name of the task this version was derived from.
adapted_from=["CzechSoMeSentimentClassification"],
)
# Class-level setting read by the base task.
# NOTE(review): presumably the number of training examples drawn per label --
# confirm against AbsTaskClassification.
samples_per_label = 16
40 changes: 40 additions & 0 deletions mteb/tasks/Classification/dan/AngryTweetsClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


class AngryTweetsClassification(AbsTaskClassification):
superseded_by = "AngryTweetsClassification.v2"
metadata = TaskMetadata(
name="AngryTweetsClassification",
dataset={
Expand Down Expand Up @@ -39,3 +40,42 @@ class AngryTweetsClassification(AbsTaskClassification):
)

samples_per_label = 16


# Second version ("...v2") of the Danish AngryTweets sentiment task.
# Its metadata lists the original task in `adapted_from`, and the description
# states that this version corrects errors found in the original data.
class AngryTweetsClassificationV2(AbsTaskClassification):
metadata = TaskMetadata(
# The ".v2" suffix distinguishes this task from the original task name.
name="AngryTweetsClassification.v2",
# Dataset location pinned to a fixed revision
# (presumably a Hugging Face Hub repo id -- TODO confirm).
dataset={
"path": "mteb/angry_tweets",
"revision": "b9475fb66a13befda4fa9871cd92343bb2c0eb77",
},
description="""A sentiment dataset with 3 classes (positiv, negativ, neutral) for Danish tweets
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
reference="https://aclanthology.org/2021.nodalida-main.53/",
type="Classification",
category="s2s",
modalities=["text"],
# Only the "test" split is evaluated, scored by accuracy.
eval_splits=["test"],
eval_langs=["dan-Latn"],
main_score="accuracy",
date=("2021-01-01", "2021-12-31"),
domains=["Social", "Written"],
task_subtypes=["Sentiment/Hate speech"],
license="cc-by-4.0",
annotations_creators="human-annotated",
dialect=[],
sample_creation="found",
bibtex_citation=r"""
@inproceedings{pauli2021danlp,
author = {Pauli, Amalie Brogaard and Barrett, Maria and Lacroix, Oph{\'e}lie and Hvingelby, Rasmus},
booktitle = {Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)},
pages = {460--466},
title = {DaNLP: An open-source toolkit for Danish Natural Language Processing},
year = {2021},
}
""",
# Task instruction text.
# NOTE(review): presumably consumed by instruction-following embedding
# models -- confirm how TaskMetadata.prompt is used.
prompt="Classify Danish tweets by sentiment. (positive, negative, neutral).",
# Name of the task this version was derived from.
adapted_from=["AngryTweetsClassification"],
)

# Class-level setting read by the base task.
# NOTE(review): presumably the number of training examples drawn per label --
# confirm against AbsTaskClassification.
samples_per_label = 16
62 changes: 62 additions & 0 deletions mteb/tasks/Classification/dan/DKHateClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


class DKHateClassification(AbsTaskClassification):
superseded_by = "DKHateClassification.v2"
metadata = TaskMetadata(
name="DKHateClassification",
dataset={
Expand Down Expand Up @@ -69,3 +70,64 @@ def dataset_transform(self):
self.dataset = self.dataset.map(
lambda x: {"label": lab2idx[x["label"]]}, remove_columns=["label"]
)


# Second version ("...v2") of the Danish DKHate offensive-language task.
# Its metadata lists the original task in `adapted_from`, and the description
# states that this version corrects errors found in the original data.
# NOTE(review): the original task maps string labels to indices in
# dataset_transform, while this class defines no transform -- presumably the
# dataset referenced here already stores usable labels; confirm.
class DKHateClassificationV2(AbsTaskClassification):
metadata = TaskMetadata(
# The ".v2" suffix distinguishes this task from the original task name.
name="DKHateClassification.v2",
# Dataset location pinned to a fixed revision
# (presumably a Hugging Face Hub repo id -- TODO confirm).
dataset={
"path": "mteb/dk_hate",
"revision": "0468ff11393992d8347cf4282fb706fe970608d4",
},
description="""Danish Tweets annotated for Hate Speech either being Offensive or not
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
reference="https://aclanthology.org/2020.lrec-1.430/",
type="Classification",
category="s2s",
modalities=["text"],
# Only the "test" split is evaluated, scored by accuracy.
eval_splits=["test"],
eval_langs=["dan-Latn"],
main_score="accuracy",
date=("2018-01-01", "2018-12-31"),
domains=["Social", "Written"],
task_subtypes=["Sentiment/Hate speech"],
license="cc-by-4.0",
annotations_creators="expert-annotated",
dialect=[],
sample_creation="found",
bibtex_citation=r"""
@inproceedings{sigurbergsson-derczynski-2020-offensive,
abstract = {The presence of offensive language on social media platforms and the implications this poses is becoming a major concern in modern society. Given the enormous amount of content created every day, automatic methods are required to detect and deal with this type of content. Until now, most of the research has focused on solving the problem for the English language, while the problem is multilingual. We construct a Danish dataset DKhate containing user-generated comments from various social media platforms, and to our knowledge, the first of its kind, annotated for various types and target of offensive language. We develop four automatic classification systems, each designed to work for both the English and the Danish language. In the detection of offensive language in English, the best performing system achieves a macro averaged F1-score of 0.74, and the best performing system for Danish achieves a macro averaged F1-score of 0.70. In the detection of whether or not an offensive post is targeted, the best performing system for English achieves a macro averaged F1-score of 0.62, while the best performing system for Danish achieves a macro averaged F1-score of 0.73. Finally, in the detection of the target type in a targeted offensive post, the best performing system for English achieves a macro averaged F1-score of 0.56, and the best performing system for Danish achieves a macro averaged F1-score of 0.63. Our work for both the English and the Danish language captures the type and targets of offensive language, and present automatic methods for detecting different kinds of offensive language such as hate speech and cyberbullying.},
address = {Marseille, France},
author = {Sigurbergsson, Gudbjartur Ingi and
Derczynski, Leon},
booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
editor = {Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios},
isbn = {979-10-95546-34-4},
language = {English},
month = may,
pages = {3498--3508},
publisher = {European Language Resources Association},
title = {Offensive Language and Hate Speech Detection for {D}anish},
url = {https://aclanthology.org/2020.lrec-1.430},
year = {2020},
}
""",
# Task instruction text.
# NOTE(review): presumably consumed by instruction-following embedding
# models -- confirm how TaskMetadata.prompt is used.
prompt="Classify Danish tweets based on offensiveness (offensive, not offensive)",
# Name of the task this version was derived from.
adapted_from=["DKHateClassification"],
)

# Class-level setting read by the base task.
# NOTE(review): presumably the number of training examples drawn per label --
# confirm against AbsTaskClassification.
samples_per_label = 16
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


class DanishPoliticalCommentsClassification(AbsTaskClassification):
superseded_by = "DanishPoliticalCommentsClassification.v2"
metadata = TaskMetadata(
name="DanishPoliticalCommentsClassification",
dataset={
Expand Down Expand Up @@ -49,3 +50,44 @@ def dataset_transform(self):

# create train and test splits
self.dataset = self.dataset["train"].train_test_split(0.2, seed=self.seed)


# Second version ("...v2") of the Danish political-comments sentiment task.
# Its metadata lists the original task in `adapted_from`, and the description
# states that this version corrects errors found in the original data.
# NOTE(review): the original task builds its train/test split via
# train_test_split in dataset_transform, while this class defines no transform
# yet evaluates on "test" -- presumably the dataset referenced here already
# ships a test split; confirm.
class DanishPoliticalCommentsClassificationV2(AbsTaskClassification):
metadata = TaskMetadata(
# The ".v2" suffix distinguishes this task from the original task name.
name="DanishPoliticalCommentsClassification.v2",
# Dataset location pinned to a fixed revision
# (presumably a Hugging Face Hub repo id -- TODO confirm).
dataset={
"path": "mteb/danish_political_comments",
"revision": "476a9e7327aba70ad3e97a169d7310b86be9b245",
},
description="""A dataset of Danish political comments rated for sentiment
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
reference="https://huggingface.co/datasets/danish_political_comments",
type="Classification",
category="s2s",
modalities=["text"],
# Only the "test" split is evaluated, scored by accuracy.
eval_splits=["test"],
eval_langs=["dan-Latn"],
main_score="accuracy",
date=(
"2000-01-01",
"2022-12-31",
), # Estimated range for the collection of comments
domains=["Social", "Written"],
task_subtypes=["Sentiment/Hate speech"],
# The license is recorded as unspecified rather than as a concrete identifier.
license="not specified",
annotations_creators="derived",
dialect=[],
sample_creation="found",
bibtex_citation=r"""
@techreport{SAMsentiment,
author = {Mads Guldborg Kjeldgaard Kongsbak and Steffan Eybye Christensen and Lucas Høyberg Puvis~de~Chavannes and Peter Due Jensen},
institution = {IT University of Copenhagen},
title = {Sentiment Analysis Multitool, SAM},
year = {2019},
}
""",
# Task instruction text.
# NOTE(review): presumably consumed by instruction-following embedding
# models -- confirm how TaskMetadata.prompt is used.
prompt="Classify Danish political comments for sentiment",
# Name of the task this version was derived from.
adapted_from=["DanishPoliticalCommentsClassification"],
)

# Class-level setting read by the base task.
# NOTE(review): presumably the number of training examples drawn per label --
# confirm against AbsTaskClassification.
samples_per_label = 16
Loading