Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{
"test": {
"num_samples": 1085,
"number_texts_intersect_with_train": 0,
"text_statistics": {
"total_text_length": 115359,
"min_text_length": 8,
"average_text_length": 106.32165898617511,
"max_text_length": 2722,
"unique_texts": 1085
},
"image_statistics": null,
"label_statistics": {
"min_labels_per_text": 1,
"average_label_per_text": 1.0,
"max_labels_per_text": 1,
"unique_labels": 3,
"labels": {
"0": {
"count": 868
},
"1": {
"count": 190
},
"2": {
"count": 27
}
}
}
},
"train": {
"num_samples": 7176,
"number_texts_intersect_with_train": null,
"text_statistics": {
"total_text_length": 830248,
"min_text_length": 5,
"average_text_length": 115.69788182831661,
"max_text_length": 4759,
"unique_texts": 7176
},
"image_statistics": null,
"label_statistics": {
"min_labels_per_text": 1,
"average_label_per_text": 1.0,
"max_labels_per_text": 1,
"unique_labels": 3,
"labels": {
"0": {
"count": 4933
},
"1": {
"count": 2047
},
"2": {
"count": 196
}
}
}
}
}
7 changes: 6 additions & 1 deletion mteb/tasks/classification/heb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from .hebrew_sentiment_analysis import (
HebrewSentimentAnalysis,
HebrewSentimentAnalysisV2,
HebrewSentimentAnalysisV3,
)

__all__ = ["HebrewSentimentAnalysis", "HebrewSentimentAnalysisV2"]
__all__ = [
"HebrewSentimentAnalysis",
"HebrewSentimentAnalysisV2",
"HebrewSentimentAnalysisV3",
]
65 changes: 62 additions & 3 deletions mteb/tasks/classification/heb/hebrew_sentiment_analysis.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to add superseded_by

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

V1 is already superseded_by v2

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

v2 needs a superseded_by v3

Copy link
Member Author

@Samoed Samoed Nov 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then we should clean it up as in #2900, but I can't find the script for this

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe. Try to run this too

Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@ class HebrewSentimentAnalysis(AbsTaskClassification):
"path": "mteb/HebrewSentimentAnalysis",
"revision": "03eb0996c8234e0d8cd7206bf4763815deda12ed",
},
description="HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy.",
description=(
"HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. "
"In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, "
"the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, "
"the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy. "
),
reference="https://huggingface.co/datasets/hebrew_sentiment",
type="Classification",
category="t2c",
Expand Down Expand Up @@ -37,7 +42,7 @@ class HebrewSentimentAnalysis(AbsTaskClassification):
year = {2018},
}
""",
superseded_by="HebrewSentimentAnalysis.v2",
superseded_by="HebrewSentimentAnalysis.v3",
)


Expand All @@ -49,7 +54,61 @@ class HebrewSentimentAnalysisV2(AbsTaskClassification):
"revision": "7ecd049fc8ac0d6f0a0121c8ff9fe44ea5bd935b",
"name": "morph",
},
description="HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
description=(
"HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. "
"In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, "
"the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, "
"the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy. "
"This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)"
),
reference="https://huggingface.co/datasets/hebrew_sentiment",
type="Classification",
category="t2c",
modalities=["text"],
eval_splits=["test"],
eval_langs=["heb-Hebr"],
main_score="accuracy",
date=("2015-10-01", "2015-10-31"),
domains=["Reviews", "Written"],
task_subtypes=["Sentiment/Hate speech"],
license="mit",
annotations_creators="expert-annotated",
dialect=[],
sample_creation="found",
bibtex_citation=r"""
@inproceedings{amram-etal-2018-representations,
address = {Santa Fe, New Mexico, USA},
author = {Amram, Adam and Ben David, Anat and Tsarfaty, Reut},
booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
month = aug,
pages = {2242--2252},
publisher = {Association for Computational Linguistics},
title = {Representations and Architectures in Neural Sentiment Analysis for Morphologically Rich Languages: A Case Study from {M}odern {H}ebrew},
url = {https://www.aclweb.org/anthology/C18-1190},
year = {2018},
}
""",
adapted_from=["HebrewSentimentAnalysis"],
superseded_by="HebrewSentimentAnalysis.v3",
)


class HebrewSentimentAnalysisV3(AbsTaskClassification):
label_column_name = "labels"
metadata = TaskMetadata(
name="HebrewSentimentAnalysis.v3",
dataset={
"path": "mteb/HebrewSentimentAnalysisV4",
"revision": "aa0b83c4b16cd28daf7c41ef3402e3ffe9c70c59",
},
description=(
"HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. "
"In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, "
"the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, "
"the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy. "
"This version corrects texts (took pre-tokenized) [more details in this thread](https://huggingface.co/datasets/mteb/HebrewSentimentAnalysis/discussions/2). "
"This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)"
),
reference="https://huggingface.co/datasets/hebrew_sentiment",
type="Classification",
category="t2c",
Expand Down