diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 34c70bc6b9..e06cbd4568 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -79,8 +79,7 @@ "Programming", "Chemistry", "Financial", - "Chemistry", - "Financial", + "Entertainment", ] SAMPLE_CREATION_METHOD = Literal[ diff --git a/mteb/tasks/Classification/pol/PolishClassification.py b/mteb/tasks/Classification/pol/PolishClassification.py index c0963e8283..7b1148f90b 100644 --- a/mteb/tasks/Classification/pol/PolishClassification.py +++ b/mteb/tasks/Classification/pol/PolishClassification.py @@ -123,7 +123,7 @@ class AllegroReviewsClassification(AbsTaskClassification): eval_langs=["pol-Latn"], main_score="accuracy", date=None, - domains=None, + domains=["Reviews"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/Clustering/deu/BlurbsClusteringP2P.py b/mteb/tasks/Clustering/deu/BlurbsClusteringP2P.py index dbe155658e..e8407b2429 100644 --- a/mteb/tasks/Clustering/deu/BlurbsClusteringP2P.py +++ b/mteb/tasks/Clustering/deu/BlurbsClusteringP2P.py @@ -26,7 +26,7 @@ class BlurbsClusteringP2P(AbsTaskClustering): main_score="v_measure", date=None, form=None, - domains=None, + domains=["Written"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/Clustering/deu/BlurbsClusteringS2S.py b/mteb/tasks/Clustering/deu/BlurbsClusteringS2S.py index 1a54ed8242..7847ecd768 100644 --- a/mteb/tasks/Clustering/deu/BlurbsClusteringS2S.py +++ b/mteb/tasks/Clustering/deu/BlurbsClusteringS2S.py @@ -34,7 +34,7 @@ class BlurbsClusteringS2S(AbsTaskClustering): main_score="v_measure", date=None, form=None, - domains=None, + domains=["Written"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/Clustering/deu/TenKGnadClusteringS2S.py b/mteb/tasks/Clustering/deu/TenKGnadClusteringS2S.py index f62c0f0aca..d4ac2f2581 100644 --- a/mteb/tasks/Clustering/deu/TenKGnadClusteringS2S.py +++ b/mteb/tasks/Clustering/deu/TenKGnadClusteringS2S.py @@ -24,8 +24,8 @@ class TenKGnadClusteringS2S(AbsTaskClustering): main_score="v_measure", date=None, form=None, - domains=None, - task_subtypes=None, + domains=["News", "Non-fiction", "Written"], + task_subtypes=["Topic classification"], license=None, annotations_creators=None, dialect=None, @@ -57,7 +57,7 @@ class TenKGnadClusteringS2SFast(AbsTaskClusteringFast): "2020-12-31", ), # since it is news it is guessed that it is from 2000 to 2020 domains=["News", "Non-fiction", "Written"], - task_subtypes=None, + task_subtypes=["Topic classification"], license="cc-by-sa-4.0", annotations_creators="derived", dialect=[], diff --git a/mteb/tasks/Clustering/fra/AlloProfClusteringP2P.py b/mteb/tasks/Clustering/fra/AlloProfClusteringP2P.py index 0e95b82773..b1ef7f09b6 100644 --- a/mteb/tasks/Clustering/fra/AlloProfClusteringP2P.py +++ b/mteb/tasks/Clustering/fra/AlloProfClusteringP2P.py @@ -30,14 +30,14 @@ class AlloProfClusteringP2P(AbsTaskClustering): eval_splits=["test"], eval_langs=["fra-Latn"], main_score="v_measure", - date=None, + date=("1996-01-01", "2023-04-14"), form=None, - domains=None, - task_subtypes=None, - license=None, - annotations_creators=None, + domains=["Encyclopaedic", "Written"], + task_subtypes=["Thematic clustering"], + license="mit", + annotations_creators="human-annotated", dialect=None, - sample_creation=None, + sample_creation="found", bibtex_citation="""@misc{lef23, doi = {10.48550/ARXIV.2302.07738}, url = {https://arxiv.org/abs/2302.07738}, diff --git a/mteb/tasks/Clustering/fra/AlloProfClusteringS2S.py b/mteb/tasks/Clustering/fra/AlloProfClusteringS2S.py index 1b4f609827..fcd2e18455 100644 --- a/mteb/tasks/Clustering/fra/AlloProfClusteringS2S.py +++ b/mteb/tasks/Clustering/fra/AlloProfClusteringS2S.py @@ -30,14 +30,14 @@ class AlloProfClusteringS2S(AbsTaskClustering): eval_splits=["test"], eval_langs=["fra-Latn"], main_score="v_measure", - date=None, + date=("1996-01-01", "2023-04-14"), form=None, - domains=None, - task_subtypes=None, - license=None, - annotations_creators=None, + domains=["Encyclopaedic", "Written"], + task_subtypes=["Thematic clustering"], + license="mit", + annotations_creators="human-annotated", dialect=None, - sample_creation=None, + sample_creation="found", bibtex_citation="""@misc{lef23, doi = {10.48550/ARXIV.2302.07738}, url = {https://arxiv.org/abs/2302.07738}, diff --git a/mteb/tasks/Clustering/fra/HALClusteringS2S.py b/mteb/tasks/Clustering/fra/HALClusteringS2S.py index c6254befe6..cb4cc319a7 100644 --- a/mteb/tasks/Clustering/fra/HALClusteringS2S.py +++ b/mteb/tasks/Clustering/fra/HALClusteringS2S.py @@ -32,14 +32,14 @@ class HALClusteringS2S(AbsTaskClustering): eval_splits=["test"], eval_langs=["fra-Latn"], main_score="v_measure", - date=None, + date=("2000-03-29", "2024-05-24"), form=None, - domains=None, - task_subtypes=None, - license=None, - annotations_creators=None, + domains=["Academic", "Written"], + task_subtypes=["Thematic clustering"], + license="apache-2.0", + annotations_creators="human-annotated", dialect=None, - sample_creation=None, + sample_creation="found", bibtex_citation="""@misc{ciancone2024extending, title={Extending the Massive Text Embedding Benchmark to French}, author={Mathieu Ciancone and Imene Kerboua and Marion Schaeffer and Wissam Siblini}, diff --git a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py index 7e8b22b9af..7af80b5cdd 100644 --- a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py +++ b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py @@ -45,11 +45,11 @@ class MasakhaNEWSClusteringS2S(AbsTaskClustering, MultilingualTask): eval_splits=["test"], eval_langs=_LANGUAGES, main_score="v_measure", - date=None, - domains=None, - task_subtypes=None, - license=None, - annotations_creators=None, + date=("2023-04-21", "2023-05-26"), + domains=["News", "Written"], + task_subtypes=["Topic classification"], + license="afl-3.0", + annotations_creators="human-annotated", dialect=None, sample_creation=None, bibtex_citation="""@article{adelani2023masakhanews, diff --git a/mteb/tasks/PairClassification/fas/FaMTEBPairClassification.py b/mteb/tasks/PairClassification/fas/FaMTEBPairClassification.py index 6deba76d8d..98deac52e9 100644 --- a/mteb/tasks/PairClassification/fas/FaMTEBPairClassification.py +++ b/mteb/tasks/PairClassification/fas/FaMTEBPairClassification.py @@ -26,7 +26,16 @@ class CExaPPC(AbsTaskPairClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" """, + bibtex_citation="""@INPROCEEDINGS{9786243, + author={Sadeghi, Reyhaneh and Karbasi, Hamed and Akbari, Ahmad}, + booktitle={2022 8th International Conference on Web Research (ICWR)}, + title={ExaPPC: a Large-Scale Persian Paraphrase Detection Corpus}, + year={2022}, + volume={}, + number={}, + pages={168-175}, + keywords={Data mining;Task analysis;Paraphrase Identification;Semantic Similarity;Deep Learning;Paraphrasing Corpora}, + doi={10.1109/ICWR54782.2022.9786243}}""", ) def dataset_transform(self): @@ -214,13 +223,21 @@ class ParsinluEntail(AbsTaskPairClassification): eval_langs=["fas-Arab"], main_score="max_ap", date=("2024-09-01", "2024-12-31"), - domains=[], + domains=["Reviews", "Written"], task_subtypes=[], license="not specified", annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" """, + bibtex_citation="""@misc{khashabi2021parsinlusuitelanguageunderstanding, + title={ParsiNLU: A Suite of Language Understanding Challenges for Persian}, + author={Daniel Khashabi and Arman Cohan and Siamak Shakeri and Pedram Hosseini and Pouya Pezeshkpour and Malihe Alikhani and Moin Aminnaseri and Marzieh Bitaab and Faeze Brahman and Sarik Ghazarian and Mozhdeh Gheini and Arman Kabiri and Rabeeh Karimi Mahabadi and Omid Memarrast and Ahmadreza Mosallanezhad and Erfan Noury and Shahab Raji and Mohammad Sadegh Rasooli and Sepideh Sadeghi and Erfan Sadeqi Azer and Niloofar Safi Samghabadi and Mahsa Shafaei and Saber Sheybani and Ali Tazarv and Yadollah Yaghoobzadeh}, + year={2021}, + eprint={2012.06154}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2012.06154}, +}""", ) def dataset_transform(self): @@ -257,13 +274,21 @@ class ParsinluQueryParaphPC(AbsTaskPairClassification): eval_langs=["fas-Arab"], main_score="max_ap", date=("2024-09-01", "2024-12-31"), - domains=[], + domains=["Reviews", "Written"], task_subtypes=[], license="not specified", annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" """, + bibtex_citation="""@misc{khashabi2021parsinlusuitelanguageunderstanding, + title={ParsiNLU: A Suite of Language Understanding Challenges for Persian}, + author={Daniel Khashabi and Arman Cohan and Siamak Shakeri and Pedram Hosseini and Pouya Pezeshkpour and Malihe Alikhani and Moin Aminnaseri and Marzieh Bitaab and Faeze Brahman and Sarik Ghazarian and Mozhdeh Gheini and Arman Kabiri and Rabeeh Karimi Mahabadi and Omid Memarrast and Ahmadreza Mosallanezhad and Erfan Noury and Shahab Raji and Mohammad Sadegh Rasooli and Sepideh Sadeghi and Erfan Sadeqi Azer and Niloofar Safi Samghabadi and Mahsa Shafaei and Saber Sheybani and Ali Tazarv and Yadollah Yaghoobzadeh}, + year={2021}, + eprint={2012.06154}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2012.06154}, +}""", ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/pol/PolishPC.py b/mteb/tasks/PairClassification/pol/PolishPC.py index 099a953642..9e431b05ec 100644 --- a/mteb/tasks/PairClassification/pol/PolishPC.py +++ b/mteb/tasks/PairClassification/pol/PolishPC.py @@ -21,7 +21,7 @@ class SickePLPC(AbsTaskPairClassification): eval_langs=["pol-Latn"], main_score="max_ap", date=None, - domains=None, + domains=["Reviews"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/Reranking/zho/CMTEBReranking.py b/mteb/tasks/Reranking/zho/CMTEBReranking.py index c701aa9227..ea74d1fd34 100644 --- a/mteb/tasks/Reranking/zho/CMTEBReranking.py +++ b/mteb/tasks/Reranking/zho/CMTEBReranking.py @@ -21,9 +21,9 @@ class T2Reranking(AbsTaskReranking): main_score="map", date=None, form=None, - domains=None, + domains=[], task_subtypes=None, - license=None, + license="not specified", annotations_creators=None, dialect=None, sample_creation=None, diff --git a/mteb/tasks/Retrieval/deu/GerDaLIRRetrieval.py b/mteb/tasks/Retrieval/deu/GerDaLIRRetrieval.py index 111eb986ed..745a0fe60d 100644 --- a/mteb/tasks/Retrieval/deu/GerDaLIRRetrieval.py +++ b/mteb/tasks/Retrieval/deu/GerDaLIRRetrieval.py @@ -25,8 +25,8 @@ class GerDaLIR(AbsTaskRetrieval): eval_langs=["deu-Latn"], main_score="ndcg_at_10", date=None, - domains=None, - task_subtypes=None, + domains=["Legal"], + task_subtypes=[], license=None, annotations_creators=None, dialect=None, diff --git a/mteb/tasks/Retrieval/eng/FEVERRetrieval.py b/mteb/tasks/Retrieval/eng/FEVERRetrieval.py index 2a6130e804..a4513179eb 100644 --- a/mteb/tasks/Retrieval/eng/FEVERRetrieval.py +++ b/mteb/tasks/Retrieval/eng/FEVERRetrieval.py @@ -80,10 +80,10 @@ class FEVERHardNegatives(AbsTaskRetrieval): eval_langs=["eng-Latn"], main_score="ndcg_at_10", date=None, - domains=None, - task_subtypes=None, - license=None, - annotations_creators=None, + domains=["Encyclopaedic", "Written"], + task_subtypes=["Claim verification"], + license="cc-by-nc-sa-3.0", + annotations_creators="human-annotated", dialect=None, sample_creation=None, bibtex_citation="""@inproceedings{thorne-etal-2018-fever, diff --git a/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py b/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py index eec977f926..643a414ada 100644 --- a/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py +++ b/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py @@ -44,11 +44,17 @@ class T2Retrieval(AbsTaskRetrieval): eval_splits=["dev"], eval_langs=["cmn-Hans"], main_score="ndcg_at_10", - date=None, - domains=None, - task_subtypes=None, - license=None, - annotations_creators=None, + date=("2023-04-04", "2023-05-16"), + domains=[ + "Medical", + "Academic", + "Financial", + "Government", + "Non-fiction", + ], + task_subtypes=[], + license="apache-2.0", + annotations_creators="human-annotated", dialect=None, sample_creation=None, bibtex_citation="""@misc{xie2023t2ranking, @@ -193,14 +199,22 @@ class CovidRetrieval(AbsTaskRetrieval): eval_splits=["dev"], eval_langs=["cmn-Hans"], main_score="ndcg_at_10", - date=None, - domains=None, - task_subtypes=None, - license=None, - annotations_creators=None, - dialect=None, + date=("2022-03-03", "2022-03-18"), + domains=["Medical", "Entertainment"], + task_subtypes=[], + license="not specified", + annotations_creators="human-annotated", + dialect=[], sample_creation=None, - bibtex_citation=None, + bibtex_citation="""@misc{long2022multicprmultidomainchinese, + title={Multi-CPR: A Multi Domain Chinese Dataset for Passage Retrieval}, + author={Dingkun Long and Qiong Gao and Kuan Zou and Guangwei Xu and Pengjun Xie and Ruijie Guo and Jian Xu and Guanjun Jiang and Luxi Xing and Ping Yang}, + year={2022}, + eprint={2203.03367}, + archivePrefix={arXiv}, + primaryClass={cs.IR}, + url={https://arxiv.org/abs/2203.03367}, +}""", prompt={ "query": "Given a question on COVID-19, retrieve news articles that answer the question" }, @@ -242,7 +256,15 @@ class CmedqaRetrieval(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation=None, + bibtex_citation="""@misc{qiu2022dureaderretrievallargescalechinesebenchmark, + title={DuReader_retrieval: A Large-scale Chinese Benchmark for Passage Retrieval from Web Search Engine}, + author={Yifu Qiu and Hongyu Li and Yingqi Qu and Ying Chen and Qiaoqiao She and Jing Liu and Hua Wu and Haifeng Wang}, + year={2022}, + eprint={2203.10232}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2203.10232}, +}""", prompt={ "query": "Given a Chinese community medical question, retrieve replies that best answer the question" }, @@ -286,7 +308,15 @@ class EcomRetrieval(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation=None, + bibtex_citation="""@misc{long2022multicprmultidomainchinese, + title={Multi-CPR: A Multi Domain Chinese Dataset for Passage Retrieval}, + author={Dingkun Long and Qiong Gao and Kuan Zou and Guangwei Xu and Pengjun Xie and Ruijie Guo and Jian Xu and Guanjun Jiang and Luxi Xing and Ping Yang}, + year={2022}, + eprint={2203.03367}, + archivePrefix={arXiv}, + primaryClass={cs.IR}, + url={https://arxiv.org/abs/2203.03367}, +}""", prompt={ "query": "Given a user query from an e-commerce website, retrieve description sentences of relevant products" }, @@ -330,7 +360,15 @@ class MedicalRetrieval(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation=None, + bibtex_citation="""@misc{long2022multicprmultidomainchinese, + title={Multi-CPR: A Multi Domain Chinese Dataset for Passage Retrieval}, + author={Dingkun Long and Qiong Gao and Kuan Zou and Guangwei Xu and Pengjun Xie and Ruijie Guo and Jian Xu and Guanjun Jiang and Luxi Xing and Ping Yang}, + year={2022}, + eprint={2203.03367}, + archivePrefix={arXiv}, + primaryClass={cs.IR}, + url={https://arxiv.org/abs/2203.03367}, +}""", prompt={ "query": "Given a medical question, retrieve user replies that best answer the question" }, @@ -374,7 +412,15 @@ class VideoRetrieval(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation=None, + bibtex_citation="""@misc{long2022multicprmultidomainchinese, + title={Multi-CPR: A Multi Domain Chinese Dataset for Passage Retrieval}, + author={Dingkun Long and Qiong Gao and Kuan Zou and Guangwei Xu and Pengjun Xie and Ruijie Guo and Jian Xu and Guanjun Jiang and Luxi Xing and Ping Yang}, + year={2022}, + eprint={2203.03367}, + archivePrefix={arXiv}, + primaryClass={cs.IR}, + url={https://arxiv.org/abs/2203.03367}, +}""", prompt={ "query": "Given a video search query, retrieve the titles of relevant videos" }, diff --git a/mteb/tasks/STS/deu/GermanSTSBenchmarkSTS.py b/mteb/tasks/STS/deu/GermanSTSBenchmarkSTS.py index 34add4378e..a0552eb4df 100644 --- a/mteb/tasks/STS/deu/GermanSTSBenchmarkSTS.py +++ b/mteb/tasks/STS/deu/GermanSTSBenchmarkSTS.py @@ -21,10 +21,10 @@ class GermanSTSBenchmarkSTS(AbsTaskSTS): eval_splits=["validation", "test"], eval_langs=["deu-Latn"], main_score="cosine_spearman", - date=None, - domains=None, + date=("2023-11-09", "2024-01-24"), + domains=[], task_subtypes=None, - license=None, + license="cc-by-sa-3.0", annotations_creators=None, dialect=None, sample_creation=None, diff --git a/mteb/tasks/STS/fra/SickFrSTS.py b/mteb/tasks/STS/fra/SickFrSTS.py index 241aa60163..c34a933121 100644 --- a/mteb/tasks/STS/fra/SickFrSTS.py +++ b/mteb/tasks/STS/fra/SickFrSTS.py @@ -21,7 +21,7 @@ class SickFrSTS(AbsTaskSTS): eval_langs=["fra-Latn"], main_score="cosine_spearman", date=None, - domains=None, + domains=[], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/STS/zho/CMTEBSTS.py b/mteb/tasks/STS/zho/CMTEBSTS.py index c7c0134d2a..bcc149f937 100644 --- a/mteb/tasks/STS/zho/CMTEBSTS.py +++ b/mteb/tasks/STS/zho/CMTEBSTS.py @@ -196,7 +196,7 @@ class STSB(AbsTaskSTS): eval_langs=["cmn-Hans"], main_score="cosine_spearman", date=None, - domains=None, + domains=[], task_subtypes=None, license=None, annotations_creators=None,