From da2dddd15a0db4d2b2cb4a1357c9d5656bd94d07 Mon Sep 17 00:00:00 2001 From: Alexey Vatolin Date: Thu, 1 May 2025 21:12:52 +0200 Subject: [PATCH 1/6] Fix errors in bibtex_citation --- .../fil/FilipinoShopeeReviewsClassification.py | 2 +- .../multilingual/AfriSentiClassification.py | 6 +++--- mteb/tasks/Image/ImageClustering/eng/TinyImageNet.py | 2 +- .../Image/ZeroShotClassification/eng/RenderedSST2.py | 2 +- .../PairClassification/deu/FalseFriendsDeEnPC.py | 6 +++--- mteb/tasks/PairClassification/por/SickBrPC.py | 12 ++++++------ mteb/tasks/Reranking/eng/MindSmallReranking.py | 2 +- mteb/tasks/Retrieval/deu/GermanQuADRetrieval.py | 2 +- mteb/tasks/Retrieval/jpn/JaqketRetrieval.py | 2 +- .../CrossLingualSemanticDiscriminationWMT19.py | 2 +- .../CrossLingualSemanticDiscriminationWMT21.py | 2 +- .../tasks/Retrieval/multilingual/XMarketRetrieval.py | 6 ++++-- mteb/tasks/Retrieval/pol/MSMARCOPLRetrieval.py | 4 ++-- mteb/tasks/Retrieval/pol/QuoraPLRetrieval.py | 4 ++-- .../Retrieval/vie/GreenNodeTableMarkdownRetrieval.py | 4 +--- mteb/tasks/Retrieval/vie/ZacLegalTextRetrieval.py | 4 +--- mteb/tasks/STS/por/SickBrSTS.py | 8 ++++---- 17 files changed, 34 insertions(+), 36 deletions(-) diff --git a/mteb/tasks/Classification/fil/FilipinoShopeeReviewsClassification.py b/mteb/tasks/Classification/fil/FilipinoShopeeReviewsClassification.py index d91af36567..373526cf0c 100644 --- a/mteb/tasks/Classification/fil/FilipinoShopeeReviewsClassification.py +++ b/mteb/tasks/Classification/fil/FilipinoShopeeReviewsClassification.py @@ -29,7 +29,7 @@ class FilipinoShopeeReviewsClassification(AbsTaskClassification): bibtex_citation=""" @article{riegoenhancement, title={Enhancement to Low-Resource Text Classification via Sequential Transfer Learning}, - author={Riego, Neil Christian R. and Villarba, Danny Bell and Sison, Ariel Antwaun Rolando C. and Pineda, Fernandez C. and Lagunzad, Herminiño C.} + author={Riego, Neil Christian R. 
and Villarba, Danny Bell and Sison, Ariel Antwaun Rolando C. and Pineda, Fernandez C. and Lagunzad, Herminiño C.}, journal={United International Journal for Research & Technology}, volume={04}, issue={08}, diff --git a/mteb/tasks/Classification/multilingual/AfriSentiClassification.py b/mteb/tasks/Classification/multilingual/AfriSentiClassification.py index 8a4a79d68b..46332e1bee 100644 --- a/mteb/tasks/Classification/multilingual/AfriSentiClassification.py +++ b/mteb/tasks/Classification/multilingual/AfriSentiClassification.py @@ -53,9 +53,9 @@ class AfriSentiClassification(MultilingualTask, AbsTaskClassification): dialect=[], sample_creation="found", bibtex_citation="""@inproceedings{Muhammad2023AfriSentiAT, - title=AfriSenti: A Twitter Sentiment Analysis Benchmark for African Languages, - author=Shamsuddeen Hassan Muhammad and Idris Abdulmumin and Abinew Ali Ayele and Nedjma Ousidhoum and David Ifeoluwa Adelani and Seid Muhie Yimam and Ibrahim Sa'id Ahmad and Meriem Beloucif and Saif Mohammad and Sebastian Ruder and Oumaima Hourrane and Pavel Brazdil and Felermino D'ario M'ario Ant'onio Ali and Davis Davis and Salomey Osei and Bello Shehu Bello and Falalu Ibrahim and Tajuddeen Gwadabe and Samuel Rutunda and Tadesse Belay and Wendimu Baye Messelle and Hailu Beshada Balcha and Sisay Adugna Chala and Hagos Tesfahun Gebremichael and Bernard Opoku and Steven Arthur, - year=2023 + title={AfriSenti: A Twitter Sentiment Analysis Benchmark for African Languages}, + author={Shamsuddeen Hassan Muhammad and Idris Abdulmumin and Abinew Ali Ayele and Nedjma Ousidhoum and David Ifeoluwa Adelani and Seid Muhie Yimam and Ibrahim Sa'id Ahmad and Meriem Beloucif and Saif Mohammad and Sebastian Ruder and Oumaima Hourrane and Pavel Brazdil and Felermino D'ario M'ario Ant'onio Ali and Davis Davis and Salomey Osei and Bello Shehu Bello and Falalu Ibrahim and Tajuddeen Gwadabe and Samuel Rutunda and Tadesse Belay and Wendimu Baye Messelle and Hailu Beshada Balcha and Sisay Adugna Chala 
and Hagos Tesfahun Gebremichael and Bernard Opoku and Steven Arthur}, + year={2023} }""", ) diff --git a/mteb/tasks/Image/ImageClustering/eng/TinyImageNet.py b/mteb/tasks/Image/ImageClustering/eng/TinyImageNet.py index d49ebbfde6..dafc9686d9 100644 --- a/mteb/tasks/Image/ImageClustering/eng/TinyImageNet.py +++ b/mteb/tasks/Image/ImageClustering/eng/TinyImageNet.py @@ -29,7 +29,7 @@ class TinyImageNet(AbsTaskImageClustering): dialect=[], modalities=["image"], sample_creation="found", - bibtex_citation="""d""", + bibtex_citation="", descriptive_stats={ "n_samples": {"valid": 10000}, "avg_character_length": {"valid": 431.4}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/RenderedSST2.py b/mteb/tasks/Image/ZeroShotClassification/eng/RenderedSST2.py index 01152f6d66..81fde5e1d3 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/RenderedSST2.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/RenderedSST2.py @@ -28,7 +28,7 @@ class RenderedSST2(AbsTaskZeroShotClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""d""", + bibtex_citation="", descriptive_stats={ "n_samples": {"test": 1820}, "avg_character_length": {"test": 10.0}, diff --git a/mteb/tasks/PairClassification/deu/FalseFriendsDeEnPC.py b/mteb/tasks/PairClassification/deu/FalseFriendsDeEnPC.py index bd1edf2f0e..9c34efc136 100644 --- a/mteb/tasks/PairClassification/deu/FalseFriendsDeEnPC.py +++ b/mteb/tasks/PairClassification/deu/FalseFriendsDeEnPC.py @@ -29,11 +29,11 @@ class FalseFriendsDeEnPC(AbsTaskPairClassification): sample_creation="created", bibtex_citation=""" @misc{Chibb_2022, - title="German-English False Friends in Multilingual Transformer Models: An Evaluation on Robustness and Word-to-Word Fine-Tuning", + title="{German-English False Friends in Multilingual Transformer Models: An Evaluation on Robustness and Word-to-Word Fine-Tuning}", author="Chibb, Aaron", year="2022", - month="Sep" - abstract="This paper explores the robustness 
of multilingual language models against false friends. False friends are words that sound or are written the same in two different languages but have different meaning. Generally, it is argued that multilingual models, such as XLM-RoBERTA, can outperform monolingual models in most tasks on conventional datasets. However, false friends are not considered in these tests. In this paper, experiments with a false friends dataset show that multilingual models are not robust against false friends; they have problems creating monolingual representations and differentiating between meanings of similarly written words in different languages. An attempt of word-based finetuning multilingual models on false friends pairs is promising, however the results do not generally solve the presented problem and still, monolingual models are more robust against false friends." + month="Sep", + abstract="{This paper explores the robustness of multilingual language models against false friends. False friends are words that sound or are written the same in two different languages but have different meaning. Generally, it is argued that multilingual models, such as XLM-RoBERTA, can outperform monolingual models in most tasks on conventional datasets. However, false friends are not considered in these tests. In this paper, experiments with a false friends dataset show that multilingual models are not robust against false friends; they have problems creating monolingual representations and differentiating between meanings of similarly written words in different languages. 
An attempt of word-based finetuning multilingual models on false friends pairs is promising, however the results do not generally solve the presented problem and still, monolingual models are more robust against false friends.}" } """, ) diff --git a/mteb/tasks/PairClassification/por/SickBrPC.py b/mteb/tasks/PairClassification/por/SickBrPC.py index 6ac2cb9a38..f656b4e710 100644 --- a/mteb/tasks/PairClassification/por/SickBrPC.py +++ b/mteb/tasks/PairClassification/por/SickBrPC.py @@ -28,7 +28,7 @@ class SickBrPC(AbsTaskPairClassification): sample_creation="human-translated and localized", bibtex_citation=""" @inproceedings{real18, - author="Real, Livy + author={Real, Livy and Rodrigues, Ana and Vieira e Silva, Andressa and Albiero, Beatriz @@ -36,13 +36,13 @@ class SickBrPC(AbsTaskPairClassification): and Guide, Bruno and Silva, Cindy and de Oliveira Lima, Guilherme - and C{\\^a}mara, Igor C. S. - and Stanojevi{\\'{c}}, Milo{\\v{s}} + and C{\^a}mara, Igor C. S. + and Stanojevi{\'{c}}, Milo{\v{s}} and Souza, Rodrigo - and de Paiva, Valeria" + and de Paiva, Valeria}, year ="2018", - title="SICK-BR: A Portuguese Corpus for Inference", - booktitle="Computational Processing of the Portuguese Language. PROPOR 2018.", + title="{SICK-BR: A Portuguese Corpus for Inference}", + booktitle="{Computational Processing of the Portuguese Language. PROPOR 2018.}", doi ="10.1007/978-3-319-99722-3_31", isbn="978-3-319-99722-3" } diff --git a/mteb/tasks/Reranking/eng/MindSmallReranking.py b/mteb/tasks/Reranking/eng/MindSmallReranking.py index f1253ba435..a0f0428958 100644 --- a/mteb/tasks/Reranking/eng/MindSmallReranking.py +++ b/mteb/tasks/Reranking/eng/MindSmallReranking.py @@ -46,5 +46,5 @@ class MindSmallReranking(AbsTaskReranking): news recommendation highly relies on the quality of news content understanding and user interest modeling. 
Many natural language processing techniques such as effective text representation methods and pre-trained language models can effectively improve the performance of news recommendation. The MIND dataset will be - available at https://msnews.github.io}.", }""", + available at https://msnews.github.io.", }""", ) diff --git a/mteb/tasks/Retrieval/deu/GermanQuADRetrieval.py b/mteb/tasks/Retrieval/deu/GermanQuADRetrieval.py index 2de3c2441b..6f4eada30d 100644 --- a/mteb/tasks/Retrieval/deu/GermanQuADRetrieval.py +++ b/mteb/tasks/Retrieval/deu/GermanQuADRetrieval.py @@ -49,7 +49,7 @@ class GermanQuADRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""misc{möller2021germanquad, + bibtex_citation="""@misc{möller2021germanquad, title={GermanQuAD and GermanDPR: Improving Non-English Question Answering and Passage Retrieval}, author={Timo Möller and Julian Risch and Malte Pietsch}, year={2021}, diff --git a/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py b/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py index bff152e239..c84b8fb72a 100644 --- a/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py +++ b/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py @@ -31,7 +31,7 @@ class JaqketRetrieval(AbsTaskRetrieval): title = "JAQKET: クイズを題材にした日本語 QA データセットの構築", booktitle = "言語処理学会第26回年次大会", year = "2020", -url = "https://www.anlp.jp/proceedings/annual_meeting/2020/pdf_dir/P2-24.pdf" +url = "https://www.anlp.jp/proceedings/annual_meeting/2020/pdf_dir/P2-24.pdf", note= "in Japanese" }""", ) diff --git a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py index 4ca7c5e495..2d207c74bf 100644 --- a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py +++ b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py @@ -52,7 +52,7 @@ class CrossLingualSemanticDiscriminationWMT19(AbsTaskRetrieval, 
MultilingualTask annotations_creators="derived", dialect=[], sample_creation="LM-generated and verified", - bibtex_citation="preprint_coming", + bibtex_citation="", # preprint_coming ) def __init__(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py index f5c0262308..9235c04d04 100644 --- a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py +++ b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py @@ -52,7 +52,7 @@ class CrossLingualSemanticDiscriminationWMT21(AbsTaskRetrieval, MultilingualTask annotations_creators="derived", dialect=[], sample_creation="LM-generated and verified", - bibtex_citation="preprint_coming", + bibtex_citation="", # preprint_coming ) def __init__(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py b/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py index 01d240eb9d..bf3dfe8218 100644 --- a/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py @@ -86,7 +86,8 @@ class XMarket(MultilingualTask, AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{Bonab_2021, series={CIKM ’21}, + bibtex_citation=""" +@inproceedings{Bonab_2021, series={CIKM ’21}, title={Cross-Market Product Recommendation}, url={http://dx.doi.org/10.1145/3459637.3482493}, DOI={10.1145/3459637.3482493}, @@ -94,7 +95,8 @@ class XMarket(MultilingualTask, AbsTaskRetrieval): publisher={ACM}, author={Bonab, Hamed and Aliannejadi, Mohammad and Vardasbi, Ali and Kanoulas, Evangelos and Allan, James}, year={2021}, - month=oct, collection={CIKM ’21} }""", + month=oct, collection={CIKM ’21} } +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/pol/MSMARCOPLRetrieval.py b/mteb/tasks/Retrieval/pol/MSMARCOPLRetrieval.py index 
9d68ad507a..9a4780612c 100644 --- a/mteb/tasks/Retrieval/pol/MSMARCOPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/MSMARCOPLRetrieval.py @@ -30,7 +30,7 @@ class MSMARCOPL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated", - bibtex_citation=""""@misc{wojtasik2024beirpl, + bibtex_citation="""@misc{wojtasik2024beirpl, title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, year={2024}, @@ -67,7 +67,7 @@ class MSMARCOPLHardNegatives(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated", - bibtex_citation=""""@misc{wojtasik2024beirpl, + bibtex_citation="""@misc{wojtasik2024beirpl, title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, year={2024}, diff --git a/mteb/tasks/Retrieval/pol/QuoraPLRetrieval.py b/mteb/tasks/Retrieval/pol/QuoraPLRetrieval.py index 12586ba697..9666c835cd 100644 --- a/mteb/tasks/Retrieval/pol/QuoraPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/QuoraPLRetrieval.py @@ -28,7 +28,7 @@ class QuoraPLRetrieval(AbsTaskRetrieval): annotations_creators=None, dialect=[], sample_creation="machine-translated", - bibtex_citation=""""@misc{wojtasik2024beirpl, + bibtex_citation="""@misc{wojtasik2024beirpl, title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, year={2024}, @@ -63,7 +63,7 @@ class QuoraPLRetrievalHardNegatives(AbsTaskRetrieval): annotations_creators=None, dialect=[], sample_creation="machine-translated", - bibtex_citation=""""@misc{wojtasik2024beirpl, + bibtex_citation="""@misc{wojtasik2024beirpl, title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the 
Polish Language}, author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, year={2024}, diff --git a/mteb/tasks/Retrieval/vie/GreenNodeTableMarkdownRetrieval.py b/mteb/tasks/Retrieval/vie/GreenNodeTableMarkdownRetrieval.py index 0fb95b0771..57d36d4312 100644 --- a/mteb/tasks/Retrieval/vie/GreenNodeTableMarkdownRetrieval.py +++ b/mteb/tasks/Retrieval/vie/GreenNodeTableMarkdownRetrieval.py @@ -29,7 +29,5 @@ class GreenNodeTableMarkdownRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - TODO: Add bibtex citation when the paper is published - """, + bibtex_citation="", # TODO: Add bibtex citation when the paper is published ) diff --git a/mteb/tasks/Retrieval/vie/ZacLegalTextRetrieval.py b/mteb/tasks/Retrieval/vie/ZacLegalTextRetrieval.py index 33cf333f9a..379bc02235 100644 --- a/mteb/tasks/Retrieval/vie/ZacLegalTextRetrieval.py +++ b/mteb/tasks/Retrieval/vie/ZacLegalTextRetrieval.py @@ -27,7 +27,5 @@ class ZacLegalTextRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - TODO: Add bibtex citation when the paper is published - """, + bibtex_citation="", # TODO: Add bibtex citation when the paper is published ) diff --git a/mteb/tasks/STS/por/SickBrSTS.py b/mteb/tasks/STS/por/SickBrSTS.py index 5298ab5437..ee75ecdd39 100644 --- a/mteb/tasks/STS/por/SickBrSTS.py +++ b/mteb/tasks/STS/por/SickBrSTS.py @@ -31,7 +31,7 @@ class SickBrSTS(AbsTaskSTS): sample_creation="human-translated and localized", bibtex_citation=""" @inproceedings{real18, - author="Real, Livy + author={Real, Livy and Rodrigues, Ana and Vieira e Silva, Andressa and Albiero, Beatriz @@ -42,10 +42,10 @@ class SickBrSTS(AbsTaskSTS): and Camara, Igor C. S. 
and Stanojevi{\'{c}}, Milo{\v{s}} and Souza, Rodrigo - and de Paiva, Valeria" + and de Paiva, Valeria}, year ="2018", - title="SICK-BR: A Portuguese Corpus for Inference", - booktitle="Computational Processing of the Portuguese Language. PROPOR 2018.", + title="{SICK-BR: A Portuguese Corpus for Inference}", + booktitle="{Computational Processing of the Portuguese Language. PROPOR 2018.}", doi ="10.1007/978-3-319-99722-3_31", isbn="978-3-319-99722-3" } From d45bcc6aa894cb1567879aa423922b2a67c3c73a Mon Sep 17 00:00:00 2001 From: Alexey Vatolin Date: Thu, 1 May 2025 23:14:03 +0200 Subject: [PATCH 2/6] Format all bibtex_citation fields --- .../dan/BornholmskBitextMining.py | 22 +- .../eng/PubChemSMILESBitextMining.py | 37 +- .../multilingual/BUCCBitextMining.py | 38 +- .../multilingual/BUCCBitextMiningFast.py | 38 +- .../multilingual/BibleNLPBitextMining.py | 14 +- .../multilingual/DiaBLaBitextMining.py | 18 +- .../multilingual/FloresBitextMining.py | 18 +- .../multilingual/IN22ConvBitextMining.py | 20 +- .../multilingual/IN22GenBitextMining.py | 20 +- .../multilingual/IWSLT2017BitextMining.py | 38 +- .../IndicGenBenchFloresBitextMining.py | 18 +- .../multilingual/LinceMTBitextMining.py | 18 +- .../multilingual/NTREXBitextMining.py | 20 +- .../multilingual/NollySentiBitextMining.py | 18 +- .../NorwegianCourtsBitextMining.py | 10 +- .../NusaTranslationBitextMining.py | 19 +- .../multilingual/NusaXBitextMining.py | 35 +- .../multilingual/PhincBitextMining.py | 18 +- .../multilingual/TatoebaBitextMining.py | 14 +- .../multilingual/WebFAQBitextMining.py | 40 +- .../BitextMining/srn/SRNCorpusBitextMining.py | 10 +- .../BitextMining/vie/VieMedEVBitextMining.py | 26 +- mteb/tasks/Classification/ara/AJGT.py | 14 +- .../ara/HotelReviewSentimentClassification.py | 14 +- ...RestaurantReviewSentimentClassification.py | 14 +- .../ara/TweetEmotionClassification.py | 14 +- .../ara/TweetSarcasmClassification.py | 38 +- .../ben/BengaliDocumentClassification.py | 32 +- 
.../ben/BengaliHateSpeechClassification.py | 13 +- .../ben/BengaliSentimentAnalysis.py | 16 +- ...garianStoreReviewSentimentClassfication.py | 17 +- ...SFDCZMovieReviewSentimentClassification.py | 14 +- ...echProductReviewSentimentClassification.py | 36 +- .../ces/CzechSoMeSentimentClassification.py | 36 +- .../ces/CzechSubjectivityClassification.py | 23 +- .../dan/AngryTweetsClassification.py | 16 +- .../dan/DKHateClassification.py | 60 +- .../DanishPoliticalCommentsClassification.py | 14 +- .../dan/DdiscoCohesionClassification.py | 58 +- .../dan/LccSentimentClassification.py | 42 +- ...liticiansTwitterSentimentClassification.py | 44 +- .../deu/TenKGnadClassification.py | 24 +- .../ell/GreekLegalCodeClassification.py | 21 +- .../eng/AmazonPolarityClassification.py | 16 +- .../Classification/eng/ArxivClassification.py | 22 +- .../eng/Banking77Classification.py | 50 +- .../eng/DBpediaClassification.py | 26 +- .../eng/EmotionClassification.py | 44 +- .../eng/FinancialPhrasebankClassification.py | 18 +- .../eng/FrenkEnClassification.py | 20 +- .../Classification/eng/ImdbClassification.py | 40 +- .../eng/LegalBenchClassification.py | 3560 +++++++++-------- .../Classification/eng/NewsClassification.py | 25 +- .../eng/PatentClassification.py | 38 +- .../eng/PoemSentimentClassification.py | 14 +- .../eng/SDSEyeProtectionClassification.py | 31 +- .../eng/SDSGlovesClassification.py | 31 +- .../eng/ToxicChatClassification.py | 18 +- .../eng/ToxicConversationsClassification.py | 16 +- .../TweetSentimentExtractionClassification.py | 16 +- .../eng/TweetTopicSingleClassification.py | 32 +- .../eng/WikipediaBioMetChemClassification.py | 16 +- .../WikipediaBiolumNeurochemClassification.py | 16 +- ...kipediaChemEngSpecialtiesClassification.py | 16 +- .../eng/WikipediaChemFieldsClassification.py | 16 +- .../WikipediaChemistryTopicsClassification.py | 16 +- ...pediaCompChemSpectroscopyClassification.py | 16 +- ...ediaCryobiologySeparationClassification.py | 16 +- 
...CrystallographyAnalyticalClassification.py | 16 +- ...ediaGreenhouseEnantiopureClassification.py | 16 +- .../WikipediaIsotopesFissionClassification.py | 16 +- .../WikipediaLuminescenceClassification.py | 16 +- ...WikipediaOrganicInorganicClassification.py | 16 +- ...ipediaSaltsSemiconductorsClassification.py | 16 +- ...ipediaSolidStateColloidalClassification.py | 16 +- ...kipediaTheoreticalAppliedClassification.py | 16 +- .../eng/YahooAnswersTopicsClassification.py | 25 +- .../eng/YelpReviewFullClassification.py | 26 +- .../Classification/est/estonian_valence.py | 17 +- .../fas/PersianFoodSentimentClassification.py | 18 +- .../fil/FilipinoHateSpeechClassification.py | 22 +- .../FilipinoShopeeReviewsClassification.py | 19 +- .../fin/FinToxicityClassification.py | 25 +- .../fra/MovieReviewSentimentClassification.py | 4 +- .../heb/HebrewSentimentAnalysis.py | 26 +- .../hin/HindiDiscourseClassification.py | 47 +- .../hin/SentimentAnalysisHindi.py | 17 +- .../hrv/FrenkHrClassification.py | 20 +- .../IndonesianIdClickbaitClassification.py | 26 +- ...esianMongabayConservationClassification.py | 46 +- .../ita/ItaCaseholdClassification.py | 38 +- ...lianLinguistAcceptabilityClassification.py | 32 +- .../jav/JavaneseIMDBClassification.py | 20 +- .../Classification/jpn/WRIMEClassification.py | 54 +- .../kan/KannadaNewsClassification.py | 15 +- .../kat/GeorgianSentimentClassification.py | 56 +- mteb/tasks/Classification/kor/KlueTC.py | 18 +- mteb/tasks/Classification/kor/KorFin.py | 16 +- .../kor/KorHateClassification.py | 19 +- .../kor/KorSarcasmClassification.py | 20 +- .../kur/KurdishSentimentClassification.py | 22 +- .../mal/MalayalamNewsClassification.py | 14 +- .../mar/MarathiNewsClassification.py | 14 +- .../MacedonianTweetSentimentClassification.py | 34 +- .../multilingual/AfriSentiClassification.py | 12 +- .../AmazonCounterfactualClassification.py | 44 +- .../AmazonReviewsClassification.py | 18 +- .../CataloniaTweetClassification.py | 56 +- 
.../CyrillicTurkicLangClassification.py | 16 +- .../multilingual/HinDialectClassification.py | 19 +- .../multilingual/IndicLangClassification.py | 36 +- .../IndicNLPNewsClassification.py | 15 +- .../IndicSentimentClassification.py | 16 +- .../multilingual/LanguageClassification.py | 26 +- .../multilingual/MTOPDomainClassification.py | 41 +- .../multilingual/MTOPIntentClassification.py | 41 +- .../multilingual/MasakhaNEWSClassification.py | 18 +- .../MassiveIntentClassification.py | 18 +- .../MassiveScenarioClassification.py | 18 +- .../multilingual/MultiHateClassification.py | 92 +- .../MultilingualSentimentClassification.py | 50 +- .../Classification/multilingual/NaijaSenti.py | 47 +- .../multilingual/NordicLangClassification.py | 37 +- .../NusaParagraphEmotionClassification.py | 28 +- .../NusaParagraphTopicClassification.py | 28 +- .../Classification/multilingual/NusaXSenti.py | 24 +- .../multilingual/SIB200Classification.py | 14 +- .../multilingual/ScalaClassification.py | 28 +- .../SouthAfricanLangClassification.py | 16 +- .../SwissJudgementClassification.py | 15 +- .../TweetSentimentClassification.py | 32 +- mteb/tasks/Classification/mya/MyanmarNews.py | 19 +- .../nep/NepaliNewsClassification.py | 42 +- .../DutchBookReviewSentimentClassification.py | 27 +- .../Classification/nob/NoRecClassification.py | 57 +- .../nob/NorwegianParliamentClassification.py | 36 +- .../ory/OdiaNewsClassification.py | 14 +- .../pan/PunjabiNewsClassification.py | 14 +- .../pol/PolishClassification.py | 70 +- .../por/HateSpeechPortugueseClassification.py | 44 +- mteb/tasks/Classification/ron/Moroco.py | 18 +- .../ron/RomanianReviewsSentiment.py | 13 +- .../ron/RomanianSentimentClassification.py | 11 +- .../rus/HeadlineClassification.py | 50 +- .../rus/InappropriatenessClassification.py | 116 +- .../rus/KinopoiskClassification.py | 20 +- .../rus/RuReviewsClassification.py | 26 +- .../rus/ru_nlu_intent_classification.py | 20 +- .../tasks/Classification/rus/senti_ru_eval.py | 13 +- 
.../san/SanskritShlokasClassification.py | 42 +- .../sin/SinhalaNewsClassification.py | 27 +- .../sin/SinhalaNewsSourceClassification.py | 15 +- ...SFDSKMovieReviewSentimentClassification.py | 14 +- .../slv/FrenkSlClassification.py | 20 +- .../spa/SpanishSentimentClassification.py | 50 +- .../ssw/SiswatiNewsClassification.py | 13 +- ...lovakMovieReviewSentimentClassification.py | 16 +- .../swa/SwahiliNewsClassification.py | 20 +- .../Classification/swe/DalajClassification.py | 14 +- .../swe/SweRecClassification.py | 25 +- .../tam/TamilNewsClassification.py | 14 +- .../tha/WisesightSentimentClassification.py | 26 +- .../tha/WongnaiReviewsClassification .py | 23 +- .../tsn/TswanaNewsClassification.py | 24 +- .../TurkishMovieSentimentClassification.py | 18 +- .../TurkishProductSentimentClassification.py | 18 +- .../ukr/UkrFormalityClassification.py | 22 +- .../urd/UrduRomanSentimentClassification.py | 14 +- .../vie/VieStudentFeedbackClassification.py | 22 +- .../Classification/zho/CMTEBClassification.py | 220 +- .../zho/YueOpenriceReviewClassification.py | 18 +- .../zul/IsiZuluNewsClassification.py | 13 +- .../Clustering/deu/BlurbsClusteringP2P.py | 32 +- .../Clustering/deu/BlurbsClusteringS2S.py | 32 +- .../Clustering/eng/ArxivClusteringP2P.py | 36 +- .../Clustering/eng/ArxivClusteringS2S.py | 18 +- .../Clustering/eng/BigPatentClustering.py | 60 +- .../Clustering/eng/BuiltBenchClusteringP2P.py | 14 +- .../Clustering/eng/BuiltBenchClusteringS2S.py | 14 +- mteb/tasks/Clustering/eng/ClusTrecCovid.py | 29 +- mteb/tasks/Clustering/eng/RedditClustering.py | 56 +- .../Clustering/eng/RedditClusteringP2P.py | 56 +- .../Clustering/eng/StackExchangeClustering.py | 56 +- .../eng/StackExchangeClusteringP2P.py | 56 +- .../eng/TwentyNewsgroupsClustering.py | 58 +- .../Clustering/eng/WikiCitiesClustering.py | 12 +- ...WikipediaChemistrySpecialtiesClustering.py | 16 +- .../eng/WikipediaChemistryTopicsClustering.py | 16 +- .../Clustering/fra/AlloProfClusteringP2P.py | 25 +- 
.../Clustering/fra/AlloProfClusteringS2S.py | 25 +- mteb/tasks/Clustering/fra/HALClusteringS2S.py | 36 +- .../Clustering/jpn/MewsC16JaClustering.py | 37 +- .../Clustering/kor/KlueMrcDomainClustering.py | 18 +- .../kor/KlueYnatMrcCategoryClustering.py | 18 +- .../multilingual/IndicReviewsClusteringP2P.py | 16 +- .../multilingual/MLSUMClusteringP2P.py | 28 +- .../multilingual/MLSUMClusteringS2S.py | 28 +- .../multilingual/MasakhaNEWSClusteringP2P.py | 16 +- .../multilingual/MasakhaNEWSClusteringS2S.py | 16 +- .../multilingual/SIB200ClusteringS2S.py | 14 +- .../nob/SNLHierarchicalClustering.py | 28 +- .../nob/VGHierarchicalClustering.py | 28 +- mteb/tasks/Clustering/nob/snl_clustering.py | 14 +- mteb/tasks/Clustering/nob/vg_clustering.py | 14 +- mteb/tasks/Clustering/pol/PolishClustering.py | 124 +- mteb/tasks/Clustering/swe/SwednClustering.py | 28 +- mteb/tasks/Clustering/swe/swedn_clustering.py | 14 +- mteb/tasks/Clustering/zho/CMTEBClustering.py | 126 +- .../eng/BLINKIT2IMultiChoice.py | 14 +- .../eng/BLINKIT2TMultiChoice.py | 14 +- .../Image/Any2AnyMultiChoice/eng/CVBench.py | 56 +- .../eng/BLINKIT2IRetrieval.py | 14 +- .../eng/BLINKIT2TRetrieval.py | 14 +- .../Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py | 16 +- .../eng/CUB200I2IRetrieval.py | 16 +- .../Any2AnyRetrieval/eng/EDIST2ITRetrieval.py | 16 +- .../eng/EncyclopediaVQAIT2ITRetrieval.py | 16 +- .../Any2AnyRetrieval/eng/FORBI2IRetrieval.py | 20 +- .../eng/Fashion200kI2TRetrieval.py | 16 +- .../eng/Fashion200kT2IRetrieval.py | 16 +- .../eng/FashionIQIT2IRetrieval.py | 16 +- .../eng/Flickr30kI2TRetrieval.py | 20 +- .../eng/Flickr30kT2IRetrieval.py | 20 +- .../Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py | 16 +- .../Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py | 16 +- .../eng/HatefulMemesI2TRetrieval.py | 18 +- .../eng/HatefulMemesT2IRetrieval.py | 18 +- .../eng/ImageCoDeT2IRetrieval.py | 14 +- .../eng/InfoSeekIT2ITRetrieval.py | 16 +- .../eng/InfoSeekIT2TRetrieval.py | 16 +- .../eng/LLaVAIT2TRetrieval.py | 38 +- 
.../Any2AnyRetrieval/eng/METI2IRetrieval.py | 14 +- .../eng/MSCOCOI2TRetrieval.py | 18 +- .../eng/MSCOCOT2IRetrieval.py | 18 +- .../eng/MemotionI2TRetrieval.py | 16 +- .../eng/MemotionT2IRetrieval.py | 16 +- .../eng/NIGHTSI2IRetrieval.py | 16 +- .../eng/OKVQAIT2TRetrieval.py | 16 +- .../eng/OVENIT2ITRetrieval.py | 16 +- .../Any2AnyRetrieval/eng/OVENIT2TRetrieval.py | 16 +- .../eng/ROxfordI2IRetrieval.py | 46 +- .../Any2AnyRetrieval/eng/RP2kI2IRetrieval.py | 14 +- .../eng/RParisI2IRetrieval.py | 46 +- .../eng/ReMuQIT2TRetrieval.py | 40 +- .../Any2AnyRetrieval/eng/SOPI2IRetrieval.py | 16 +- .../eng/SciMMIRI2TRetrieval.py | 14 +- .../eng/SciMMIRT2IRetrieval.py | 14 +- .../eng/SketchyI2IRetrieval.py | 14 +- .../eng/StanfordCarsI2IRetrieval.py | 14 +- .../eng/TUBerlinT2IRetrieval.py | 22 +- .../Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py | 16 +- .../eng/VidoreBenchRetrieval.py | 140 +- .../eng/VisualNewsI2TRetrieval.py | 16 +- .../eng/VisualNewsT2IRetrieval.py | 16 +- .../eng/VizWizIT2TRetrieval.py | 16 +- .../eng/WebQAT2ITRetrieval.py | 16 +- .../Any2AnyRetrieval/eng/WebQAT2TRetrieval.py | 16 +- .../multilingual/VdrMultilingualRetrieval.py | 14 +- .../multilingual/WITT2IRetrieval.py | 18 +- .../multilingual/XFlickr30kCoT2IRetrieval.py | 18 +- .../multilingual/XM3600T2IRetrieval.py | 16 +- .../eng/BirdsnapClassification.py | 16 +- .../Image/ImageClassification/eng/CIFAR.py | 29 +- .../eng/Caltech101Classification.py | 24 +- .../eng/Country211Classification.py | 14 +- .../eng/DTDClassification.py | 14 +- .../eng/EuroSATClassification.py | 24 +- .../eng/FER2013Classification.py | 20 +- .../eng/FGVCAircraftClassification.py | 20 +- .../eng/Food101Classification.py | 14 +- .../eng/GTSRBClassification.py | 24 +- .../ImageClassification/eng/Imagenet1k.py | 18 +- .../eng/MNISTClassification.py | 16 +- .../eng/OxfordFlowersClassification.py | 23 +- .../eng/OxfordPetsClassification.py | 24 +- .../eng/PatchCamelyonClassification.py | 27 +- .../eng/RESISC45Classification.py | 24 +- 
.../eng/STL10Classification.py | 32 +- .../eng/SUN397Classification.py | 22 +- .../eng/StanfordCarsClassification.py | 14 +- .../eng/UCF101Classification.py | 20 +- mteb/tasks/Image/ImageClustering/eng/CIFAR.py | 28 +- .../Image/ImageClustering/eng/ImageNet.py | 48 +- .../eng/PascalVOC2007.py | 22 +- .../AROCocoOrder.py | 14 +- .../AROFlickrOrder.py | 14 +- .../AROVisualAttribution.py | 14 +- .../AROVisualRelation.py | 14 +- .../ImageTextPairClassification/ImageCoDe.py | 14 +- .../ImageTextPairClassification/SugarCrepe.py | 16 +- .../ImageTextPairClassification/Winoground.py | 20 +- .../Image/VisualSTS/eng/STS12VisualSTS.py | 14 +- .../Image/VisualSTS/eng/STS13VisualSTS.py | 14 +- .../Image/VisualSTS/eng/STS14VisualSTS.py | 14 +- .../Image/VisualSTS/eng/STS15VisualSTS.py | 14 +- .../Image/VisualSTS/eng/STS16VisualSTS.py | 14 +- .../STS17MultilingualVisualSTS.py | 14 +- .../STSBenchmarkMultilingualVisualSTS.py | 14 +- .../ZeroShotClassification/eng/Birdsnap.py | 16 +- .../Image/ZeroShotClassification/eng/CIFAR.py | 29 +- .../Image/ZeroShotClassification/eng/CLEVR.py | 34 +- .../ZeroShotClassification/eng/Caltech101.py | 23 +- .../ZeroShotClassification/eng/Country211.py | 14 +- .../Image/ZeroShotClassification/eng/DTD.py | 13 +- .../ZeroShotClassification/eng/EuroSAT.py | 23 +- .../ZeroShotClassification/eng/FER2013.py | 20 +- .../eng/FGVCAircraft.py | 20 +- .../ZeroShotClassification/eng/Food101.py | 14 +- .../Image/ZeroShotClassification/eng/GTSRB.py | 23 +- .../ZeroShotClassification/eng/Imagenet1k.py | 18 +- .../Image/ZeroShotClassification/eng/MNIST.py | 16 +- .../ZeroShotClassification/eng/OxfordPets.py | 20 +- .../eng/PatchCamelyon.py | 30 +- .../ZeroShotClassification/eng/RESISC45.py | 23 +- .../Image/ZeroShotClassification/eng/STL10.py | 32 +- .../ZeroShotClassification/eng/SUN397.py | 21 +- .../ZeroShotClassification/eng/SciMMIR.py | 19 +- .../eng/StanfordCars.py | 14 +- .../ZeroShotClassification/eng/UCF101.py | 20 +- .../eng/Core17InstructionRetrieval.py 
| 18 +- .../eng/News21InstructionRetrieval.py | 18 +- .../eng/Robust04InstructionRetrieval.py | 18 +- .../multilingual/mFollowIR.py | 28 +- .../kor/KorHateSpeechMLClassification.py | 37 +- .../mlt/MalteseNewsClassification.py | 22 +- .../MultiEURLEXMultilabelClassification.py | 22 +- .../por/BrazilianToxicTweetsClassification.py | 32 +- .../rus/CEDRClassification.py | 21 +- .../rus/SensitiveTopicsClassification.py | 58 +- mteb/tasks/PairClassification/ara/ArEntail.py | 18 +- .../PairClassification/ces/CTKFactsNLI.py | 22 +- .../deu/FalseFriendsDeEnPC.py | 18 +- .../PairClassification/eng/LegalBenchPC.py | 69 +- .../eng/PubChemAISentenceParaphrasePC.py | 37 +- .../PairClassification/eng/PubChemSMILESPC.py | 37 +- .../eng/PubChemSynonymPC.py | 37 +- .../eng/PubChemWikiParagraphsPC.py | 37 +- .../eng/SprintDuplicateQuestionsPC.py | 44 +- .../eng/TwitterSemEval2015PC.py | 38 +- .../eng/TwitterURLCorpusPC.py | 40 +- .../fas/FaMTEBPairClassification.py | 63 +- mteb/tasks/PairClassification/fas/FarsTail.py | 18 +- .../hye/ArmenianParaphrasePC.py | 20 +- mteb/tasks/PairClassification/ind/IndoNLI.py | 24 +- mteb/tasks/PairClassification/kor/KlueNLI.py | 18 +- .../IndicXnliPairClassification.py | 22 +- .../multilingual/OpusparcusPC.py | 18 +- .../multilingual/PawsXPairClassification.py | 18 +- .../PubChemWikiPairClassification.py | 37 +- .../PairClassification/multilingual/RTE3.py | 31 +- .../PairClassification/multilingual/XNLI.py | 52 +- .../multilingual/XStance.py | 22 +- mteb/tasks/PairClassification/pol/PolishPC.py | 160 +- .../tasks/PairClassification/por/Assin2RTE.py | 18 +- mteb/tasks/PairClassification/por/SickBrPC.py | 42 +- mteb/tasks/PairClassification/rus/TERRa.py | 32 +- .../zho/CMTEBPairClassification.py | 106 +- .../Reranking/ara/NamaaMrTydiReranking.py | 16 +- .../Reranking/eng/AskUbuntuDupQuestions.py | 18 +- .../Reranking/eng/BuiltBenchReranking.py | 14 +- .../tasks/Reranking/eng/MindSmallReranking.py | 48 +- mteb/tasks/Reranking/eng/SciDocsReranking.py | 
40 +- .../eng/StackOverflowDupQuestions.py | 16 +- .../eng/WebLINXCandidatesReranking.py | 16 +- mteb/tasks/Reranking/fra/AlloprofReranking.py | 22 +- mteb/tasks/Reranking/fra/SyntecReranking.py | 18 +- mteb/tasks/Reranking/jpn/MMarcoReranking.py | 15 +- .../Reranking/multilingual/ESCIReranking.py | 12 +- .../Reranking/multilingual/MIRACLReranking.py | 20 +- .../WikipediaRerankingMultilingual.py | 12 +- mteb/tasks/Reranking/rus/RuBQReranking.py | 16 +- mteb/tasks/Reranking/zho/CMTEBReranking.py | 85 +- .../Retrieval/ara/SadeemQuestionRetrieval.py | 120 +- mteb/tasks/Retrieval/code/AppsRetrieval.py | 14 +- .../code/COIRCodeSearchNetRetrieval.py | 9 +- .../Retrieval/code/CodeEditSearchRetrieval.py | 9 +- .../Retrieval/code/CodeFeedbackMTRetrieval.py | 20 +- .../Retrieval/code/CodeFeedbackSTRetrieval.py | 20 +- mteb/tasks/Retrieval/code/CodeRAG.py | 16 +- .../code/CodeSearchNetCCRetrieval.py | 20 +- .../Retrieval/code/CodeSearchNetRetrieval.py | 9 +- .../code/CodeTransOceanContestRetrieval.py | 20 +- .../code/CodeTransOceanDLRetrieval.py | 20 +- mteb/tasks/Retrieval/code/CosQARetrieval.py | 20 +- .../code/StackOverflowQARetrieval.py | 20 +- .../code/SyntheticText2SqlRetrieval.py | 16 +- mteb/tasks/Retrieval/dan/DanFeverRetrieval.py | 56 +- mteb/tasks/Retrieval/dan/TV2Nordretrieval.py | 58 +- .../Retrieval/dan/TwitterHjerneRetrieval.py | 8 +- mteb/tasks/Retrieval/deu/GerDaLIRRetrieval.py | 28 +- .../Retrieval/deu/GerDaLIRSmallRetrieval.py | 28 +- .../tasks/Retrieval/deu/GermanDPRRetrieval.py | 18 +- .../deu/GermanGovServiceRetrieval.py | 26 +- .../Retrieval/deu/GermanQuADRetrieval.py | 18 +- .../tasks/Retrieval/deu/LegalQuADRetrieval.py | 24 +- .../Retrieval/eng/AILACasedocsRetrieval.py | 32 +- .../Retrieval/eng/AILAStatutesRetrieval.py | 32 +- .../Retrieval/eng/ARCChallengeRetrieval.py | 22 +- mteb/tasks/Retrieval/eng/AlphaNLIRetrieval.py | 21 +- mteb/tasks/Retrieval/eng/ArguAnaRetrieval.py | 14 +- mteb/tasks/Retrieval/eng/BrightRetrieval.py | 18 +- 
.../Retrieval/eng/BuiltBenchRetrieval.py | 14 +- .../eng/CQADupstackAndroidRetrieval.py | 36 +- .../eng/CQADupstackEnglishRetrieval.py | 36 +- .../eng/CQADupstackGamingRetrieval.py | 36 +- .../Retrieval/eng/CQADupstackGisRetrieval.py | 36 +- .../eng/CQADupstackMathematicaRetrieval.py | 36 +- .../eng/CQADupstackPhysicsRetrieval.py | 36 +- .../eng/CQADupstackProgrammersRetrieval.py | 36 +- .../eng/CQADupstackStatsRetrieval.py | 36 +- .../Retrieval/eng/CQADupstackTexRetrieval.py | 36 +- .../Retrieval/eng/CQADupstackUnixRetrieval.py | 36 +- .../eng/CQADupstackWebmastersRetrieval.py | 36 +- .../eng/CQADupstackWordpressRetrieval.py | 36 +- .../Retrieval/eng/ChemHotpotQARetrieval.py | 61 +- mteb/tasks/Retrieval/eng/ChemNQRetrieval.py | 34 +- .../Retrieval/eng/ClimateFEVERRetrieval.py | 54 +- mteb/tasks/Retrieval/eng/DBPediaRetrieval.py | 44 +- mteb/tasks/Retrieval/eng/FEVERRetrieval.py | 80 +- .../tasks/Retrieval/eng/FaithDialRetrieval.py | 26 +- .../Retrieval/eng/FeedbackQARetrieval.py | 36 +- mteb/tasks/Retrieval/eng/FiQA2018Retrieval.py | 17 +- mteb/tasks/Retrieval/eng/HagridRetrieval.py | 14 +- .../tasks/Retrieval/eng/HellaSwagRetrieval.py | 20 +- mteb/tasks/Retrieval/eng/HotpotQARetrieval.py | 96 +- .../Retrieval/eng/LEMBNarrativeQARetrieval.py | 50 +- .../Retrieval/eng/LEMBNeedleRetrieval.py | 16 +- .../Retrieval/eng/LEMBPasskeyRetrieval.py | 16 +- .../tasks/Retrieval/eng/LEMBQMSumRetrieval.py | 68 +- .../eng/LEMBSummScreenFDRetrieval.py | 42 +- .../Retrieval/eng/LEMBWikimQARetrieval.py | 18 +- .../LegalBenchConsumerContractsQARetrieval.py | 26 +- .../LegalBenchCorporateLobbyingRetrieval.py | 124 +- .../eng/LegalSummarizationRetrieval.py | 26 +- .../tasks/Retrieval/eng/LitSearchRetrieval.py | 12 +- mteb/tasks/Retrieval/eng/MLQuestions.py | 34 +- mteb/tasks/Retrieval/eng/MSMARCORetrieval.py | 74 +- .../tasks/Retrieval/eng/MSMARCOv2Retrieval.py | 41 +- .../tasks/Retrieval/eng/MedicalQARetrieval.py | 22 +- mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py | 14 +- 
mteb/tasks/Retrieval/eng/NQRetrieval.py | 36 +- .../Retrieval/eng/NanoArguAnaRetrieval.py | 14 +- .../eng/NanoClimateFeverRetrieval.py | 18 +- .../Retrieval/eng/NanoDBPediaRetrieval.py | 9 +- .../tasks/Retrieval/eng/NanoFEVERRetrieval.py | 40 +- .../Retrieval/eng/NanoFiQA2018Retrieval.py | 17 +- .../Retrieval/eng/NanoHotpotQARetrieval.py | 48 +- .../Retrieval/eng/NanoMSMARCORetrieval.py | 37 +- .../Retrieval/eng/NanoNFCorpusRetrieval.py | 14 +- mteb/tasks/Retrieval/eng/NanoNQRetrieval.py | 18 +- .../tasks/Retrieval/eng/NanoQuoraRetrieval.py | 16 +- .../Retrieval/eng/NanoSCIDOCSRetrieval.py | 14 +- .../Retrieval/eng/NanoSciFactRetrieval.py | 14 +- .../Retrieval/eng/NanoTouche2020Retrieval.py | 36 +- .../Retrieval/eng/NarrativeQARetrieval.py | 18 +- mteb/tasks/Retrieval/eng/PiqaRetrieval.py | 28 +- mteb/tasks/Retrieval/eng/QuailRetrieval.py | 28 +- mteb/tasks/Retrieval/eng/QuoraRetrieval.py | 32 +- mteb/tasks/Retrieval/eng/RARbCodeRetrieval.py | 36 +- mteb/tasks/Retrieval/eng/RARbMathRetrieval.py | 40 +- mteb/tasks/Retrieval/eng/SCIDOCSRetrieval.py | 14 +- mteb/tasks/Retrieval/eng/SciFactRetrieval.py | 14 +- mteb/tasks/Retrieval/eng/SiqaRetrieval.py | 22 +- mteb/tasks/Retrieval/eng/SpartQARetrieval.py | 22 +- .../tasks/Retrieval/eng/TRECCOVIDRetrieval.py | 18 +- .../Retrieval/eng/TempReasonL1Retrieval.py | 22 +- .../eng/TempReasonL2ContextRetrieval.py | 22 +- .../eng/TempReasonL2FactRetrieval.py | 22 +- .../eng/TempReasonL2PureRetrieval.py | 22 +- .../eng/TempReasonL3ContextRetrieval.py | 22 +- .../eng/TempReasonL3FactRetrieval.py | 22 +- .../eng/TempReasonL3PureRetrieval.py | 22 +- mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py | 40 +- .../Retrieval/eng/Touche2020Retrieval.py | 52 +- .../Retrieval/eng/WinoGrandeRetrieval.py | 30 +- mteb/tasks/Retrieval/est/estqa.py | 10 +- mteb/tasks/Retrieval/fra/AlloprofRetrieval.py | 14 +- mteb/tasks/Retrieval/fra/BSARDRetrieval.py | 16 +- mteb/tasks/Retrieval/fra/FQuADRetrieval.py | 40 +- 
mteb/tasks/Retrieval/fra/SyntecRetrieval.py | 18 +- mteb/tasks/Retrieval/hun/HunSum2.py | 14 +- mteb/tasks/Retrieval/jpn/JaQuADRetrieval.py | 18 +- mteb/tasks/Retrieval/jpn/JaqketRetrieval.py | 18 +- mteb/tasks/Retrieval/kor/AutoRAGRetrieval.py | 20 +- mteb/tasks/Retrieval/kor/KoStrategyQA.py | 8 +- .../multilingual/BelebeleRetrieval.py | 14 +- ...CrossLingualSemanticDiscriminationWMT19.py | 2 +- ...CrossLingualSemanticDiscriminationWMT21.py | 2 +- .../multilingual/IndicQARetrieval.py | 16 +- .../Retrieval/multilingual/MIRACLRetrieval.py | 60 +- .../Retrieval/multilingual/MLQARetrieval.py | 16 +- .../multilingual/MintakaRetrieval.py | 28 +- .../Retrieval/multilingual/MrTidyRetrieval.py | 14 +- .../multilingual/MultiLongDocRetrieval.py | 15 +- .../multilingual/NeuCLIR2022Retrieval.py | 28 +- .../multilingual/NeuCLIR2023Retrieval.py | 36 +- .../multilingual/PublicHealthQARetrieval.py | 16 +- .../StatcanDialogueDatasetRetrieval.py | 24 +- .../Retrieval/multilingual/WebFAQRetrieval.py | 20 +- .../multilingual/XMarketRetrieval.py | 23 +- .../Retrieval/multilingual/XPQARetrieval.py | 16 +- .../Retrieval/multilingual/XQuADRetrieval.py | 34 +- .../tasks/Retrieval/nld/ArguAnaNLRetrieval.py | 20 +- .../nld/CQADupstackAndroidNLRetrieval.py | 20 +- .../nld/CQADupstackEnglishNLRetrieval.py | 20 +- .../nld/CQADupstackGamingNLRetrieval.py | 20 +- .../nld/CQADupstackGisNLRetrieval.py | 20 +- .../nld/CQADupstackMathematicaNLRetrieval.py | 20 +- .../nld/CQADupstackPhysicsNLRetrieval.py | 20 +- .../nld/CQADupstackProgrammersNLRetrieval.py | 20 +- .../nld/CQADupstackStatsNLRetrieval.py | 20 +- .../nld/CQADupstackTexNLRetrieval.py | 20 +- .../nld/CQADupstackUnixNLRetrieval.py | 20 +- .../nld/CQADupstackWebmastersNLRetrieval.py | 20 +- .../nld/CQADupstackWordpressNLRetrieval.py | 20 +- .../Retrieval/nld/ClimateFEVERNLRetrieval.py | 20 +- .../tasks/Retrieval/nld/DBPediaNLRetrieval.py | 20 +- mteb/tasks/Retrieval/nld/FEVERNLRetrieval.py | 20 +- .../Retrieval/nld/FiQA2018NLRetrieval.py | 
20 +- .../Retrieval/nld/HotpotQANLRetrieval.py | 20 +- mteb/tasks/Retrieval/nld/MMARCONLRetrieval.py | 34 +- .../Retrieval/nld/NFCorpusNLRetrieval.py | 20 +- mteb/tasks/Retrieval/nld/NQNLRetrieval.py | 20 +- mteb/tasks/Retrieval/nld/QuoraNLRetrieval.py | 20 +- .../tasks/Retrieval/nld/SCIDOCSNLRetrieval.py | 20 +- .../tasks/Retrieval/nld/SciFactNLRetrieval.py | 20 +- .../Retrieval/nld/TRECCOVIDNLRetrieval.py | 20 +- .../Retrieval/nld/Touche2020NLRetrieval.py | 20 +- mteb/tasks/Retrieval/nob/norquad.py | 38 +- mteb/tasks/Retrieval/nob/snl_retrieval.py | 14 +- .../tasks/Retrieval/pol/ArguAnaPLRetrieval.py | 18 +- .../tasks/Retrieval/pol/DBPediaPLRetrieval.py | 44 +- mteb/tasks/Retrieval/pol/FiQAPLRetrieval.py | 17 +- .../Retrieval/pol/HotpotQAPLRetrieval.py | 36 +- .../tasks/Retrieval/pol/MSMARCOPLRetrieval.py | 36 +- .../Retrieval/pol/NFCorpusPLRetrieval.py | 18 +- mteb/tasks/Retrieval/pol/NQPLRetrieval.py | 36 +- mteb/tasks/Retrieval/pol/QuoraPLRetrieval.py | 36 +- .../tasks/Retrieval/pol/SCIDOCSPLRetrieval.py | 18 +- .../tasks/Retrieval/pol/SciFactPLRetrieval.py | 18 +- .../Retrieval/pol/TRECCOVIDPLRetrieval.py | 18 +- mteb/tasks/Retrieval/rus/RiaNewsRetrieval.py | 28 +- mteb/tasks/Retrieval/rus/RuBQRetrieval.py | 16 +- .../tasks/Retrieval/slk/SlovakSumRetrieval.py | 16 +- .../spa/SpanishPassageRetrievalS2P.py | 27 +- .../spa/SpanishPassageRetrievalS2S.py | 27 +- mteb/tasks/Retrieval/swe/SweFaqRetrieval.py | 16 +- mteb/tasks/Retrieval/swe/SwednRetrieval.py | 14 +- mteb/tasks/Retrieval/tur/TurHistQuad.py | 26 +- .../vie/GreenNodeTableMarkdownRetrieval.py | 2 +- mteb/tasks/Retrieval/vie/VieQuADRetrieval.py | 37 +- .../Retrieval/vie/ZacLegalTextRetrieval.py | 2 +- mteb/tasks/Retrieval/zho/CMTEBRetrieval.py | 154 +- mteb/tasks/Retrieval/zho/LeCaRDv2Retrieval.py | 18 +- mteb/tasks/STS/deu/GermanSTSBenchmarkSTS.py | 14 +- mteb/tasks/STS/eng/BiossesSTS.py | 32 +- mteb/tasks/STS/eng/STS12STS.py | 28 +- mteb/tasks/STS/eng/STS13STS.py | 16 +- mteb/tasks/STS/eng/STS14STS.py | 
38 +- mteb/tasks/STS/eng/STS15STS.py | 34 +- mteb/tasks/STS/eng/STS16STS.py | 46 +- mteb/tasks/STS/eng/STSBenchmarkSTS.py | 14 +- mteb/tasks/STS/eng/SickrSTS.py | 54 +- mteb/tasks/STS/fao/FaroeseSTS.py | 28 +- mteb/tasks/STS/fin/FinParaSTS.py | 48 +- mteb/tasks/STS/jpn/JSICK.py | 22 +- mteb/tasks/STS/jpn/JSTS.py | 56 +- mteb/tasks/STS/kor/KlueSTS.py | 18 +- mteb/tasks/STS/kor/KorSTS.py | 14 +- .../STS/multilingual/IndicCrosslingualSTS.py | 28 +- .../STS/multilingual/STS17CrosslingualSTS.py | 48 +- .../STS/multilingual/STS22CrosslingualSTS.py | 116 +- .../STSBenchmarkMultilingualSTS.py | 14 +- mteb/tasks/STS/multilingual/SemRel24STS.py | 29 +- mteb/tasks/STS/pol/PolishSTS.py | 89 +- mteb/tasks/STS/por/Assin2STS.py | 18 +- mteb/tasks/STS/por/SickBrSTS.py | 38 +- mteb/tasks/STS/ron/RonSTS.py | 16 +- mteb/tasks/STS/rus/RUParaPhraserSTS.py | 49 +- mteb/tasks/STS/rus/RuSTSBenchmarkSTS.py | 14 +- mteb/tasks/STS/spa/STSES.py | 26 +- mteb/tasks/STS/zho/CMTEBSTS.py | 164 +- .../eng/SummEvalSummarization.py | 28 +- .../fra/SummEvalFrSummarization.py | 28 +- .../CQADupStackNLRetrieval.py | 20 +- .../aggregated_tasks/CQADupStackRetrieval.py | 36 +- .../STS17MultilingualVisualSTS.py | 28 +- .../STSBenchmarkMultilingualVisualSTS.py | 28 +- 588 files changed, 10410 insertions(+), 9179 deletions(-) diff --git a/mteb/tasks/BitextMining/dan/BornholmskBitextMining.py b/mteb/tasks/BitextMining/dan/BornholmskBitextMining.py index 242f51ac37..a94765d961 100644 --- a/mteb/tasks/BitextMining/dan/BornholmskBitextMining.py +++ b/mteb/tasks/BitextMining/dan/BornholmskBitextMining.py @@ -27,18 +27,18 @@ class BornholmBitextMining(AbsTaskBitextMining): annotations_creators="expert-annotated", dialect=["da-dan-bornholm"], sample_creation="created", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{derczynskiBornholmskNaturalLanguage2019, - title = {Bornholmsk natural language processing: Resources and tools}, - url = {https://pure.itu.dk/ws/files/84551091/W19_6138.pdf}, - shorttitle = 
{Bornholmsk natural language processing}, - pages = {338--344}, - booktitle = {Proceedings of the Nordic Conference of Computational Linguistics (2019)}, - publisher = {Linköping University Electronic Press}, - author = {Derczynski, Leon and Kjeldsen, Alex Speed}, - urldate = {2024-04-24}, - date = {2019}, - file = {Available Version (via Google Scholar):/Users/au554730/Zotero/storage/FBQ73ZYN/Derczynski and Kjeldsen - 2019 - Bornholmsk natural language processing Resources .pdf:application/pdf}, + author = {Derczynski, Leon and Kjeldsen, Alex Speed}, + booktitle = {Proceedings of the Nordic Conference of Computational Linguistics (2019)}, + date = {2019}, + file = {Available Version (via Google Scholar):/Users/au554730/Zotero/storage/FBQ73ZYN/Derczynski and Kjeldsen - 2019 - Bornholmsk natural language processing Resources .pdf:application/pdf}, + pages = {338--344}, + publisher = {Linköping University Electronic Press}, + shorttitle = {Bornholmsk natural language processing}, + title = {Bornholmsk natural language processing: Resources and tools}, + url = {https://pure.itu.dk/ws/files/84551091/W19_6138.pdf}, + urldate = {2024-04-24}, } """, prompt="Retrieve parallel sentences.", diff --git a/mteb/tasks/BitextMining/eng/PubChemSMILESBitextMining.py b/mteb/tasks/BitextMining/eng/PubChemSMILESBitextMining.py index 4951d8c596..5e75a54c0c 100644 --- a/mteb/tasks/BitextMining/eng/PubChemSMILESBitextMining.py +++ b/mteb/tasks/BitextMining/eng/PubChemSMILESBitextMining.py @@ -41,24 +41,25 @@ class PubChemSMILESBitextMining(MultilingualTask, AbsTaskBitextMining): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, 
Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - @article{kim2023pubchem, - title={PubChem 2023 update}, - author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, - journal={Nucleic acids research}, - volume={51}, - number={D1}, - pages={D1373--D1380}, - year={2023}, - publisher={Oxford University Press} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} + +@article{kim2023pubchem, + author = {Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal = {Nucleic acids research}, + number = {D1}, + pages = {D1373--D1380}, + publisher = {Oxford University Press}, + title = {PubChem 2023 update}, + volume = {51}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py b/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py index 8c2563bbf3..c22883b112 100644 --- a/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py @@ -43,24 +43,26 @@ class BUCCBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="human-translated", - bibtex_citation="""@inproceedings{zweigenbaum-etal-2017-overview, - title = "Overview of the Second {BUCC} Shared Task: Spotting Parallel Sentences in Comparable Corpora", - author = 
"Zweigenbaum, Pierre and - Sharoff, Serge and - Rapp, Reinhard", - editor = "Sharoff, Serge and - Zweigenbaum, Pierre and - Rapp, Reinhard", - booktitle = "Proceedings of the 10th Workshop on Building and Using Comparable Corpora", - month = aug, - year = "2017", - address = "Vancouver, Canada", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/W17-2512", - doi = "10.18653/v1/W17-2512", - pages = "60--67", - abstract = "This paper presents the BUCC 2017 shared task on parallel sentence extraction from comparable corpora. It recalls the design of the datasets, presents their final construction and statistics and the methods used to evaluate system results. 13 runs were submitted to the shared task by 4 teams, covering three of the four proposed language pairs: French-English (7 runs), German-English (3 runs), and Chinese-English (3 runs). The best F-scores as measured against the gold standard were 0.84 (German-English), 0.80 (French-English), and 0.43 (Chinese-English). Because of the design of the dataset, in which not all gold parallel sentence pairs are known, these are only minimum values. We examined manually a small sample of the false negative sentence pairs for the most precise French-English runs and estimated the number of parallel sentence pairs not yet in the provided gold standard. Adding them to the gold standard leads to revised estimates for the French-English F-scores of at most +1.5pt. This suggests that the BUCC 2017 datasets provide a reasonable approximate evaluation of the parallel sentence spotting task.", -}""", + bibtex_citation=r""" +@inproceedings{zweigenbaum-etal-2017-overview, + abstract = {This paper presents the BUCC 2017 shared task on parallel sentence extraction from comparable corpora. It recalls the design of the datasets, presents their final construction and statistics and the methods used to evaluate system results. 
13 runs were submitted to the shared task by 4 teams, covering three of the four proposed language pairs: French-English (7 runs), German-English (3 runs), and Chinese-English (3 runs). The best F-scores as measured against the gold standard were 0.84 (German-English), 0.80 (French-English), and 0.43 (Chinese-English). Because of the design of the dataset, in which not all gold parallel sentence pairs are known, these are only minimum values. We examined manually a small sample of the false negative sentence pairs for the most precise French-English runs and estimated the number of parallel sentence pairs not yet in the provided gold standard. Adding them to the gold standard leads to revised estimates for the French-English F-scores of at most +1.5pt. This suggests that the BUCC 2017 datasets provide a reasonable approximate evaluation of the parallel sentence spotting task.}, + address = {Vancouver, Canada}, + author = {Zweigenbaum, Pierre and +Sharoff, Serge and +Rapp, Reinhard}, + booktitle = {Proceedings of the 10th Workshop on Building and Using Comparable Corpora}, + doi = {10.18653/v1/W17-2512}, + editor = {Sharoff, Serge and +Zweigenbaum, Pierre and +Rapp, Reinhard}, + month = aug, + pages = {60--67}, + publisher = {Association for Computational Linguistics}, + title = {Overview of the Second {BUCC} Shared Task: Spotting Parallel Sentences in Comparable Corpora}, + url = {https://aclanthology.org/W17-2512}, + year = {2017}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/BitextMining/multilingual/BUCCBitextMiningFast.py b/mteb/tasks/BitextMining/multilingual/BUCCBitextMiningFast.py index 6154003abe..567eb9a281 100644 --- a/mteb/tasks/BitextMining/multilingual/BUCCBitextMiningFast.py +++ b/mteb/tasks/BitextMining/multilingual/BUCCBitextMiningFast.py @@ -38,23 +38,25 @@ class BUCCBitextMiningFast(AbsTaskBitextMining, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="human-translated", - 
bibtex_citation="""@inproceedings{zweigenbaum-etal-2017-overview, - title = "Overview of the Second {BUCC} Shared Task: Spotting Parallel Sentences in Comparable Corpora", - author = "Zweigenbaum, Pierre and - Sharoff, Serge and - Rapp, Reinhard", - editor = "Sharoff, Serge and - Zweigenbaum, Pierre and - Rapp, Reinhard", - booktitle = "Proceedings of the 10th Workshop on Building and Using Comparable Corpora", - month = aug, - year = "2017", - address = "Vancouver, Canada", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/W17-2512", - doi = "10.18653/v1/W17-2512", - pages = "60--67", - abstract = "This paper presents the BUCC 2017 shared task on parallel sentence extraction from comparable corpora. It recalls the design of the datasets, presents their final construction and statistics and the methods used to evaluate system results. 13 runs were submitted to the shared task by 4 teams, covering three of the four proposed language pairs: French-English (7 runs), German-English (3 runs), and Chinese-English (3 runs). The best F-scores as measured against the gold standard were 0.84 (German-English), 0.80 (French-English), and 0.43 (Chinese-English). Because of the design of the dataset, in which not all gold parallel sentence pairs are known, these are only minimum values. We examined manually a small sample of the false negative sentence pairs for the most precise French-English runs and estimated the number of parallel sentence pairs not yet in the provided gold standard. Adding them to the gold standard leads to revised estimates for the French-English F-scores of at most +1.5pt. This suggests that the BUCC 2017 datasets provide a reasonable approximate evaluation of the parallel sentence spotting task.", -}""", + bibtex_citation=r""" +@inproceedings{zweigenbaum-etal-2017-overview, + abstract = {This paper presents the BUCC 2017 shared task on parallel sentence extraction from comparable corpora. 
It recalls the design of the datasets, presents their final construction and statistics and the methods used to evaluate system results. 13 runs were submitted to the shared task by 4 teams, covering three of the four proposed language pairs: French-English (7 runs), German-English (3 runs), and Chinese-English (3 runs). The best F-scores as measured against the gold standard were 0.84 (German-English), 0.80 (French-English), and 0.43 (Chinese-English). Because of the design of the dataset, in which not all gold parallel sentence pairs are known, these are only minimum values. We examined manually a small sample of the false negative sentence pairs for the most precise French-English runs and estimated the number of parallel sentence pairs not yet in the provided gold standard. Adding them to the gold standard leads to revised estimates for the French-English F-scores of at most +1.5pt. This suggests that the BUCC 2017 datasets provide a reasonable approximate evaluation of the parallel sentence spotting task.}, + address = {Vancouver, Canada}, + author = {Zweigenbaum, Pierre and +Sharoff, Serge and +Rapp, Reinhard}, + booktitle = {Proceedings of the 10th Workshop on Building and Using Comparable Corpora}, + doi = {10.18653/v1/W17-2512}, + editor = {Sharoff, Serge and +Zweigenbaum, Pierre and +Rapp, Reinhard}, + month = aug, + pages = {60--67}, + publisher = {Association for Computational Linguistics}, + title = {Overview of the Second {BUCC} Shared Task: Spotting Parallel Sentences in Comparable Corpora}, + url = {https://aclanthology.org/W17-2512}, + year = {2017}, +} +""", adapted_from=["BUCC"], ) diff --git a/mteb/tasks/BitextMining/multilingual/BibleNLPBitextMining.py b/mteb/tasks/BitextMining/multilingual/BibleNLPBitextMining.py index 07724153c9..dedc2355c9 100644 --- a/mteb/tasks/BitextMining/multilingual/BibleNLPBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/BibleNLPBitextMining.py @@ -884,12 +884,14 @@ class 
BibleNLPBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="expert-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@article{akerman2023ebible, - title={The eBible Corpus: Data and Model Benchmarks for Bible Translation for Low-Resource Languages}, - author={Akerman, Vesa and Baines, David and Daspit, Damien and Hermjakob, Ulf and Jang, Taeho and Leong, Colin and Martin, Michael and Mathew, Joel and Robie, Jonathan and Schwarting, Marcus}, - journal={arXiv preprint arXiv:2304.09919}, - year={2023} - }""", + bibtex_citation=r""" +@article{akerman2023ebible, + author = {Akerman, Vesa and Baines, David and Daspit, Damien and Hermjakob, Ulf and Jang, Taeho and Leong, Colin and Martin, Michael and Mathew, Joel and Robie, Jonathan and Schwarting, Marcus}, + journal = {arXiv preprint arXiv:2304.09919}, + title = {The eBible Corpus: Data and Model Benchmarks for Bible Translation for Low-Resource Languages}, + year = {2023}, +} +""", ) def load_data(self, **kwargs: Any) -> None: diff --git a/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py b/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py index b7806d60ac..893df2c8fd 100644 --- a/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py @@ -33,15 +33,15 @@ class DiaBLaBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation=""" - @inproceedings{gonzalez2019diabla, - title={DiaBLa: A Corpus of Bilingual Spontaneous Written Dialogues for Machine Translation}, - author={González, Matilde and García, Clara and Sánchez, Lucía}, - booktitle={Proceedings of the 12th Language Resources and Evaluation Conference}, - pages={4192--4198}, - year={2019} - } - """, + bibtex_citation=r""" +@inproceedings{gonzalez2019diabla, + author = {González, Matilde and García, Clara and Sánchez, Lucía}, + booktitle = 
{Proceedings of the 12th Language Resources and Evaluation Conference}, + pages = {4192--4198}, + title = {DiaBLa: A Corpus of Bilingual Spontaneous Written Dialogues for Machine Translation}, + year = {2019}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py b/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py index 786b5f0fd9..c28ef5944b 100644 --- a/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py @@ -259,15 +259,15 @@ class FloresBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation=""" - @inproceedings{goyal2022flores, - title={The FLORES-101 Evaluation Benchmark for Low-Resource and Multilingual Machine Translation}, - author={Goyal, Naman and Gao, Cynthia and Chaudhary, Vishrav and Chen, Peng-Jen and Wenzek, Guillaume and Ju, Da and Krishnan, Sanjana and Ranzato, Marc'Aurelio and Guzm{\'a}n, Francisco}, - booktitle={Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies}, - pages={19--35}, - year={2022} - } - """, + bibtex_citation=r""" +@inproceedings{goyal2022flores, + author = {Goyal, Naman and Gao, Cynthia and Chaudhary, Vishrav and Chen, Peng-Jen and Wenzek, Guillaume and Ju, Da and Krishnan, Sanjana and Ranzato, Marc'Aurelio and Guzm{\'a}n, Francisco}, + booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies}, + pages = {19--35}, + title = {The FLORES-101 Evaluation Benchmark for Low-Resource and Multilingual Machine Translation}, + year = {2022}, +} +""", ) def load_data(self, **kwargs: Any) -> None: diff --git a/mteb/tasks/BitextMining/multilingual/IN22ConvBitextMining.py 
b/mteb/tasks/BitextMining/multilingual/IN22ConvBitextMining.py index 61a8717507..3ed3395d17 100644 --- a/mteb/tasks/BitextMining/multilingual/IN22ConvBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/IN22ConvBitextMining.py @@ -91,15 +91,17 @@ class IN22ConvBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="expert-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@article{gala2023indictrans, -title={IndicTrans2: Towards High-Quality and Accessible Machine Translation Models for all 22 Scheduled Indian Languages}, -author={Jay Gala and Pranjal A Chitale and A K Raghavan and Varun Gumma and Sumanth Doddapaneni and Aswanth Kumar M and Janki Atul Nawale and Anupama Sujatha and Ratish Puduppully and Vivek Raghavan and Pratyush Kumar and Mitesh M Khapra and Raj Dabre and Anoop Kunchukuttan}, -journal={Transactions on Machine Learning Research}, -issn={2835-8856}, -year={2023}, -url={https://openreview.net/forum?id=vfT4YuzAYA}, -note={} -}""", + bibtex_citation=r""" +@article{gala2023indictrans, + author = {Jay Gala and Pranjal A Chitale and A K Raghavan and Varun Gumma and Sumanth Doddapaneni and Aswanth Kumar M and Janki Atul Nawale and Anupama Sujatha and Ratish Puduppully and Vivek Raghavan and Pratyush Kumar and Mitesh M Khapra and Raj Dabre and Anoop Kunchukuttan}, + issn = {2835-8856}, + journal = {Transactions on Machine Learning Research}, + note = {}, + title = {IndicTrans2: Towards High-Quality and Accessible Machine Translation Models for all 22 Scheduled Indian Languages}, + url = {https://openreview.net/forum?id=vfT4YuzAYA}, + year = {2023}, +} +""", ) def load_data(self, **kwargs: Any) -> None: diff --git a/mteb/tasks/BitextMining/multilingual/IN22GenBitextMining.py b/mteb/tasks/BitextMining/multilingual/IN22GenBitextMining.py index 503c64e5f0..5082a354d6 100644 --- a/mteb/tasks/BitextMining/multilingual/IN22GenBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/IN22GenBitextMining.py @@ -85,15 
+85,17 @@ class IN22GenBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="expert-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@article{gala2023indictrans, -title={IndicTrans2: Towards High-Quality and Accessible Machine Translation Models for all 22 Scheduled Indian Languages}, -author={Jay Gala and Pranjal A Chitale and A K Raghavan and Varun Gumma and Sumanth Doddapaneni and Aswanth Kumar M and Janki Atul Nawale and Anupama Sujatha and Ratish Puduppully and Vivek Raghavan and Pratyush Kumar and Mitesh M Khapra and Raj Dabre and Anoop Kunchukuttan}, -journal={Transactions on Machine Learning Research}, -issn={2835-8856}, -year={2023}, -url={https://openreview.net/forum?id=vfT4YuzAYA}, -note={} -}""", + bibtex_citation=r""" +@article{gala2023indictrans, + author = {Jay Gala and Pranjal A Chitale and A K Raghavan and Varun Gumma and Sumanth Doddapaneni and Aswanth Kumar M and Janki Atul Nawale and Anupama Sujatha and Ratish Puduppully and Vivek Raghavan and Pratyush Kumar and Mitesh M Khapra and Raj Dabre and Anoop Kunchukuttan}, + issn = {2835-8856}, + journal = {Transactions on Machine Learning Research}, + note = {}, + title = {IndicTrans2: Towards High-Quality and Accessible Machine Translation Models for all 22 Scheduled Indian Languages}, + url = {https://openreview.net/forum?id=vfT4YuzAYA}, + year = {2023}, +} +""", ) def load_data(self, **kwargs: Any) -> None: diff --git a/mteb/tasks/BitextMining/multilingual/IWSLT2017BitextMining.py b/mteb/tasks/BitextMining/multilingual/IWSLT2017BitextMining.py index ee83b6f5ca..6dfcaaa564 100644 --- a/mteb/tasks/BitextMining/multilingual/IWSLT2017BitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/IWSLT2017BitextMining.py @@ -59,26 +59,26 @@ class IWSLT2017BitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" 
@inproceedings{cettolo-etal-2017-overview, - title = "Overview of the {IWSLT} 2017 Evaluation Campaign", - author = {Cettolo, Mauro and - Federico, Marcello and - Bentivogli, Luisa and - Niehues, Jan and - St{\"u}ker, Sebastian and - Sudoh, Katsuhito and - Yoshino, Koichiro and - Federmann, Christian}, - editor = "Sakti, Sakriani and - Utiyama, Masao", - booktitle = "Proceedings of the 14th International Conference on Spoken Language Translation", - month = dec # " 14-15", - year = "2017", - address = "Tokyo, Japan", - publisher = "International Workshop on Spoken Language Translation", - url = "https://aclanthology.org/2017.iwslt-1.1", - pages = "2--14", + address = {Tokyo, Japan}, + author = {Cettolo, Mauro and +Federico, Marcello and +Bentivogli, Luisa and +Niehues, Jan and +St{\"u}ker, Sebastian and +Sudoh, Katsuhito and +Yoshino, Koichiro and +Federmann, Christian}, + booktitle = {Proceedings of the 14th International Conference on Spoken Language Translation}, + editor = {Sakti, Sakriani and +Utiyama, Masao}, + month = dec # { 14-15}, + pages = {2--14}, + publisher = {International Workshop on Spoken Language Translation}, + title = {Overview of the {IWSLT} 2017 Evaluation Campaign}, + url = {https://aclanthology.org/2017.iwslt-1.1}, + year = {2017}, } """, ) diff --git a/mteb/tasks/BitextMining/multilingual/IndicGenBenchFloresBitextMining.py b/mteb/tasks/BitextMining/multilingual/IndicGenBenchFloresBitextMining.py index 4f5169619b..2a19e1c82c 100644 --- a/mteb/tasks/BitextMining/multilingual/IndicGenBenchFloresBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/IndicGenBenchFloresBitextMining.py @@ -120,14 +120,16 @@ class IndicGenBenchFloresBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="expert-annotated", dialect=[], sample_creation="human-translated and localized", - bibtex_citation="""@misc{singh2024indicgenbench, - title={IndicGenBench: A Multilingual Benchmark to Evaluate Generation Capabilities of LLMs on Indic 
Languages}, - author={Harman Singh and Nitish Gupta and Shikhar Bharadwaj and Dinesh Tewari and Partha Talukdar}, - year={2024}, - eprint={2404.16816}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{singh2024indicgenbench, + archiveprefix = {arXiv}, + author = {Harman Singh and Nitish Gupta and Shikhar Bharadwaj and Dinesh Tewari and Partha Talukdar}, + eprint = {2404.16816}, + primaryclass = {cs.CL}, + title = {IndicGenBench: A Multilingual Benchmark to Evaluate Generation Capabilities of LLMs on Indic Languages}, + year = {2024}, +} +""", ) def load_data(self, **kwargs: Any) -> None: diff --git a/mteb/tasks/BitextMining/multilingual/LinceMTBitextMining.py b/mteb/tasks/BitextMining/multilingual/LinceMTBitextMining.py index 8abb8ce1ff..a0551b3eea 100644 --- a/mteb/tasks/BitextMining/multilingual/LinceMTBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/LinceMTBitextMining.py @@ -31,13 +31,13 @@ class LinceMTBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{aguilar2020lince, - title={LinCE: A Centralized Benchmark for Linguistic Code-switching Evaluation}, - author={Aguilar, Gustavo and Kar, Sudipta and Solorio, Thamar}, - booktitle={Proceedings of the Twelfth Language Resources and Evaluation Conference}, - pages={1803--1813}, - year={2020} - } - """, + bibtex_citation=r""" +@inproceedings{aguilar2020lince, + author = {Aguilar, Gustavo and Kar, Sudipta and Solorio, Thamar}, + booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference}, + pages = {1803--1813}, + title = {LinCE: A Centralized Benchmark for Linguistic Code-switching Evaluation}, + year = {2020}, +} +""", ) diff --git a/mteb/tasks/BitextMining/multilingual/NTREXBitextMining.py b/mteb/tasks/BitextMining/multilingual/NTREXBitextMining.py index 0137d9330d..99c0456517 100644 --- 
a/mteb/tasks/BitextMining/multilingual/NTREXBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/NTREXBitextMining.py @@ -270,17 +270,17 @@ class NTREXBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="expert-annotated", dialect=[], sample_creation="human-translated and localized", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{federmann-etal-2022-ntrex, - title = "{NTREX}-128 {--} News Test References for {MT} Evaluation of 128 Languages", - author = "Federmann, Christian and Kocmi, Tom and Xin, Ying", - booktitle = "Proceedings of the First Workshop on Scaling Up Multilingual Evaluation", - month = "nov", - year = "2022", - address = "Online", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2022.sumeval-1.4", - pages = "21--24", + address = {Online}, + author = {Federmann, Christian and Kocmi, Tom and Xin, Ying}, + booktitle = {Proceedings of the First Workshop on Scaling Up Multilingual Evaluation}, + month = {nov}, + pages = {21--24}, + publisher = {Association for Computational Linguistics}, + title = {{NTREX}-128 {--} News Test References for {MT} Evaluation of 128 Languages}, + url = {https://aclanthology.org/2022.sumeval-1.4}, + year = {2022}, } """, ) diff --git a/mteb/tasks/BitextMining/multilingual/NollySentiBitextMining.py b/mteb/tasks/BitextMining/multilingual/NollySentiBitextMining.py index 4662833008..ec00149a34 100644 --- a/mteb/tasks/BitextMining/multilingual/NollySentiBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/NollySentiBitextMining.py @@ -34,13 +34,13 @@ class NollySentiBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{shode2023nollysenti, - title={NollySenti: Leveraging Transfer Learning and Machine Translation for Nigerian Movie Sentiment Classification}, - author={Shode, Iyanuoluwa and Adelani, David Ifeoluwa and Peng, Jing 
and Feldman, Anna}, - booktitle={Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)}, - pages={986--998}, - year={2023} - } - """, + bibtex_citation=r""" +@inproceedings{shode2023nollysenti, + author = {Shode, Iyanuoluwa and Adelani, David Ifeoluwa and Peng, Jing and Feldman, Anna}, + booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)}, + pages = {986--998}, + title = {NollySenti: Leveraging Transfer Learning and Machine Translation for Nigerian Movie Sentiment Classification}, + year = {2023}, +} +""", ) diff --git a/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py b/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py index 81a880974c..c7c4c15bdf 100644 --- a/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py @@ -26,12 +26,12 @@ class NorwegianCourtsBitextMining(AbsTaskBitextMining): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{opus4, - title={OPUS-MT — Building open translation services for the World}, - author={Tiedemann, J{\"o}rg and Thottingal, Santhosh}, - booktitle={Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)}, - year={2020} + author = {Tiedemann, J{\"o}rg and Thottingal, Santhosh}, + booktitle = {Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)}, + title = {OPUS-MT — Building open translation services for the World}, + year = {2020}, } """, prompt="Retrieve parallel sentences in Norwegian Bokmål and Nynorsk", diff --git a/mteb/tasks/BitextMining/multilingual/NusaTranslationBitextMining.py b/mteb/tasks/BitextMining/multilingual/NusaTranslationBitextMining.py index c328461746..02c1859b8f 100644 --- 
a/mteb/tasks/BitextMining/multilingual/NusaTranslationBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/NusaTranslationBitextMining.py @@ -41,14 +41,13 @@ class NusaTranslationBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation=""" - @inproceedings{cahyawijaya2023nusawrites, - title={NusaWrites: Constructing High-Quality Corpora for Underrepresented and Extremely Low-Resource Languages}, - author={Cahyawijaya, Samuel and Lovenia, Holy and Koto, Fajri and Adhista, Dea and Dave, Emmanuel and Oktavianti, Sarah and Akbar, Salsabil and Lee, Jhonson and Shadieq, Nuur and Cenggoro, Tjeng Wawan and others}, - booktitle={Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)}, - pages={921--945}, - year={2023} - } - - """, + bibtex_citation=r""" +@inproceedings{cahyawijaya2023nusawrites, + author = {Cahyawijaya, Samuel and Lovenia, Holy and Koto, Fajri and Adhista, Dea and Dave, Emmanuel and Oktavianti, Sarah and Akbar, Salsabil and Lee, Jhonson and Shadieq, Nuur and Cenggoro, Tjeng Wawan and others}, + booktitle = {Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)}, + pages = {921--945}, + title = {NusaWrites: Constructing High-Quality Corpora for Underrepresented and Extremely Low-Resource Languages}, + year = {2023}, +} +""", ) diff --git a/mteb/tasks/BitextMining/multilingual/NusaXBitextMining.py b/mteb/tasks/BitextMining/multilingual/NusaXBitextMining.py index ee609dac82..ac37841066 100644 --- a/mteb/tasks/BitextMining/multilingual/NusaXBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/NusaXBitextMining.py @@ -41,21 +41,22 @@ class 
NusaXBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation=""" - @inproceedings{winata2023nusax, - title={NusaX: Multilingual Parallel Sentiment Dataset for 10 Indonesian Local Languages}, - author={Winata, Genta Indra and Aji, Alham Fikri and Cahyawijaya, Samuel and Mahendra, Rahmad and Koto, Fajri and Romadhony, Ade and Kurniawan, Kemal and Moeljadi, David and Prasojo, Radityo Eko and Fung, Pascale and others}, - booktitle={Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics}, - pages={815--834}, - year={2023} - } - @misc{winata2024miners, - title={MINERS: Multilingual Language Models as Semantic Retrievers}, - author={Genta Indra Winata and Ruochen Zhang and David Ifeoluwa Adelani}, - year={2024}, - eprint={2406.07424}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - """, + bibtex_citation=r""" +@inproceedings{winata2023nusax, + author = {Winata, Genta Indra and Aji, Alham Fikri and Cahyawijaya, Samuel and Mahendra, Rahmad and Koto, Fajri and Romadhony, Ade and Kurniawan, Kemal and Moeljadi, David and Prasojo, Radityo Eko and Fung, Pascale and others}, + booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics}, + pages = {815--834}, + title = {NusaX: Multilingual Parallel Sentiment Dataset for 10 Indonesian Local Languages}, + year = {2023}, +} + +@misc{winata2024miners, + archiveprefix = {arXiv}, + author = {Genta Indra Winata and Ruochen Zhang and David Ifeoluwa Adelani}, + eprint = {2406.07424}, + primaryclass = {cs.CL}, + title = {MINERS: Multilingual Language Models as Semantic Retrievers}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/BitextMining/multilingual/PhincBitextMining.py b/mteb/tasks/BitextMining/multilingual/PhincBitextMining.py index c7fec75637..b4d9293834 100644 --- 
a/mteb/tasks/BitextMining/multilingual/PhincBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/PhincBitextMining.py @@ -31,13 +31,13 @@ class PhincBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{srivastava2020phinc, - title={PHINC: A Parallel Hinglish Social Media Code-Mixed Corpus for Machine Translation}, - author={Srivastava, Vivek and Singh, Mayank}, - booktitle={Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)}, - pages={41--49}, - year={2020} - } - """, + bibtex_citation=r""" +@inproceedings{srivastava2020phinc, + author = {Srivastava, Vivek and Singh, Mayank}, + booktitle = {Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)}, + pages = {41--49}, + title = {PHINC: A Parallel Hinglish Social Media Code-Mixed Corpus for Machine Translation}, + year = {2020}, +} +""", ) diff --git a/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py b/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py index 4312332022..6f265b7261 100644 --- a/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py @@ -145,11 +145,11 @@ class TatoebaBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="human-annotated", dialect=[], # No specific dialect mentioned sample_creation="found", - bibtex_citation=""" - @misc{tatoeba, - author = {Tatoeba community}, - title = {Tatoeba: Collection of sentences and translations}, - year = {2021}, - } - """, + bibtex_citation=r""" +@misc{tatoeba, + author = {Tatoeba community}, + title = {Tatoeba: Collection of sentences and translations}, + year = {2021}, +} +""", ) diff --git a/mteb/tasks/BitextMining/multilingual/WebFAQBitextMining.py b/mteb/tasks/BitextMining/multilingual/WebFAQBitextMining.py index c0561e01ab..8b5fe51976 100644 --- 
a/mteb/tasks/BitextMining/multilingual/WebFAQBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/WebFAQBitextMining.py @@ -222,15 +222,17 @@ class WebFAQBitextMiningQuestions(AbsTaskBitextMining, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="human-translated", - bibtex_citation="""@misc{dinzinger2025webfaq, - title={WebFAQ: A Multilingual Collection of Natural Q&A Datasets for Dense Retrieval}, - author={Michael Dinzinger and Laura Caspari and Kanishka Ghosh Dastidar and Jelena Mitrović and Michael Granitzer}, - year={2025}, - eprint={2502.20936}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2502.20936}, -}""", + bibtex_citation=r""" +@misc{dinzinger2025webfaq, + archiveprefix = {arXiv}, + author = {Michael Dinzinger and Laura Caspari and Kanishka Ghosh Dastidar and Jelena Mitrović and Michael Granitzer}, + eprint = {2502.20936}, + primaryclass = {cs.CL}, + title = {WebFAQ: A Multilingual Collection of Natural Q&A Datasets for Dense Retrieval}, + url = {https://arxiv.org/abs/2502.20936}, + year = {2025}, +} +""", ) def dataset_transform(self): @@ -276,15 +278,17 @@ class WebFAQBitextMiningQAs(AbsTaskBitextMining, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="human-translated", - bibtex_citation="""@misc{dinzinger2025webfaq, - title={WebFAQ: A Multilingual Collection of Natural Q&A Datasets for Dense Retrieval}, - author={Michael Dinzinger and Laura Caspari and Kanishka Ghosh Dastidar and Jelena Mitrović and Michael Granitzer}, - year={2025}, - eprint={2502.20936}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2502.20936}, -}""", + bibtex_citation=r""" +@misc{dinzinger2025webfaq, + archiveprefix = {arXiv}, + author = {Michael Dinzinger and Laura Caspari and Kanishka Ghosh Dastidar and Jelena Mitrović and Michael Granitzer}, + eprint = {2502.20936}, + primaryclass = {cs.CL}, + title = {WebFAQ: A Multilingual 
Collection of Natural Q&A Datasets for Dense Retrieval}, + url = {https://arxiv.org/abs/2502.20936}, + year = {2025}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/BitextMining/srn/SRNCorpusBitextMining.py b/mteb/tasks/BitextMining/srn/SRNCorpusBitextMining.py index b4072553b6..cdcfdb79d3 100644 --- a/mteb/tasks/BitextMining/srn/SRNCorpusBitextMining.py +++ b/mteb/tasks/BitextMining/srn/SRNCorpusBitextMining.py @@ -46,12 +46,12 @@ class SRNCorpusBitextMining(AbsTaskBitextMining, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @article{zwennicker2022towards, - title={Towards a general purpose machine translation system for Sranantongo}, - author={Zwennicker, Just and Stap, David}, - journal={arXiv preprint arXiv:2212.06383}, - year={2022} + author = {Zwennicker, Just and Stap, David}, + journal = {arXiv preprint arXiv:2212.06383}, + title = {Towards a general purpose machine translation system for Sranantongo}, + year = {2022}, } """, ) diff --git a/mteb/tasks/BitextMining/vie/VieMedEVBitextMining.py b/mteb/tasks/BitextMining/vie/VieMedEVBitextMining.py index ab32025167..c6047b1218 100644 --- a/mteb/tasks/BitextMining/vie/VieMedEVBitextMining.py +++ b/mteb/tasks/BitextMining/vie/VieMedEVBitextMining.py @@ -33,12 +33,14 @@ class VieMedEVBitextMining(AbsTaskBitextMining): annotations_creators="expert-annotated", dialect=[], sample_creation="human-translated and localized", - bibtex_citation="""@inproceedings{medev, - title = {{Improving Vietnamese-English Medical Machine Translation}}, - author = {Nhu Vo and Dat Quoc Nguyen and Dung D. Le and Massimo Piccardi and Wray Buntine}, - booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING)}, - year = {2024} -}""", + bibtex_citation=r""" +@inproceedings{medev, + author = {Nhu Vo and Dat Quoc Nguyen and Dung D. 
Le and Massimo Piccardi and Wray Buntine}, + booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING)}, + title = {{Improving Vietnamese-English Medical Machine Translation}}, + year = {2024}, +} +""", ) def dataset_transform(self): @@ -54,9 +56,9 @@ def dataset_transform(self): # Pairs are in two halves en_sentences = all_texts[:mid_index] vie_sentences = all_texts[mid_index:] - assert len(en_sentences) == len(vie_sentences), ( - "The split does not result in equal halves." - ) + assert len(en_sentences) == len( + vie_sentences + ), "The split does not result in equal halves." # Downsample indices = list(range(len(en_sentences))) @@ -64,9 +66,9 @@ def dataset_transform(self): sample_indices = indices[:TEST_SAMPLES] en_sentences = [en_sentences[i] for i in sample_indices] vie_sentences = [vie_sentences[i] for i in sample_indices] - assert len(en_sentences) == len(vie_sentences) == TEST_SAMPLES, ( - f"Exceeded {TEST_SAMPLES} samples for 'test' split." - ) + assert ( + len(en_sentences) == len(vie_sentences) == TEST_SAMPLES + ), f"Exceeded {TEST_SAMPLES} samples for 'test' split." 
# Return dataset ds["test"] = datasets.Dataset.from_dict( diff --git a/mteb/tasks/Classification/ara/AJGT.py b/mteb/tasks/Classification/ara/AJGT.py index 2baa389794..987415df7c 100644 --- a/mteb/tasks/Classification/ara/AJGT.py +++ b/mteb/tasks/Classification/ara/AJGT.py @@ -26,14 +26,14 @@ class AJGT(AbsTaskClassification): annotations_creators="human-annotated", dialect=["ara-arab-MSA", "ara-arab-JO"], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{alomari2017arabic, - title={Arabic tweets sentimental analysis using machine learning}, - author={Alomari, Khaled Mohammad and ElSherif, Hatem M and Shaalan, Khaled}, - booktitle={International Conference on Industrial, Engineering and Other Applications of Applied Intelligent Systems}, - pages={602--610}, - year={2017}, - organization={Springer} + author = {Alomari, Khaled Mohammad and ElSherif, Hatem M and Shaalan, Khaled}, + booktitle = {International Conference on Industrial, Engineering and Other Applications of Applied Intelligent Systems}, + organization = {Springer}, + pages = {602--610}, + title = {Arabic tweets sentimental analysis using machine learning}, + year = {2017}, } """, ) diff --git a/mteb/tasks/Classification/ara/HotelReviewSentimentClassification.py b/mteb/tasks/Classification/ara/HotelReviewSentimentClassification.py index 24b7bc33fc..7743f47531 100644 --- a/mteb/tasks/Classification/ara/HotelReviewSentimentClassification.py +++ b/mteb/tasks/Classification/ara/HotelReviewSentimentClassification.py @@ -27,14 +27,14 @@ class HotelReviewSentimentClassification(AbsTaskClassification): annotations_creators="derived", dialect=["ara-arab-EG", "ara-arab-JO", "ara-arab-LB", "ara-arab-SA"], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @article{elnagar2018hotel, - title={Hotel Arabic-reviews dataset construction for sentiment analysis applications}, - author={Elnagar, Ashraf and Khalifa, Yasmin S and Einea, Anas}, - journal={Intelligent natural 
language processing: Trends and applications}, - pages={35--52}, - year={2018}, - publisher={Springer} + author = {Elnagar, Ashraf and Khalifa, Yasmin S and Einea, Anas}, + journal = {Intelligent natural language processing: Trends and applications}, + pages = {35--52}, + publisher = {Springer}, + title = {Hotel Arabic-reviews dataset construction for sentiment analysis applications}, + year = {2018}, } """, ) diff --git a/mteb/tasks/Classification/ara/RestaurantReviewSentimentClassification.py b/mteb/tasks/Classification/ara/RestaurantReviewSentimentClassification.py index 363d0526d7..f5240e6702 100644 --- a/mteb/tasks/Classification/ara/RestaurantReviewSentimentClassification.py +++ b/mteb/tasks/Classification/ara/RestaurantReviewSentimentClassification.py @@ -26,14 +26,14 @@ class RestaurantReviewSentimentClassification(AbsTaskClassification): annotations_creators="derived", dialect=["ara-arab-EG", "ara-arab-JO", "ara-arab-SA"], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{elsahar2015building, - title={Building large arabic multi-domain resources for sentiment analysis}, - author={ElSahar, Hady and El-Beltagy, Samhaa R}, - booktitle={International conference on intelligent text processing and computational linguistics}, - pages={23--34}, - year={2015}, - organization={Springer} + author = {ElSahar, Hady and El-Beltagy, Samhaa R}, + booktitle = {International conference on intelligent text processing and computational linguistics}, + organization = {Springer}, + pages = {23--34}, + title = {Building large arabic multi-domain resources for sentiment analysis}, + year = {2015}, } """, ) diff --git a/mteb/tasks/Classification/ara/TweetEmotionClassification.py b/mteb/tasks/Classification/ara/TweetEmotionClassification.py index e7fb8687ac..e2da60f449 100644 --- a/mteb/tasks/Classification/ara/TweetEmotionClassification.py +++ b/mteb/tasks/Classification/ara/TweetEmotionClassification.py @@ -27,14 +27,14 @@ class 
TweetEmotionClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=["ara-arab-EG", "ara-arab-LB", "ara-arab-JO", "ara-arab-SA"], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{al2018emotional, - title={Emotional tone detection in arabic tweets}, - author={Al-Khatib, Amr and El-Beltagy, Samhaa R}, - booktitle={Computational Linguistics and Intelligent Text Processing: 18th International Conference, CICLing 2017, Budapest, Hungary, April 17--23, 2017, Revised Selected Papers, Part II 18}, - pages={105--114}, - year={2018}, - organization={Springer} + author = {Al-Khatib, Amr and El-Beltagy, Samhaa R}, + booktitle = {Computational Linguistics and Intelligent Text Processing: 18th International Conference, CICLing 2017, Budapest, Hungary, April 17--23, 2017, Revised Selected Papers, Part II 18}, + organization = {Springer}, + pages = {105--114}, + title = {Emotional tone detection in arabic tweets}, + year = {2018}, } """, ) diff --git a/mteb/tasks/Classification/ara/TweetSarcasmClassification.py b/mteb/tasks/Classification/ara/TweetSarcasmClassification.py index 9c5f141d0b..2f7fb95f52 100644 --- a/mteb/tasks/Classification/ara/TweetSarcasmClassification.py +++ b/mteb/tasks/Classification/ara/TweetSarcasmClassification.py @@ -26,26 +26,26 @@ class TweetSarcasmClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=["ara-arab-EG", "ara-arab-LB", "ara-arab-MA", "ara-arab-SA"], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{abu-farha-magdy-2020-arabic, - title = "From {A}rabic Sentiment Analysis to Sarcasm Detection: The {A}r{S}arcasm Dataset", - author = "Abu Farha, Ibrahim and - Magdy, Walid", - editor = "Al-Khalifa, Hend and - Magdy, Walid and - Darwish, Kareem and - Elsayed, Tamer and - Mubarak, Hamdy", - booktitle = "Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive 
Language Detection", - month = may, - year = "2020", - address = "Marseille, France", - publisher = "European Language Resource Association", - url = "https://aclanthology.org/2020.osact-1.5", - pages = "32--39", - abstract = "Sarcasm is one of the main challenges for sentiment analysis systems. Its complexity comes from the expression of opinion using implicit indirect phrasing. In this paper, we present ArSarcasm, an Arabic sarcasm detection dataset, which was created through the reannotation of available Arabic sentiment analysis datasets. The dataset contains 10,547 tweets, 16{\%} of which are sarcastic. In addition to sarcasm the data was annotated for sentiment and dialects. Our analysis shows the highly subjective nature of these tasks, which is demonstrated by the shift in sentiment labels based on annotators{'} biases. Experiments show the degradation of state-of-the-art sentiment analysers when faced with sarcastic content. Finally, we train a deep learning model for sarcasm detection using BiLSTM. The model achieves an F1 score of 0.46, which shows the challenging nature of the task, and should act as a basic baseline for future research on our dataset.", - language = "English", - ISBN = "979-10-95546-51-1", + abstract = {Sarcasm is one of the main challenges for sentiment analysis systems. Its complexity comes from the expression of opinion using implicit indirect phrasing. In this paper, we present ArSarcasm, an Arabic sarcasm detection dataset, which was created through the reannotation of available Arabic sentiment analysis datasets. The dataset contains 10,547 tweets, 16{\%} of which are sarcastic. In addition to sarcasm the data was annotated for sentiment and dialects. Our analysis shows the highly subjective nature of these tasks, which is demonstrated by the shift in sentiment labels based on annotators{'} biases. Experiments show the degradation of state-of-the-art sentiment analysers when faced with sarcastic content. 
Finally, we train a deep learning model for sarcasm detection using BiLSTM. The model achieves an F1 score of 0.46, which shows the challenging nature of the task, and should act as a basic baseline for future research on our dataset.}, + address = {Marseille, France}, + author = {Abu Farha, Ibrahim and +Magdy, Walid}, + booktitle = {Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection}, + editor = {Al-Khalifa, Hend and +Magdy, Walid and +Darwish, Kareem and +Elsayed, Tamer and +Mubarak, Hamdy}, + isbn = {979-10-95546-51-1}, + language = {English}, + month = may, + pages = {32--39}, + publisher = {European Language Resource Association}, + title = {From {A}rabic Sentiment Analysis to Sarcasm Detection: The {A}r{S}arcasm Dataset}, + url = {https://aclanthology.org/2020.osact-1.5}, + year = {2020}, } """, ) diff --git a/mteb/tasks/Classification/ben/BengaliDocumentClassification.py b/mteb/tasks/Classification/ben/BengaliDocumentClassification.py index 145eba57ab..c1fe72afee 100644 --- a/mteb/tasks/Classification/ben/BengaliDocumentClassification.py +++ b/mteb/tasks/Classification/ben/BengaliDocumentClassification.py @@ -26,22 +26,22 @@ class BengaliDocumentClassification(AbsTaskClassification): license="cc-by-nc-sa-4.0", annotations_creators="derived", sample_creation="found", - bibtex_citation=""" - @inproceedings{akash-etal-2023-shironaam, - title = "Shironaam: {B}engali News Headline Generation using Auxiliary Information", - author = "Akash, Abu Ubaida and - Nayeem, Mir Tafseer and - Shohan, Faisal Tareque and - Islam, Tanvir", - booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics", - month = may, - year = "2023", - address = "Dubrovnik, Croatia", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2023.eacl-main.4", - pages = "52--67" - } - """, + bibtex_citation=r""" 
+@inproceedings{akash-etal-2023-shironaam, + address = {Dubrovnik, Croatia}, + author = {Akash, Abu Ubaida and +Nayeem, Mir Tafseer and +Shohan, Faisal Tareque and +Islam, Tanvir}, + booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics}, + month = may, + pages = {52--67}, + publisher = {Association for Computational Linguistics}, + title = {Shironaam: {B}engali News Headline Generation using Auxiliary Information}, + url = {https://aclanthology.org/2023.eacl-main.4}, + year = {2023}, +} +""", ) def dataset_transform(self) -> None: diff --git a/mteb/tasks/Classification/ben/BengaliHateSpeechClassification.py b/mteb/tasks/Classification/ben/BengaliHateSpeechClassification.py index 86763f0e50..9645415a58 100644 --- a/mteb/tasks/Classification/ben/BengaliHateSpeechClassification.py +++ b/mteb/tasks/Classification/ben/BengaliHateSpeechClassification.py @@ -26,12 +26,13 @@ class BengaliHateSpeechClassification(AbsTaskClassification): license="mit", annotations_creators="expert-annotated", sample_creation="found", - bibtex_citation="""@inproceedings{karim2020BengaliNLP, - title={Classification Benchmarks for Under-resourced Bengali Language based on Multichannel Convolutional-LSTM Network}, - author={Karim, Md. Rezaul and Chakravarti, Bharathi Raja and P. McCrae, John and Cochez, Michael}, - booktitle={7th IEEE International Conference on Data Science and Advanced Analytics (IEEE DSAA,2020)}, - publisher={IEEE}, - year={2020} + bibtex_citation=r""" +@inproceedings{karim2020BengaliNLP, + author = {Karim, Md. Rezaul and Chakravarti, Bharathi Raja and P. 
McCrae, John and Cochez, Michael}, + booktitle = {7th IEEE International Conference on Data Science and Advanced Analytics (IEEE DSAA,2020)}, + publisher = {IEEE}, + title = {Classification Benchmarks for Under-resourced Bengali Language based on Multichannel Convolutional-LSTM Network}, + year = {2020}, } """, ) diff --git a/mteb/tasks/Classification/ben/BengaliSentimentAnalysis.py b/mteb/tasks/Classification/ben/BengaliSentimentAnalysis.py index 87af91c8a8..b476bb4f58 100644 --- a/mteb/tasks/Classification/ben/BengaliSentimentAnalysis.py +++ b/mteb/tasks/Classification/ben/BengaliSentimentAnalysis.py @@ -26,13 +26,15 @@ class BengaliSentimentAnalysis(AbsTaskClassification): license="cc-by-4.0", annotations_creators="human-annotated", sample_creation="found", - bibtex_citation="""@inproceedings{sazzed2020cross, - title={Cross-lingual sentiment classification in low-resource Bengali language}, - author={Sazzed, Salim}, - booktitle={Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)}, - pages={50--60}, - year={2020} - }""", + bibtex_citation=r""" +@inproceedings{sazzed2020cross, + author = {Sazzed, Salim}, + booktitle = {Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)}, + pages = {50--60}, + title = {Cross-lingual sentiment classification in low-resource Bengali language}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/bul/BulgarianStoreReviewSentimentClassfication.py b/mteb/tasks/Classification/bul/BulgarianStoreReviewSentimentClassfication.py index 7878fa89e2..d134f9af86 100644 --- a/mteb/tasks/Classification/bul/BulgarianStoreReviewSentimentClassfication.py +++ b/mteb/tasks/Classification/bul/BulgarianStoreReviewSentimentClassfication.py @@ -26,14 +26,15 @@ class BulgarianStoreReviewSentimentClassfication(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@data{DVN/TXIK9P_2018, -author = 
{Georgieva-Trifonova, Tsvetanka and Stefanova, Milena and Kalchev, Stefan}, -publisher = {Harvard Dataverse}, -title = {{Dataset for ``Customer Feedback Text Analysis for Online Stores Reviews in Bulgarian''}}, -year = {2018}, -version = {V1}, -doi = {10.7910/DVN/TXIK9P}, -url = {https://doi.org/10.7910/DVN/TXIK9P} + bibtex_citation=r""" +@data{DVN/TXIK9P_2018, + author = {Georgieva-Trifonova, Tsvetanka and Stefanova, Milena and Kalchev, Stefan}, + doi = {10.7910/DVN/TXIK9P}, + publisher = {Harvard Dataverse}, + title = {{Dataset for ``Customer Feedback Text Analysis for Online Stores Reviews in Bulgarian''}}, + url = {https://doi.org/10.7910/DVN/TXIK9P}, + version = {V1}, + year = {2018}, } """, ) diff --git a/mteb/tasks/Classification/ces/CSFDCZMovieReviewSentimentClassification.py b/mteb/tasks/Classification/ces/CSFDCZMovieReviewSentimentClassification.py index ca081f9a61..9b1f68f0a3 100644 --- a/mteb/tasks/Classification/ces/CSFDCZMovieReviewSentimentClassification.py +++ b/mteb/tasks/Classification/ces/CSFDCZMovieReviewSentimentClassification.py @@ -26,14 +26,14 @@ class CSFDCZMovieReviewSentimentClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @misc{štefánik2023resources, - title={Resources and Few-shot Learners for In-context Learning in Slavic Languages}, - author={Michal Štefánik and Marek Kadlčík and Piotr Gramacki and Petr Sojka}, - year={2023}, - eprint={2304.01922}, - archivePrefix={arXiv}, - primaryClass={cs.CL} + archiveprefix = {arXiv}, + author = {Michal Štefánik and Marek Kadlčík and Piotr Gramacki and Petr Sojka}, + eprint = {2304.01922}, + primaryclass = {cs.CL}, + title = {Resources and Few-shot Learners for In-context Learning in Slavic Languages}, + year = {2023}, } """, ) diff --git a/mteb/tasks/Classification/ces/CzechProductReviewSentimentClassification.py b/mteb/tasks/Classification/ces/CzechProductReviewSentimentClassification.py index 
8705a73c39..cd29ac4353 100644 --- a/mteb/tasks/Classification/ces/CzechProductReviewSentimentClassification.py +++ b/mteb/tasks/Classification/ces/CzechProductReviewSentimentClassification.py @@ -26,24 +26,24 @@ class CzechProductReviewSentimentClassification(AbsTaskClassification): license="cc-by-nc-sa-4.0", annotations_creators="derived", sample_creation="found", - bibtex_citation=""" - @inproceedings{habernal-etal-2013-sentiment, - title = "Sentiment Analysis in {C}zech Social Media Using Supervised Machine Learning", - author = "Habernal, Ivan and - Pt{\'a}{\v{c}}ek, Tom{\'a}{\v{s}} and - Steinberger, Josef", - editor = "Balahur, Alexandra and - van der Goot, Erik and - Montoyo, Andres", - booktitle = "Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis", - month = jun, - year = "2013", - address = "Atlanta, Georgia", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/W13-1609", - pages = "65--74", - } - """, + bibtex_citation=r""" +@inproceedings{habernal-etal-2013-sentiment, + address = {Atlanta, Georgia}, + author = {Habernal, Ivan and +Pt{\'a}{\v{c}}ek, Tom{\'a}{\v{s}} and +Steinberger, Josef}, + booktitle = {Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis}, + editor = {Balahur, Alexandra and +van der Goot, Erik and +Montoyo, Andres}, + month = jun, + pages = {65--74}, + publisher = {Association for Computational Linguistics}, + title = {Sentiment Analysis in {C}zech Social Media Using Supervised Machine Learning}, + url = {https://aclanthology.org/W13-1609}, + year = {2013}, +} +""", ) samples_per_label = 16 diff --git a/mteb/tasks/Classification/ces/CzechSoMeSentimentClassification.py b/mteb/tasks/Classification/ces/CzechSoMeSentimentClassification.py index 0e61196b19..333cd3aa4a 100644 --- a/mteb/tasks/Classification/ces/CzechSoMeSentimentClassification.py +++ 
b/mteb/tasks/Classification/ces/CzechSoMeSentimentClassification.py @@ -26,24 +26,24 @@ class CzechSoMeSentimentClassification(AbsTaskClassification): license="cc-by-nc-sa-4.0", annotations_creators="derived", sample_creation="found", - bibtex_citation=""" - @inproceedings{habernal-etal-2013-sentiment, - title = "Sentiment Analysis in {C}zech Social Media Using Supervised Machine Learning", - author = "Habernal, Ivan and - Pt{\'a}{\v{c}}ek, Tom{\'a}{\v{s}} and - Steinberger, Josef", - editor = "Balahur, Alexandra and - van der Goot, Erik and - Montoyo, Andres", - booktitle = "Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis", - month = jun, - year = "2013", - address = "Atlanta, Georgia", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/W13-1609", - pages = "65--74", - } - """, + bibtex_citation=r""" +@inproceedings{habernal-etal-2013-sentiment, + address = {Atlanta, Georgia}, + author = {Habernal, Ivan and +Pt{\'a}{\v{c}}ek, Tom{\'a}{\v{s}} and +Steinberger, Josef}, + booktitle = {Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis}, + editor = {Balahur, Alexandra and +van der Goot, Erik and +Montoyo, Andres}, + month = jun, + pages = {65--74}, + publisher = {Association for Computational Linguistics}, + title = {Sentiment Analysis in {C}zech Social Media Using Supervised Machine Learning}, + url = {https://aclanthology.org/W13-1609}, + year = {2013}, +} +""", ) samples_per_label = 16 diff --git a/mteb/tasks/Classification/ces/CzechSubjectivityClassification.py b/mteb/tasks/Classification/ces/CzechSubjectivityClassification.py index 18bcc7e10e..e2b1f10efc 100644 --- a/mteb/tasks/Classification/ces/CzechSubjectivityClassification.py +++ b/mteb/tasks/Classification/ces/CzechSubjectivityClassification.py @@ -26,17 +26,18 @@ class CzechSubjectivityClassification(AbsTaskClassification): 
annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{priban-steinberger-2022-czech, - title = "\{C\}zech Dataset for Cross-lingual Subjectivity Classification", - author = "P{\v{r}}ib{\'a}{\v{n}}, Pavel and - Steinberger, Josef", - booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference", - month = jun, - year = "2022", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://aclanthology.org/2022.lrec-1.148", - pages = "1381--1391", + bibtex_citation=r""" +@inproceedings{priban-steinberger-2022-czech, + address = {Marseille, France}, + author = {P{\v{r}}ib{\'a}{\v{n}}, Pavel and +Steinberger, Josef}, + booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference}, + month = jun, + pages = {1381--1391}, + publisher = {European Language Resources Association}, + title = {\{C\}zech Dataset for Cross-lingual Subjectivity Classification}, + url = {https://aclanthology.org/2022.lrec-1.148}, + year = {2022}, } """, ) diff --git a/mteb/tasks/Classification/dan/AngryTweetsClassification.py b/mteb/tasks/Classification/dan/AngryTweetsClassification.py index b22efde7a5..886612db48 100644 --- a/mteb/tasks/Classification/dan/AngryTweetsClassification.py +++ b/mteb/tasks/Classification/dan/AngryTweetsClassification.py @@ -26,13 +26,15 @@ class AngryTweetsClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{pauli2021danlp, - title={DaNLP: An open-source toolkit for Danish Natural Language Processing}, - author={Pauli, Amalie Brogaard and Barrett, Maria and Lacroix, Oph{\'e}lie and Hvingelby, Rasmus}, - booktitle={Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)}, - pages={460--466}, - year={2021} -}""", + bibtex_citation=r""" +@inproceedings{pauli2021danlp, + author = {Pauli, Amalie 
Brogaard and Barrett, Maria and Lacroix, Oph{\'e}lie and Hvingelby, Rasmus}, + booktitle = {Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)}, + pages = {460--466}, + title = {DaNLP: An open-source toolkit for Danish Natural Language Processing}, + year = {2021}, +} +""", prompt="Classify Danish tweets by sentiment. (positive, negative, neutral).", ) diff --git a/mteb/tasks/Classification/dan/DKHateClassification.py b/mteb/tasks/Classification/dan/DKHateClassification.py index fb6c04cc40..e67e3ebee0 100644 --- a/mteb/tasks/Classification/dan/DKHateClassification.py +++ b/mteb/tasks/Classification/dan/DKHateClassification.py @@ -26,35 +26,37 @@ class DKHateClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{sigurbergsson-derczynski-2020-offensive, - title = "Offensive Language and Hate Speech Detection for {D}anish", - author = "Sigurbergsson, Gudbjartur Ingi and - Derczynski, Leon", - editor = "Calzolari, Nicoletta and - B{\'e}chet, Fr{\'e}d{\'e}ric and - Blache, Philippe and - Choukri, Khalid and - Cieri, Christopher and - Declerck, Thierry and - Goggi, Sara and - Isahara, Hitoshi and - Maegaard, Bente and - Mariani, Joseph and - Mazo, H{\'e}l{\`e}ne and - Moreno, Asuncion and - Odijk, Jan and - Piperidis, Stelios", - booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference", - month = may, - year = "2020", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://aclanthology.org/2020.lrec-1.430", - pages = "3498--3508", - abstract = "The presence of offensive language on social media platforms and the implications this poses is becoming a major concern in modern society. Given the enormous amount of content created every day, automatic methods are required to detect and deal with this type of content. 
Until now, most of the research has focused on solving the problem for the English language, while the problem is multilingual. We construct a Danish dataset DKhate containing user-generated comments from various social media platforms, and to our knowledge, the first of its kind, annotated for various types and target of offensive language. We develop four automatic classification systems, each designed to work for both the English and the Danish language. In the detection of offensive language in English, the best performing system achieves a macro averaged F1-score of 0.74, and the best performing system for Danish achieves a macro averaged F1-score of 0.70. In the detection of whether or not an offensive post is targeted, the best performing system for English achieves a macro averaged F1-score of 0.62, while the best performing system for Danish achieves a macro averaged F1-score of 0.73. Finally, in the detection of the target type in a targeted offensive post, the best performing system for English achieves a macro averaged F1-score of 0.56, and the best performing system for Danish achieves a macro averaged F1-score of 0.63. Our work for both the English and the Danish language captures the type and targets of offensive language, and present automatic methods for detecting different kinds of offensive language such as hate speech and cyberbullying.", - language = "English", - ISBN = "979-10-95546-34-4", -}""", + bibtex_citation=r""" +@inproceedings{sigurbergsson-derczynski-2020-offensive, + abstract = {The presence of offensive language on social media platforms and the implications this poses is becoming a major concern in modern society. Given the enormous amount of content created every day, automatic methods are required to detect and deal with this type of content. Until now, most of the research has focused on solving the problem for the English language, while the problem is multilingual. 
We construct a Danish dataset DKhate containing user-generated comments from various social media platforms, and to our knowledge, the first of its kind, annotated for various types and target of offensive language. We develop four automatic classification systems, each designed to work for both the English and the Danish language. In the detection of offensive language in English, the best performing system achieves a macro averaged F1-score of 0.74, and the best performing system for Danish achieves a macro averaged F1-score of 0.70. In the detection of whether or not an offensive post is targeted, the best performing system for English achieves a macro averaged F1-score of 0.62, while the best performing system for Danish achieves a macro averaged F1-score of 0.73. Finally, in the detection of the target type in a targeted offensive post, the best performing system for English achieves a macro averaged F1-score of 0.56, and the best performing system for Danish achieves a macro averaged F1-score of 0.63. 
Our work for both the English and the Danish language captures the type and targets of offensive language, and present automatic methods for detecting different kinds of offensive language such as hate speech and cyberbullying.}, + address = {Marseille, France}, + author = {Sigurbergsson, Gudbjartur Ingi and +Derczynski, Leon}, + booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference}, + editor = {Calzolari, Nicoletta and +B{\'e}chet, Fr{\'e}d{\'e}ric and +Blache, Philippe and +Choukri, Khalid and +Cieri, Christopher and +Declerck, Thierry and +Goggi, Sara and +Isahara, Hitoshi and +Maegaard, Bente and +Mariani, Joseph and +Mazo, H{\'e}l{\`e}ne and +Moreno, Asuncion and +Odijk, Jan and +Piperidis, Stelios}, + isbn = {979-10-95546-34-4}, + language = {English}, + month = may, + pages = {3498--3508}, + publisher = {European Language Resources Association}, + title = {Offensive Language and Hate Speech Detection for {D}anish}, + url = {https://aclanthology.org/2020.lrec-1.430}, + year = {2020}, +} +""", prompt="Classify Danish tweets based on offensiveness (offensive, not offensive)", ) diff --git a/mteb/tasks/Classification/dan/DanishPoliticalCommentsClassification.py b/mteb/tasks/Classification/dan/DanishPoliticalCommentsClassification.py index 8f82e91ecc..c0bac1528e 100644 --- a/mteb/tasks/Classification/dan/DanishPoliticalCommentsClassification.py +++ b/mteb/tasks/Classification/dan/DanishPoliticalCommentsClassification.py @@ -30,12 +30,14 @@ class DanishPoliticalCommentsClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@techreport{SAMsentiment, - author={Mads Guldborg Kjeldgaard Kongsbak and Steffan Eybye Christensen and Lucas Høyberg Puvis~de~Chavannes and Peter Due Jensen}, - title={Sentiment Analysis Multitool, SAM}, - year={2019}, - institution={IT University of Copenhagen}, -}""", + bibtex_citation=r""" +@techreport{SAMsentiment, + author = {Mads 
Guldborg Kjeldgaard Kongsbak and Steffan Eybye Christensen and Lucas Høyberg Puvis~de~Chavannes and Peter Due Jensen}, + institution = {IT University of Copenhagen}, + title = {Sentiment Analysis Multitool, SAM}, + year = {2019}, +} +""", prompt="Classify Danish political comments for sentiment", ) diff --git a/mteb/tasks/Classification/dan/DdiscoCohesionClassification.py b/mteb/tasks/Classification/dan/DdiscoCohesionClassification.py index b28396869e..c1eb16d190 100644 --- a/mteb/tasks/Classification/dan/DdiscoCohesionClassification.py +++ b/mteb/tasks/Classification/dan/DdiscoCohesionClassification.py @@ -26,36 +26,36 @@ class DdiscoCohesionClassification(AbsTaskClassification): license="cc-by-sa-3.0", annotations_creators="expert-annotated", sample_creation="found", - bibtex_citation=""" - @inproceedings{flansmose-mikkelsen-etal-2022-ddisco, - title = "{DD}is{C}o: A Discourse Coherence Dataset for {D}anish", - author = "Flansmose Mikkelsen, Linea and - Kinch, Oliver and - Jess Pedersen, Anders and - Lacroix, Oph{\'e}lie", - editor = "Calzolari, Nicoletta and - B{\'e}chet, Fr{\'e}d{\'e}ric and - Blache, Philippe and - Choukri, Khalid and - Cieri, Christopher and - Declerck, Thierry and - Goggi, Sara and - Isahara, Hitoshi and - Maegaard, Bente and - Mariani, Joseph and - Mazo, H{\'e}l{\`e}ne and - Odijk, Jan and - Piperidis, Stelios", - booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference", - month = jun, - year = "2022", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://aclanthology.org/2022.lrec-1.260", - pages = "2440--2445", - abstract = "To date, there has been no resource for studying discourse coherence on real-world Danish texts. Discourse coherence has mostly been approached with the assumption that incoherent texts can be represented by coherent texts in which sentences have been shuffled. However, incoherent real-world texts rarely resemble that. 
We thus present DDisCo, a dataset including text from the Danish Wikipedia and Reddit annotated for discourse coherence. We choose to annotate real-world texts instead of relying on artificially incoherent text for training and testing models. Then, we evaluate the performance of several methods, including neural networks, on the dataset.", + bibtex_citation=r""" +@inproceedings{flansmose-mikkelsen-etal-2022-ddisco, + abstract = {To date, there has been no resource for studying discourse coherence on real-world Danish texts. Discourse coherence has mostly been approached with the assumption that incoherent texts can be represented by coherent texts in which sentences have been shuffled. However, incoherent real-world texts rarely resemble that. We thus present DDisCo, a dataset including text from the Danish Wikipedia and Reddit annotated for discourse coherence. We choose to annotate real-world texts instead of relying on artificially incoherent text for training and testing models. Then, we evaluate the performance of several methods, including neural networks, on the dataset.}, + address = {Marseille, France}, + author = {Flansmose Mikkelsen, Linea and +Kinch, Oliver and +Jess Pedersen, Anders and +Lacroix, Oph{\'e}lie}, + booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference}, + editor = {Calzolari, Nicoletta and +B{\'e}chet, Fr{\'e}d{\'e}ric and +Blache, Philippe and +Choukri, Khalid and +Cieri, Christopher and +Declerck, Thierry and +Goggi, Sara and +Isahara, Hitoshi and +Maegaard, Bente and +Mariani, Joseph and +Mazo, H{\'e}l{\`e}ne and +Odijk, Jan and +Piperidis, Stelios}, + month = jun, + pages = {2440--2445}, + publisher = {European Language Resources Association}, + title = {{DD}is{C}o: A Discourse Coherence Dataset for {D}anish}, + url = {https://aclanthology.org/2022.lrec-1.260}, + year = {2022}, } - """, +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/dan/LccSentimentClassification.py 
b/mteb/tasks/Classification/dan/LccSentimentClassification.py index 39b974dcd3..8ff87f3ec4 100644 --- a/mteb/tasks/Classification/dan/LccSentimentClassification.py +++ b/mteb/tasks/Classification/dan/LccSentimentClassification.py @@ -26,26 +26,28 @@ class LccSentimentClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{quasthoff-etal-2006-corpus, - title = "Corpus Portal for Search in Monolingual Corpora", - author = "Quasthoff, Uwe and - Richter, Matthias and - Biemann, Christian", - editor = "Calzolari, Nicoletta and - Choukri, Khalid and - Gangemi, Aldo and - Maegaard, Bente and - Mariani, Joseph and - Odijk, Jan and - Tapias, Daniel", - booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)", - month = may, - year = "2006", - address = "Genoa, Italy", - publisher = "European Language Resources Association (ELRA)", - url = "http://www.lrec-conf.org/proceedings/lrec2006/pdf/641_pdf.pdf", - abstract = "A simple and flexible schema for storing and presenting monolingual language resources is proposed. In this format, data for 18 different languages is already available in various sizes. The data is provided free of charge for online use and download. The main target is to ease the application of algorithms for monolingual and interlingual studies.", -}""", + bibtex_citation=r""" +@inproceedings{quasthoff-etal-2006-corpus, + abstract = {A simple and flexible schema for storing and presenting monolingual language resources is proposed. In this format, data for 18 different languages is already available in various sizes. The data is provided free of charge for online use and download. 
The main target is to ease the application of algorithms for monolingual and interlingual studies.}, + address = {Genoa, Italy}, + author = {Quasthoff, Uwe and +Richter, Matthias and +Biemann, Christian}, + booktitle = {Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)}, + editor = {Calzolari, Nicoletta and +Choukri, Khalid and +Gangemi, Aldo and +Maegaard, Bente and +Mariani, Joseph and +Odijk, Jan and +Tapias, Daniel}, + month = may, + publisher = {European Language Resources Association (ELRA)}, + title = {Corpus Portal for Search in Monolingual Corpora}, + url = {http://www.lrec-conf.org/proceedings/lrec2006/pdf/641_pdf.pdf}, + year = {2006}, +} +""", prompt="Classify texts based on sentiment", ) diff --git a/mteb/tasks/Classification/deu/GermanPoliticiansTwitterSentimentClassification.py b/mteb/tasks/Classification/deu/GermanPoliticiansTwitterSentimentClassification.py index 02cbe51f5f..979a70c707 100644 --- a/mteb/tasks/Classification/deu/GermanPoliticiansTwitterSentimentClassification.py +++ b/mteb/tasks/Classification/deu/GermanPoliticiansTwitterSentimentClassification.py @@ -26,28 +26,28 @@ class GermanPoliticiansTwitterSentimentClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{schmidt-etal-2022-sentiment, - title = "Sentiment Analysis on {T}witter for the Major {G}erman Parties during the 2021 {G}erman Federal Election", - author = "Schmidt, Thomas and - Fehle, Jakob and - Weissenbacher, Maximilian and - Richter, Jonathan and - Gottschalk, Philipp and - Wolff, Christian", - editor = "Schaefer, Robin and - Bai, Xiaoyu and - Stede, Manfred and - Zesch, Torsten", - booktitle = "Proceedings of the 18th Conference on Natural Language Processing (KONVENS 2022)", - month = "12--15 " # sep, - year = "2022", - address = "Potsdam, Germany", - publisher = "KONVENS 2022 Organizers", - url = 
"https://aclanthology.org/2022.konvens-1.9", - pages = "74--87", - } - """, + bibtex_citation=r""" +@inproceedings{schmidt-etal-2022-sentiment, + address = {Potsdam, Germany}, + author = {Schmidt, Thomas and +Fehle, Jakob and +Weissenbacher, Maximilian and +Richter, Jonathan and +Gottschalk, Philipp and +Wolff, Christian}, + booktitle = {Proceedings of the 18th Conference on Natural Language Processing (KONVENS 2022)}, + editor = {Schaefer, Robin and +Bai, Xiaoyu and +Stede, Manfred and +Zesch, Torsten}, + month = {12--15 } # sep, + pages = {74--87}, + publisher = {KONVENS 2022 Organizers}, + title = {Sentiment Analysis on {T}witter for the Major {G}erman Parties during the 2021 {G}erman Federal Election}, + url = {https://aclanthology.org/2022.konvens-1.9}, + year = {2022}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/deu/TenKGnadClassification.py b/mteb/tasks/Classification/deu/TenKGnadClassification.py index 592d66c983..12fe0223ff 100644 --- a/mteb/tasks/Classification/deu/TenKGnadClassification.py +++ b/mteb/tasks/Classification/deu/TenKGnadClassification.py @@ -27,16 +27,16 @@ class TenKGnadClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @InProceedings{Schabus2017, - Author = {Dietmar Schabus and Marcin Skowron and Martin Trapp}, - Title = {One Million Posts: A Data Set of German Online Discussions}, - Booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR)}, - Pages = {1241--1244}, - Year = {2017}, - Address = {Tokyo, Japan}, - Doi = {10.1145/3077136.3080711}, - Month = aug - } - """, + bibtex_citation=r""" +@inproceedings{Schabus2017, + address = {Tokyo, Japan}, + author = {Dietmar Schabus and Marcin Skowron and Martin Trapp}, + booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval 
(SIGIR)}, + doi = {10.1145/3077136.3080711}, + month = aug, + pages = {1241--1244}, + title = {One Million Posts: A Data Set of German Online Discussions}, + year = {2017}, +} +""", ) diff --git a/mteb/tasks/Classification/ell/GreekLegalCodeClassification.py b/mteb/tasks/Classification/ell/GreekLegalCodeClassification.py index 29fb9bbb90..008d454a50 100644 --- a/mteb/tasks/Classification/ell/GreekLegalCodeClassification.py +++ b/mteb/tasks/Classification/ell/GreekLegalCodeClassification.py @@ -29,16 +29,17 @@ class GreekLegalCodeClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{papaloukas-etal-2021-glc, - title = "Multi-granular Legal Topic Classification on Greek Legislation", - author = "Papaloukas, Christos and Chalkidis, Ilias and Athinaios, Konstantinos and Pantazi, Despina-Athanasia and Koubarakis, Manolis", - booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2021", - year = "2021", - address = "Punta Cana, Dominican Republic", - publisher = "Association for Computational Linguistics", - url = "https://arxiv.org/abs/2109.15298", - doi = "10.48550/arXiv.2109.15298", - pages = "63--75" + bibtex_citation=r""" +@inproceedings{papaloukas-etal-2021-glc, + address = {Punta Cana, Dominican Republic}, + author = {Papaloukas, Christos and Chalkidis, Ilias and Athinaios, Konstantinos and Pantazi, Despina-Athanasia and Koubarakis, Manolis}, + booktitle = {Proceedings of the Natural Legal Language Processing Workshop 2021}, + doi = {10.48550/arXiv.2109.15298}, + pages = {63--75}, + publisher = {Association for Computational Linguistics}, + title = {Multi-granular Legal Topic Classification on Greek Legislation}, + url = {https://arxiv.org/abs/2109.15298}, + year = {2021}, } """, ) diff --git a/mteb/tasks/Classification/eng/AmazonPolarityClassification.py b/mteb/tasks/Classification/eng/AmazonPolarityClassification.py index 5f787e1af6..3c5b1350f1 
100644 --- a/mteb/tasks/Classification/eng/AmazonPolarityClassification.py +++ b/mteb/tasks/Classification/eng/AmazonPolarityClassification.py @@ -29,12 +29,14 @@ class AmazonPolarityClassification(AbsTaskClassification): license="apache-2.0", dialect=[], sample_creation="found", - bibtex_citation="""@article{McAuley2013HiddenFA, - title={Hidden factors and hidden topics: understanding rating dimensions with review text}, - author={Julian McAuley and Jure Leskovec}, - journal={Proceedings of the 7th ACM conference on Recommender systems}, - year={2013}, - url={https://api.semanticscholar.org/CorpusID:6440341} -}""", + bibtex_citation=r""" +@article{McAuley2013HiddenFA, + author = {Julian McAuley and Jure Leskovec}, + journal = {Proceedings of the 7th ACM conference on Recommender systems}, + title = {Hidden factors and hidden topics: understanding rating dimensions with review text}, + url = {https://api.semanticscholar.org/CorpusID:6440341}, + year = {2013}, +} +""", prompt="Classify Amazon reviews into positive or negative sentiment", ) diff --git a/mteb/tasks/Classification/eng/ArxivClassification.py b/mteb/tasks/Classification/eng/ArxivClassification.py index ae13b6f489..943f1c1cfe 100644 --- a/mteb/tasks/Classification/eng/ArxivClassification.py +++ b/mteb/tasks/Classification/eng/ArxivClassification.py @@ -27,14 +27,16 @@ class ArxivClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@ARTICLE{8675939, - author={He, Jun and Wang, Liqun and Liu, Liu and Feng, Jiao and Wu, Hao}, - journal={IEEE Access}, - title={Long Document Classification From Local Word Glimpses via Recurrent Attention Learning}, - year={2019}, - volume={7}, - number={}, - pages={40707-40718}, - doi={10.1109/ACCESS.2019.2907992} - }""", + bibtex_citation=r""" +@article{8675939, + author = {He, Jun and Wang, Liqun and Liu, Liu and Feng, Jiao and Wu, Hao}, + doi = {10.1109/ACCESS.2019.2907992}, + journal = {IEEE 
Access}, + number = {}, + pages = {40707-40718}, + title = {Long Document Classification From Local Word Glimpses via Recurrent Attention Learning}, + volume = {7}, + year = {2019}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/Banking77Classification.py b/mteb/tasks/Classification/eng/Banking77Classification.py index 5b6db45c64..5581df7fb0 100644 --- a/mteb/tasks/Classification/eng/Banking77Classification.py +++ b/mteb/tasks/Classification/eng/Banking77Classification.py @@ -29,29 +29,31 @@ class Banking77Classification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{casanueva-etal-2020-efficient, - title = "Efficient Intent Detection with Dual Sentence Encoders", - author = "Casanueva, I{\~n}igo and - Tem{\v{c}}inas, Tadas and - Gerz, Daniela and - Henderson, Matthew and - Vuli{\'c}, Ivan", - editor = "Wen, Tsung-Hsien and - Celikyilmaz, Asli and - Yu, Zhou and - Papangelis, Alexandros and - Eric, Mihail and - Kumar, Anuj and - Casanueva, I{\~n}igo and - Shah, Rushin", - booktitle = "Proceedings of the 2nd Workshop on Natural Language Processing for Conversational AI", - month = jul, - year = "2020", - address = "Online", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2020.nlp4convai-1.5", - doi = "10.18653/v1/2020.nlp4convai-1.5", - pages = "38--45", -}""", + bibtex_citation=r""" +@inproceedings{casanueva-etal-2020-efficient, + address = {Online}, + author = {Casanueva, I{\~n}igo and +Tem{\v{c}}inas, Tadas and +Gerz, Daniela and +Henderson, Matthew and +Vuli{\'c}, Ivan}, + booktitle = {Proceedings of the 2nd Workshop on Natural Language Processing for Conversational AI}, + doi = {10.18653/v1/2020.nlp4convai-1.5}, + editor = {Wen, Tsung-Hsien and +Celikyilmaz, Asli and +Yu, Zhou and +Papangelis, Alexandros and +Eric, Mihail and +Kumar, Anuj and +Casanueva, I{\~n}igo and +Shah, Rushin}, + month = jul, + pages = {38--45}, + 
publisher = {Association for Computational Linguistics}, + title = {Efficient Intent Detection with Dual Sentence Encoders}, + url = {https://aclanthology.org/2020.nlp4convai-1.5}, + year = {2020}, +} +""", prompt="Given a online banking query, find the corresponding intents", ) diff --git a/mteb/tasks/Classification/eng/DBpediaClassification.py b/mteb/tasks/Classification/eng/DBpediaClassification.py index ac7ee41ae8..51904a4c08 100644 --- a/mteb/tasks/Classification/eng/DBpediaClassification.py +++ b/mteb/tasks/Classification/eng/DBpediaClassification.py @@ -26,19 +26,19 @@ class DBpediaClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{NIPS2015_250cf8b5, - author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann}, - booktitle = {Advances in Neural Information Processing Systems}, - editor = {C. Cortes and N. Lawrence and D. Lee and M. Sugiyama and R. Garnett}, - pages = {}, - publisher = {Curran Associates, Inc.}, - title = {Character-level Convolutional Networks for Text Classification}, - url = {https://proceedings.neurips.cc/paper_files/paper/2015/file/250cf8b51c773f3f8dc8b4be867a9a02-Paper.pdf}, - volume = {28}, - year = {2015} - } - """, + bibtex_citation=r""" +@inproceedings{NIPS2015_250cf8b5, + author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {C. Cortes and N. Lawrence and D. Lee and M. Sugiyama and R. 
Garnett}, + pages = {}, + publisher = {Curran Associates, Inc.}, + title = {Character-level Convolutional Networks for Text Classification}, + url = {https://proceedings.neurips.cc/paper_files/paper/2015/file/250cf8b51c773f3f8dc8b4be867a9a02-Paper.pdf}, + volume = {28}, + year = {2015}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/eng/EmotionClassification.py b/mteb/tasks/Classification/eng/EmotionClassification.py index 05133cb17f..d04d8143a6 100644 --- a/mteb/tasks/Classification/eng/EmotionClassification.py +++ b/mteb/tasks/Classification/eng/EmotionClassification.py @@ -29,27 +29,29 @@ class EmotionClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{saravia-etal-2018-carer, - title = "{CARER}: Contextualized Affect Representations for Emotion Recognition", - author = "Saravia, Elvis and - Liu, Hsien-Chi Toby and - Huang, Yen-Hao and - Wu, Junlin and - Chen, Yi-Shin", - editor = "Riloff, Ellen and - Chiang, David and - Hockenmaier, Julia and - Tsujii, Jun{'}ichi", - booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing", - month = oct # "-" # nov, - year = "2018", - address = "Brussels, Belgium", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/D18-1404", - doi = "10.18653/v1/D18-1404", - pages = "3687--3697", - abstract = "Emotions are expressed in nuanced ways, which varies by collective or individual experiences, knowledge, and beliefs. Therefore, to understand emotion, as conveyed through text, a robust mechanism capable of capturing and modeling different linguistic nuances and phenomena is needed. We propose a semi-supervised, graph-based algorithm to produce rich structural descriptors which serve as the building blocks for constructing contextualized affect representations from text. 
The pattern-based representations are further enriched with word embeddings and evaluated through several emotion recognition tasks. Our experimental results demonstrate that the proposed method outperforms state-of-the-art techniques on emotion recognition tasks.", -}""", + bibtex_citation=r""" +@inproceedings{saravia-etal-2018-carer, + abstract = {Emotions are expressed in nuanced ways, which varies by collective or individual experiences, knowledge, and beliefs. Therefore, to understand emotion, as conveyed through text, a robust mechanism capable of capturing and modeling different linguistic nuances and phenomena is needed. We propose a semi-supervised, graph-based algorithm to produce rich structural descriptors which serve as the building blocks for constructing contextualized affect representations from text. The pattern-based representations are further enriched with word embeddings and evaluated through several emotion recognition tasks. Our experimental results demonstrate that the proposed method outperforms state-of-the-art techniques on emotion recognition tasks.}, + address = {Brussels, Belgium}, + author = {Saravia, Elvis and +Liu, Hsien-Chi Toby and +Huang, Yen-Hao and +Wu, Junlin and +Chen, Yi-Shin}, + booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing}, + doi = {10.18653/v1/D18-1404}, + editor = {Riloff, Ellen and +Chiang, David and +Hockenmaier, Julia and +Tsujii, Jun{'}ichi}, + month = oct # {-} # nov, + pages = {3687--3697}, + publisher = {Association for Computational Linguistics}, + title = {{CARER}: Contextualized Affect Representations for Emotion Recognition}, + url = {https://aclanthology.org/D18-1404}, + year = {2018}, +} +""", prompt="Classify the emotion expressed in the given Twitter message into one of the six emotions: anger, fear, joy, love, sadness, and surprise", ) diff --git a/mteb/tasks/Classification/eng/FinancialPhrasebankClassification.py 
b/mteb/tasks/Classification/eng/FinancialPhrasebankClassification.py index b9abb5445a..6d3f672f9d 100644 --- a/mteb/tasks/Classification/eng/FinancialPhrasebankClassification.py +++ b/mteb/tasks/Classification/eng/FinancialPhrasebankClassification.py @@ -28,15 +28,15 @@ class FinancialPhrasebankClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @article{Malo2014GoodDO, - title={Good debt or bad debt: Detecting semantic orientations in economic texts}, - author={P. Malo and A. Sinha and P. Korhonen and J. Wallenius and P. Takala}, - journal={Journal of the Association for Information Science and Technology}, - year={2014}, - volume={65} - } - """, + bibtex_citation=r""" +@article{Malo2014GoodDO, + author = {P. Malo and A. Sinha and P. Korhonen and J. Wallenius and P. Takala}, + journal = {Journal of the Association for Information Science and Technology}, + title = {Good debt or bad debt: Detecting semantic orientations in economic texts}, + volume = {65}, + year = {2014}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/eng/FrenkEnClassification.py b/mteb/tasks/Classification/eng/FrenkEnClassification.py index 0d435caf93..b9de110e20 100644 --- a/mteb/tasks/Classification/eng/FrenkEnClassification.py +++ b/mteb/tasks/Classification/eng/FrenkEnClassification.py @@ -27,13 +27,15 @@ class FrenkEnClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{ljubešić2019frenk, - title={The FRENK Datasets of Socially Unacceptable Discourse in Slovene and English}, - author={Nikola Ljubešić and Darja Fišer and Tomaž Erjavec}, - year={2019}, - eprint={1906.02045}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/1906.02045} - }""", + bibtex_citation=r""" +@misc{ljubešić2019frenk, + archiveprefix = {arXiv}, + author = {Nikola Ljubešić and Darja Fišer and 
Tomaž Erjavec}, + eprint = {1906.02045}, + primaryclass = {cs.CL}, + title = {The FRENK Datasets of Socially Unacceptable Discourse in Slovene and English}, + url = {https://arxiv.org/abs/1906.02045}, + year = {2019}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/ImdbClassification.py b/mteb/tasks/Classification/eng/ImdbClassification.py index 75b540bf47..df2ac734ed 100644 --- a/mteb/tasks/Classification/eng/ImdbClassification.py +++ b/mteb/tasks/Classification/eng/ImdbClassification.py @@ -29,24 +29,26 @@ class ImdbClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{maas-etal-2011-learning, - title = "Learning Word Vectors for Sentiment Analysis", - author = "Maas, Andrew L. and - Daly, Raymond E. and - Pham, Peter T. and - Huang, Dan and - Ng, Andrew Y. and - Potts, Christopher", - editor = "Lin, Dekang and - Matsumoto, Yuji and - Mihalcea, Rada", - booktitle = "Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies", - month = jun, - year = "2011", - address = "Portland, Oregon, USA", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/P11-1015", - pages = "142--150", -}""", + bibtex_citation=r""" +@inproceedings{maas-etal-2011-learning, + address = {Portland, Oregon, USA}, + author = {Maas, Andrew L. and +Daly, Raymond E. and +Pham, Peter T. and +Huang, Dan and +Ng, Andrew Y. 
and +Potts, Christopher}, + booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies}, + editor = {Lin, Dekang and +Matsumoto, Yuji and +Mihalcea, Rada}, + month = jun, + pages = {142--150}, + publisher = {Association for Computational Linguistics}, + title = {Learning Word Vectors for Sentiment Analysis}, + url = {https://aclanthology.org/P11-1015}, + year = {2011}, +} +""", prompt="Classify the sentiment expressed in the given movie review text from the IMDB dataset", ) diff --git a/mteb/tasks/Classification/eng/LegalBenchClassification.py b/mteb/tasks/Classification/eng/LegalBenchClassification.py index 8958aee7e6..d19df22a19 100644 --- a/mteb/tasks/Classification/eng/LegalBenchClassification.py +++ b/mteb/tasks/Classification/eng/LegalBenchClassification.py @@ -32,15 +32,16 @@ class CanadaTaxCourtOutcomesLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=["en-CA"], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -72,21 +73,23 @@ class ContractNLIConfidentialityOfAgreementLegalBenchClassification( annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -122,21 +125,23 @@ class ContractNLIExplicitIdentificationLegalBenchClassification(AbsTaskClassific annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -174,21 +179,23 @@ class ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification( annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -224,21 +231,23 @@ class ContractNLILimitedUseLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -274,21 +283,23 @@ class ContractNLINoLicensingLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -326,21 +337,23 @@ class ContractNLINoticeOnCompelledDisclosureLegalBenchClassification( annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -378,21 +391,23 @@ class ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassificat annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -428,21 +443,23 @@ class ContractNLIPermissibleCopyLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -480,21 +497,23 @@ class ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassificat annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -532,21 +551,23 @@ class ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification( annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -584,21 +605,23 @@ class ContractNLIReturnOfConfidentialInformationLegalBenchClassification( annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -634,21 +657,23 @@ class ContractNLISharingWithEmployeesLegalBenchClassification(AbsTaskClassificat annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -684,21 +709,23 @@ class ContractNLISharingWithThirdPartiesLegalBenchClassification(AbsTaskClassifi annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -734,21 +761,23 @@ class ContractNLISurvivalOfObligationsLegalBenchClassification(AbsTaskClassifica annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - }""", + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -784,16 +813,16 @@ class CorporateLobbyingLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -844,22 +873,23 @@ class CUADAffiliateLicenseLicenseeLegalBenchClassification(AbsTaskClassification annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -895,22 +925,23 @@ class CUADAffiliateLicenseLicensorLegalBenchClassification(AbsTaskClassification annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -946,22 +977,23 @@ class CUADAntiAssignmentLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -997,22 +1029,23 @@ class CUADAuditRightsLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1048,22 +1081,23 @@ class CUADCapOnLiabilityLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1099,22 +1133,23 @@ class CUADChangeOfControlLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1152,22 +1187,23 @@ class CUADCompetitiveRestrictionExceptionLegalBenchClassification( annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1203,22 +1239,23 @@ class CUADCovenantNotToSueLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1254,22 +1291,23 @@ class CUADEffectiveDateLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1305,22 +1343,23 @@ class CUADExclusivityLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1356,22 +1395,23 @@ class CUADExpirationDateLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1407,22 +1447,23 @@ class CUADGoverningLawLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1458,22 +1499,23 @@ class CUADInsuranceLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1509,22 +1551,23 @@ class CUADIPOwnershipAssignmentLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1560,22 +1603,23 @@ class CUADIrrevocableOrPerpetualLicenseLegalBenchClassification(AbsTaskClassific annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1611,22 +1655,23 @@ class CUADJointIPOwnershipLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1662,22 +1707,23 @@ class CUADLicenseGrantLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1713,22 +1759,23 @@ class CUADLiquidatedDamagesLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1764,22 +1811,23 @@ class CUADMinimumCommitmentLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1815,22 +1863,23 @@ class CUADMostFavoredNationLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1866,22 +1915,23 @@ class CUADNoSolicitOfCustomersLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1917,22 +1967,23 @@ class CUADNoSolicitOfEmployeesLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -1968,22 +2019,23 @@ class CUADNonCompeteLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2019,22 +2071,23 @@ class CUADNonDisparagementLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2070,22 +2123,23 @@ class CUADNonTransferableLicenseLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2121,22 +2175,23 @@ class CUADNoticePeriodToTerminateRenewalLegalBenchClassification(AbsTaskClassifi annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2172,22 +2227,23 @@ class CUADPostTerminationServicesLegalBenchClassification(AbsTaskClassification) annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2223,22 +2279,23 @@ class CUADPriceRestrictionsLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2274,22 +2331,23 @@ class CUADRenewalTermLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2325,22 +2383,23 @@ class CUADRevenueProfitSharingLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2376,22 +2435,23 @@ class CUADRofrRofoRofnLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2427,22 +2487,23 @@ class CUADSourceCodeEscrowLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2478,22 +2539,23 @@ class CUADTerminationForConvenienceLegalBenchClassification(AbsTaskClassificatio annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2529,22 +2591,23 @@ class CUADThirdPartyBeneficiaryLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2580,22 +2643,23 @@ class CUADUncappedLiabilityLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2631,22 +2695,23 @@ class CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification(AbsTaskClassifica annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2682,22 +2747,23 @@ class CUADVolumeRestrictionLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2733,22 +2799,23 @@ class CUADWarrantyDurationLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -2784,16 +2851,16 @@ class DefinitionClassificationLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -2829,16 +2896,16 @@ class Diversity1LegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -2898,16 +2965,16 @@ class Diversity2LegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -2967,16 +3034,16 @@ class Diversity3LegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -3036,16 +3103,16 @@ class Diversity4LegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -3105,16 +3172,16 @@ class Diversity5LegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -3174,16 +3241,16 @@ class Diversity6LegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -3251,16 +3318,16 @@ class FunctionOfDecisionSectionLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -3299,16 +3366,16 @@ class InsurancePolicyInterpretationLegalBenchClassification(AbsTaskClassificatio annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -3344,23 +3411,24 @@ class InternationalCitizenshipQuestionsLegalBenchClassification(AbsTaskClassific annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @misc{vink2023globalcit, - author = {Vink, Maarten and van der Baaren, Luuk and Bauböck, Rainer and Džankić, Jelena and Honohan, Iseult and Manby, Bronwen}, - title = {GLOBALCIT Citizenship Law Dataset, v2.0, Country-Year-Mode Data (Acquisition)}, - howpublished = {https://hdl.handle.net/1814/73190}, - year = {2023}, - publisher = {Global Citizenship Observatory} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@misc{vink2023globalcit, + author = {Vink, Maarten and van der Baaren, Luuk and Bauböck, Rainer and Džankić, Jelena and Honohan, Iseult and Manby, Bronwen}, + howpublished = {https://hdl.handle.net/1814/73190}, + publisher = {Global Citizenship Observatory}, + title = {GLOBALCIT Citizenship Law Dataset, v2.0, Country-Year-Mode Data (Acquisition)}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -3401,16 +3469,16 @@ class JCrewBlockerLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -3446,24 +3514,25 @@ class LearnedHandsBenefitsLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -3499,24 +3568,25 @@ class LearnedHandsBusinessLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -3552,24 +3622,25 @@ class LearnedHandsConsumerLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -3605,24 +3676,25 @@ class LearnedHandsCourtsLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -3658,24 +3730,25 @@ class LearnedHandsCrimeLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -3711,24 +3784,25 @@ class LearnedHandsDivorceLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -3764,24 +3838,25 @@ class LearnedHandsDomesticViolenceLegalBenchClassification(AbsTaskClassification annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -3817,24 +3892,25 @@ class LearnedHandsEducationLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -3870,24 +3946,25 @@ class LearnedHandsEmploymentLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -3923,24 +4000,25 @@ class LearnedHandsEstatesLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -3976,24 +4054,25 @@ class LearnedHandsFamilyLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -4032,24 +4111,25 @@ class LearnedHandsHealthLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -4085,24 +4165,25 @@ class LearnedHandsHousingLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -4141,24 +4222,25 @@ class LearnedHandsImmigrationLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -4194,24 +4276,25 @@ class LearnedHandsTortsLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -4247,24 +4330,25 @@ class LearnedHandsTrafficLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @dataset{learned_hands, - title = {LearnedHands Dataset}, - author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, - year = {2022}, - url = {https://spot.suffolklitlab.org/data/#learnedhands}, - note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, - urldate = {2022-05-21} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@dataset{learned_hands, + author = {{Suffolk University Law School} and {Stanford Legal Design Lab}}, + note = {The LearnedHands dataset is licensed under CC BY-NC-SA 4.0}, + title = {LearnedHands Dataset}, + url = {https://spot.suffolklitlab.org/data/#learnedhands}, + urldate = {2022-05-21}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -4300,16 +4384,16 @@ class LegalReasoningCausalityLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -4523,22 +4607,23 @@ class MAUDLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - @article{wang2023maud, - title={MAUD: An Expert-Annotated Legal NLP Dataset for Merger Agreement Understanding}, - author={Wang, Steven H and Scardigli, Antoine and Tang, Leonard and Chen, Wei and Levkin, Dimitry and Chen, Anya and Ball, Spencer and Woodside, Thomas and Zhang, Oliver and Hendrycks, Dan}, - journal={arXiv preprint arXiv:2301.00876}, - year={2023} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{wang2023maud, + author = {Wang, Steven H and Scardigli, Antoine and Tang, Leonard and Chen, Wei and Levkin, Dimitry and Chen, Anya and Ball, Spencer and Woodside, Thomas and Zhang, Oliver and Hendrycks, Dan}, + journal = {arXiv preprint arXiv:2301.00876}, + title = {MAUD: An Expert-Annotated Legal NLP Dataset for Merger Agreement Understanding}, + year = {2023}, +} +""", ) def load_data(self, **kwargs: Any) -> None: @@ -4630,16 +4715,16 @@ class NYSJudicialEthicsLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -4677,23 +4762,24 @@ class OPP115DataRetentionLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - @inproceedings{wilson2016creation, - title={The creation and analysis of a website privacy policy corpus}, - author={Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, - booktitle={Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, - pages={1330--1340}, - year={2016} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@inproceedings{wilson2016creation, + author = {Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, + booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + pages = {1330--1340}, + title = {The creation and analysis of a website privacy policy corpus}, + year = {2016}, +} +""", ) def dataset_transform(self): @@ -4729,23 +4815,24 @@ class OPP115DataSecurityLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - @inproceedings{wilson2016creation, - title={The creation and analysis of a website privacy policy corpus}, - author={Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, - booktitle={Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, - pages={1330--1340}, - year={2016} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@inproceedings{wilson2016creation, + author = {Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, + booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + pages = {1330--1340}, + title = {The creation and analysis of a website privacy policy corpus}, + year = {2016}, +} +""", ) def dataset_transform(self): @@ -4781,23 +4868,24 @@ class OPP115DoNotTrackLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - @inproceedings{wilson2016creation, - title={The creation and analysis of a website privacy policy corpus}, - author={Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, - booktitle={Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, - pages={1330--1340}, - year={2016} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@inproceedings{wilson2016creation, + author = {Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, + booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + pages = {1330--1340}, + title = {The creation and analysis of a website privacy policy corpus}, + year = {2016}, +} +""", ) def dataset_transform(self): @@ -4833,23 +4921,24 @@ class OPP115FirstPartyCollectionUseLegalBenchClassification(AbsTaskClassificatio annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - @inproceedings{wilson2016creation, - title={The creation and analysis of a website privacy policy corpus}, - author={Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, - booktitle={Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, - pages={1330--1340}, - year={2016} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@inproceedings{wilson2016creation, + author = {Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, + booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + pages = {1330--1340}, + title = {The creation and analysis of a website privacy policy corpus}, + year = {2016}, +} +""", ) def dataset_transform(self): @@ -4887,23 +4976,24 @@ class OPP115InternationalAndSpecificAudiencesLegalBenchClassification( annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - @inproceedings{wilson2016creation, - title={The creation and analysis of a website privacy policy corpus}, - author={Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, - booktitle={Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, - pages={1330--1340}, - year={2016} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@inproceedings{wilson2016creation, + author = {Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, + booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + pages = {1330--1340}, + title = {The creation and analysis of a website privacy policy corpus}, + year = {2016}, +} +""", ) def dataset_transform(self): @@ -4939,23 +5029,24 @@ class OPP115PolicyChangeLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - @inproceedings{wilson2016creation, - title={The creation and analysis of a website privacy policy corpus}, - author={Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, - booktitle={Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, - pages={1330--1340}, - year={2016} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@inproceedings{wilson2016creation, + author = {Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, + booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + pages = {1330--1340}, + title = {The creation and analysis of a website privacy policy corpus}, + year = {2016}, +} +""", ) def dataset_transform(self): @@ -4991,23 +5082,24 @@ class OPP115ThirdPartySharingCollectionLegalBenchClassification(AbsTaskClassific annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - @inproceedings{wilson2016creation, - title={The creation and analysis of a website privacy policy corpus}, - author={Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, - booktitle={Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, - pages={1330--1340}, - year={2016} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@inproceedings{wilson2016creation, + author = {Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, + booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + pages = {1330--1340}, + title = {The creation and analysis of a website privacy policy corpus}, + year = {2016}, +} +""", ) def dataset_transform(self): @@ -5043,23 +5135,24 @@ class OPP115UserAccessEditAndDeletionLegalBenchClassification(AbsTaskClassificat annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - @inproceedings{wilson2016creation, - title={The creation and analysis of a website privacy policy corpus}, - author={Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, - booktitle={Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, - pages={1330--1340}, - year={2016} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@inproceedings{wilson2016creation, + author = {Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, + booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + pages = {1330--1340}, + title = {The creation and analysis of a website privacy policy corpus}, + year = {2016}, +} +""", ) def dataset_transform(self): @@ -5095,23 +5188,24 @@ class OPP115UserChoiceControlLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - @inproceedings{wilson2016creation, - title={The creation and analysis of a website privacy policy corpus}, - author={Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, - booktitle={Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, - pages={1330--1340}, - year={2016} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@inproceedings{wilson2016creation, + author = {Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, + booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + pages = {1330--1340}, + title = {The creation and analysis of a website privacy policy corpus}, + year = {2016}, +} +""", ) def dataset_transform(self): @@ -5155,16 +5249,16 @@ class OralArgumentQuestionPurposeLegalBenchClassification(AbsTaskClassification) annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5196,23 +5290,24 @@ class OverrulingLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @inproceedings{zheng2021does, - title={When does pretraining help? assessing self-supervised learning for law and the casehold dataset of 53,000+ legal holdings}, - author={Zheng, Lucia and Guha, Neel and Anderson, Brandon R and Henderson, Peter and Ho, Daniel E}, - booktitle={Proceedings of the eighteenth international conference on artificial intelligence and law}, - pages={159--168}, - year={2021} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@inproceedings{zheng2021does, + author = {Zheng, Lucia and Guha, Neel and Anderson, Brandon R and Henderson, Peter and Ho, Daniel E}, + booktitle = {Proceedings of the eighteenth international conference on artificial intelligence and law}, + pages = {159--168}, + title = {When does pretraining help? assessing self-supervised learning for law and the casehold dataset of 53,000+ legal holdings}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -5251,16 +5346,16 @@ class PersonalJurisdictionLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5296,16 +5391,16 @@ class PROALegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5341,25 +5436,26 @@ class SCDBPAccountabilityLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{chilton2017limitations, - title={The limitations of supply chain disclosure regimes}, - author={Chilton, Adam S and Sarfaty, Galit A}, - journal={Stan. J. 
Int'l L.}, - volume={53}, - pages={1}, - year={2017}, - publisher={HeinOnline} - } - """, + bibtex_citation=r""" +@article{chilton2017limitations, + author = {Chilton, Adam S and Sarfaty, Galit A}, + journal = {Stan. J. Int'l L.}, + pages = {1}, + publisher = {HeinOnline}, + title = {The limitations of supply chain disclosure regimes}, + volume = {53}, + year = {2017}, +} + +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5395,25 +5491,26 @@ class SCDBPAuditsLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{chilton2017limitations, - title={The limitations of supply chain disclosure regimes}, - author={Chilton, Adam S and Sarfaty, Galit A}, - journal={Stan. J. Int'l L.}, - volume={53}, - pages={1}, - year={2017}, - publisher={HeinOnline} - } - """, + bibtex_citation=r""" +@article{chilton2017limitations, + author = {Chilton, Adam S and Sarfaty, Galit A}, + journal = {Stan. J. Int'l L.}, + pages = {1}, + publisher = {HeinOnline}, + title = {The limitations of supply chain disclosure regimes}, + volume = {53}, + year = {2017}, +} + +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5449,25 +5546,26 @@ class SCDBPCertificationLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{chilton2017limitations, - title={The limitations of supply chain disclosure regimes}, - author={Chilton, Adam S and Sarfaty, Galit A}, - journal={Stan. J. 
Int'l L.}, - volume={53}, - pages={1}, - year={2017}, - publisher={HeinOnline} - } - """, + bibtex_citation=r""" +@article{chilton2017limitations, + author = {Chilton, Adam S and Sarfaty, Galit A}, + journal = {Stan. J. Int'l L.}, + pages = {1}, + publisher = {HeinOnline}, + title = {The limitations of supply chain disclosure regimes}, + volume = {53}, + year = {2017}, +} + +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5503,25 +5601,26 @@ class SCDBPTrainingLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{chilton2017limitations, - title={The limitations of supply chain disclosure regimes}, - author={Chilton, Adam S and Sarfaty, Galit A}, - journal={Stan. J. Int'l L.}, - volume={53}, - pages={1}, - year={2017}, - publisher={HeinOnline} - } - """, + bibtex_citation=r""" +@article{chilton2017limitations, + author = {Chilton, Adam S and Sarfaty, Galit A}, + journal = {Stan. J. Int'l L.}, + pages = {1}, + publisher = {HeinOnline}, + title = {The limitations of supply chain disclosure regimes}, + volume = {53}, + year = {2017}, +} + +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5557,25 +5656,26 @@ class SCDBPVerificationLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{chilton2017limitations, - title={The limitations of supply chain disclosure regimes}, - author={Chilton, Adam S and Sarfaty, Galit A}, - journal={Stan. J. 
Int'l L.}, - volume={53}, - pages={1}, - year={2017}, - publisher={HeinOnline} - } - """, + bibtex_citation=r""" +@article{chilton2017limitations, + author = {Chilton, Adam S and Sarfaty, Galit A}, + journal = {Stan. J. Int'l L.}, + pages = {1}, + publisher = {HeinOnline}, + title = {The limitations of supply chain disclosure regimes}, + volume = {53}, + year = {2017}, +} + +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5611,25 +5711,26 @@ class SCDDAccountabilityLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{chilton2017limitations, - title={The limitations of supply chain disclosure regimes}, - author={Chilton, Adam S and Sarfaty, Galit A}, - journal={Stan. J. Int'l L.}, - volume={53}, - pages={1}, - year={2017}, - publisher={HeinOnline} - } - """, + bibtex_citation=r""" +@article{chilton2017limitations, + author = {Chilton, Adam S and Sarfaty, Galit A}, + journal = {Stan. J. Int'l L.}, + pages = {1}, + publisher = {HeinOnline}, + title = {The limitations of supply chain disclosure regimes}, + volume = {53}, + year = {2017}, +} + +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5665,25 +5766,26 @@ class SCDDAuditsLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{chilton2017limitations, - title={The limitations of supply chain disclosure regimes}, - author={Chilton, Adam S and Sarfaty, Galit A}, - journal={Stan. J. 
Int'l L.}, - volume={53}, - pages={1}, - year={2017}, - publisher={HeinOnline} - } - """, + bibtex_citation=r""" +@article{chilton2017limitations, + author = {Chilton, Adam S and Sarfaty, Galit A}, + journal = {Stan. J. Int'l L.}, + pages = {1}, + publisher = {HeinOnline}, + title = {The limitations of supply chain disclosure regimes}, + volume = {53}, + year = {2017}, +} + +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5719,25 +5821,26 @@ class SCDDCertificationLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{chilton2017limitations, - title={The limitations of supply chain disclosure regimes}, - author={Chilton, Adam S and Sarfaty, Galit A}, - journal={Stan. J. Int'l L.}, - volume={53}, - pages={1}, - year={2017}, - publisher={HeinOnline} - } - """, + bibtex_citation=r""" +@article{chilton2017limitations, + author = {Chilton, Adam S and Sarfaty, Galit A}, + journal = {Stan. J. Int'l L.}, + pages = {1}, + publisher = {HeinOnline}, + title = {The limitations of supply chain disclosure regimes}, + volume = {53}, + year = {2017}, +} + +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5773,25 +5876,26 @@ class SCDDTrainingLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{chilton2017limitations, - title={The limitations of supply chain disclosure regimes}, - author={Chilton, Adam S and Sarfaty, Galit A}, - journal={Stan. J. 
Int'l L.}, - volume={53}, - pages={1}, - year={2017}, - publisher={HeinOnline} - } - """, + bibtex_citation=r""" +@article{chilton2017limitations, + author = {Chilton, Adam S and Sarfaty, Galit A}, + journal = {Stan. J. Int'l L.}, + pages = {1}, + publisher = {HeinOnline}, + title = {The limitations of supply chain disclosure regimes}, + volume = {53}, + year = {2017}, +} + +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5827,25 +5931,26 @@ class SCDDVerificationLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }, - @article{chilton2017limitations, - title={The limitations of supply chain disclosure regimes}, - author={Chilton, Adam S and Sarfaty, Galit A}, - journal={Stan. J. Int'l L.}, - volume={53}, - pages={1}, - year={2017}, - publisher={HeinOnline} - } - """, + bibtex_citation=r""" +@article{chilton2017limitations, + author = {Chilton, Adam S and Sarfaty, Galit A}, + journal = {Stan. J. Int'l L.}, + pages = {1}, + publisher = {HeinOnline}, + title = {The limitations of supply chain disclosure regimes}, + volume = {53}, + year = {2017}, +} + +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5881,16 +5986,16 @@ class TelemarketingSalesRuleLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5926,16 +6031,16 @@ class TextualismToolDictionariesLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -5971,16 +6076,16 @@ class TextualismToolPlainLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -6016,16 +6121,16 @@ class UCCVCommonLawLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. 
Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} +""", ) def dataset_transform(self): @@ -6063,25 +6168,26 @@ class UnfairTOSLegalBenchClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - @article{lippi2019claudette, - title={CLAUDETTE: an automated detector of potentially unfair clauses in online terms of service}, - author={Lippi, Marco and Pa{\l}ka, Przemys{\l}aw and Contissa, Giuseppe and Lagioia, Francesca and Micklitz, Hans-Wolfgang and Sartor, Giovanni and Torroni, Paolo}, - journal={Artificial Intelligence and Law}, - volume={27}, - pages={117--139}, - year={2019}, - publisher={Springer} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{lippi2019claudette, + author = {Lippi, Marco and Pa{\l}ka, Przemys{\l}aw and Contissa, Giuseppe and Lagioia, Francesca and Micklitz, Hans-Wolfgang and Sartor, Giovanni and Torroni, Paolo}, + journal = {Artificial Intelligence and Law}, + pages = {117--139}, + publisher = {Springer}, + title = {CLAUDETTE: an automated detector of potentially unfair clauses in online terms of service}, + volume = {27}, + year = {2019}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/eng/NewsClassification.py b/mteb/tasks/Classification/eng/NewsClassification.py index e09aa04255..aec198d5c1 100644 --- a/mteb/tasks/Classification/eng/NewsClassification.py +++ b/mteb/tasks/Classification/eng/NewsClassification.py @@ -29,16 +29,17 @@ class NewsClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=["eng-Latn-US", "en-Latn-GB", "en-Latn-AU"], sample_creation="found", - bibtex_citation=""" - @inproceedings{NIPS2015_250cf8b5, - author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann}, - booktitle = {Advances in Neural Information Processing Systems}, - editor = {C. Cortes and N. Lawrence and D. Lee and M. Sugiyama and R. 
Garnett}, - pages = {}, - publisher = {Curran Associates, Inc.}, - title = {Character-level Convolutional Networks for Text Classification}, - url = {https://proceedings.neurips.cc/paper_files/paper/2015/file/250cf8b51c773f3f8dc8b4be867a9a02-Paper.pdf}, - volume = {28}, - year = {2015} - }""", + bibtex_citation=r""" +@inproceedings{NIPS2015_250cf8b5, + author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {C. Cortes and N. Lawrence and D. Lee and M. Sugiyama and R. Garnett}, + pages = {}, + publisher = {Curran Associates, Inc.}, + title = {Character-level Convolutional Networks for Text Classification}, + url = {https://proceedings.neurips.cc/paper_files/paper/2015/file/250cf8b51c773f3f8dc8b4be867a9a02-Paper.pdf}, + volume = {28}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/PatentClassification.py b/mteb/tasks/Classification/eng/PatentClassification.py index 9f10a8a794..3876e64ddf 100644 --- a/mteb/tasks/Classification/eng/PatentClassification.py +++ b/mteb/tasks/Classification/eng/PatentClassification.py @@ -27,24 +27,26 @@ class PatentClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{sharma-etal-2019-bigpatent, - title = "{BIGPATENT}: A Large-Scale Dataset for Abstractive and Coherent Summarization", - author = "Sharma, Eva and - Li, Chen and - Wang, Lu", - editor = "Korhonen, Anna and - Traum, David and - M{\`a}rquez, Llu{\'\i}s", - booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics", - month = jul, - year = "2019", - address = "Florence, Italy", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/P19-1212", - doi = "10.18653/v1/P19-1212", - pages = "2204--2213", - abstract = "Most existing text summarization datasets are compiled from the news domain, where summaries have a 
flattened discourse structure. In such datasets, summary-worthy content often appears in the beginning of input articles. Moreover, large segments from input articles are present verbatim in their respective summaries. These issues impede the learning and evaluation of systems that can understand an article{'}s global content structure as well as produce abstractive summaries with high compression ratio. In this work, we present a novel dataset, BIGPATENT, consisting of 1.3 million records of U.S. patent documents along with human written abstractive summaries. Compared to existing summarization datasets, BIGPATENT has the following properties: i) summaries contain a richer discourse structure with more recurring entities, ii) salient content is evenly distributed in the input, and iii) lesser and shorter extractive fragments are present in the summaries. Finally, we train and evaluate baselines and popular learning models on BIGPATENT to shed light on new challenges and motivate future directions for summarization research.", - }""", + bibtex_citation=r""" +@inproceedings{sharma-etal-2019-bigpatent, + abstract = {Most existing text summarization datasets are compiled from the news domain, where summaries have a flattened discourse structure. In such datasets, summary-worthy content often appears in the beginning of input articles. Moreover, large segments from input articles are present verbatim in their respective summaries. These issues impede the learning and evaluation of systems that can understand an article{'}s global content structure as well as produce abstractive summaries with high compression ratio. In this work, we present a novel dataset, BIGPATENT, consisting of 1.3 million records of U.S. patent documents along with human written abstractive summaries. 
Compared to existing summarization datasets, BIGPATENT has the following properties: i) summaries contain a richer discourse structure with more recurring entities, ii) salient content is evenly distributed in the input, and iii) lesser and shorter extractive fragments are present in the summaries. Finally, we train and evaluate baselines and popular learning models on BIGPATENT to shed light on new challenges and motivate future directions for summarization research.}, + address = {Florence, Italy}, + author = {Sharma, Eva and +Li, Chen and +Wang, Lu}, + booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics}, + doi = {10.18653/v1/P19-1212}, + editor = {Korhonen, Anna and +Traum, David and +M{\`a}rquez, Llu{\'\i}s}, + month = jul, + pages = {2204--2213}, + publisher = {Association for Computational Linguistics}, + title = {{BIGPATENT}: A Large-Scale Dataset for Abstractive and Coherent Summarization}, + url = {https://aclanthology.org/P19-1212}, + year = {2019}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/eng/PoemSentimentClassification.py b/mteb/tasks/Classification/eng/PoemSentimentClassification.py index f0110308ee..8671929fea 100644 --- a/mteb/tasks/Classification/eng/PoemSentimentClassification.py +++ b/mteb/tasks/Classification/eng/PoemSentimentClassification.py @@ -27,14 +27,14 @@ class PoemSentimentClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=["eng-Latn-US", "en-Latn-GB"], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @misc{sheng2020investigating, - title={Investigating Societal Biases in a Poetry Composition System}, - author={Emily Sheng and David Uthus}, - year={2020}, - eprint={2011.02686}, - archivePrefix={arXiv}, - primaryClass={cs.CL} + archiveprefix = {arXiv}, + author = {Emily Sheng and David Uthus}, + eprint = {2011.02686}, + primaryclass = {cs.CL}, + title = {Investigating Societal Biases in a Poetry 
Composition System}, + year = {2020}, } """, ) diff --git a/mteb/tasks/Classification/eng/SDSEyeProtectionClassification.py b/mteb/tasks/Classification/eng/SDSEyeProtectionClassification.py index 197060ba0c..5ae0df8602 100644 --- a/mteb/tasks/Classification/eng/SDSEyeProtectionClassification.py +++ b/mteb/tasks/Classification/eng/SDSEyeProtectionClassification.py @@ -26,19 +26,20 @@ class SDSEyeProtectionClassification(AbsTaskClassification): annotations_creators="LM-generated and reviewed", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - @inproceedings{pereira2020msds, - title={MSDS-OPP: Operator Procedures Prediction in Material Safety Data Sheets}, - author={Pereira, Eliseu}, - booktitle={15th Doctoral Symposium}, - pages={42}, - year={2020} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} + +@inproceedings{pereira2020msds, + author = {Pereira, Eliseu}, + booktitle = {15th Doctoral Symposium}, + pages = {42}, + title = {MSDS-OPP: Operator Procedures Prediction in Material Safety Data Sheets}, + year = {2020}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/SDSGlovesClassification.py b/mteb/tasks/Classification/eng/SDSGlovesClassification.py index ac471d58e9..41b68096db 100644 --- 
a/mteb/tasks/Classification/eng/SDSGlovesClassification.py +++ b/mteb/tasks/Classification/eng/SDSGlovesClassification.py @@ -26,19 +26,20 @@ class SDSGlovesClassification(AbsTaskClassification): annotations_creators="LM-generated and reviewed", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - @inproceedings{pereira2020msds, - title={MSDS-OPP: Operator Procedures Prediction in Material Safety Data Sheets}, - author={Pereira, Eliseu}, - booktitle={15th Doctoral Symposium}, - pages={42}, - year={2020} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} + +@inproceedings{pereira2020msds, + author = {Pereira, Eliseu}, + booktitle = {15th Doctoral Symposium}, + pages = {42}, + title = {MSDS-OPP: Operator Procedures Prediction in Material Safety Data Sheets}, + year = {2020}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/ToxicChatClassification.py b/mteb/tasks/Classification/eng/ToxicChatClassification.py index 51dd5066d3..e189cd51a0 100644 --- a/mteb/tasks/Classification/eng/ToxicChatClassification.py +++ b/mteb/tasks/Classification/eng/ToxicChatClassification.py @@ -37,14 +37,16 @@ class ToxicChatClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], 
sample_creation="found", - bibtex_citation="""@misc{lin2023toxicchat, - title={ToxicChat: Unveiling Hidden Challenges of Toxicity Detection in Real-World User-AI Conversation}, - author={Zi Lin and Zihan Wang and Yongqi Tong and Yangkun Wang and Yuxin Guo and Yujia Wang and Jingbo Shang}, - year={2023}, - eprint={2310.17389}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }""", + bibtex_citation=r""" +@misc{lin2023toxicchat, + archiveprefix = {arXiv}, + author = {Zi Lin and Zihan Wang and Yongqi Tong and Yangkun Wang and Yuxin Guo and Yujia Wang and Jingbo Shang}, + eprint = {2310.17389}, + primaryclass = {cs.CL}, + title = {ToxicChat: Unveiling Hidden Challenges of Toxicity Detection in Real-World User-AI Conversation}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/eng/ToxicConversationsClassification.py b/mteb/tasks/Classification/eng/ToxicConversationsClassification.py index f99d44534d..439b19ba7c 100644 --- a/mteb/tasks/Classification/eng/ToxicConversationsClassification.py +++ b/mteb/tasks/Classification/eng/ToxicConversationsClassification.py @@ -29,13 +29,15 @@ class ToxicConversationsClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{jigsaw-unintended-bias-in-toxicity-classification, - author = {cjadams and Daniel Borkan and inversion and Jeffrey Sorensen and Lucas Dixon and Lucy Vasserman and nithum}, - title = {Jigsaw Unintended Bias in Toxicity Classification}, - publisher = {Kaggle}, - year = {2019}, - url = {https://kaggle.com/competitions/jigsaw-unintended-bias-in-toxicity-classification} -}""", + bibtex_citation=r""" +@misc{jigsaw-unintended-bias-in-toxicity-classification, + author = {cjadams and Daniel Borkan and inversion and Jeffrey Sorensen and Lucas Dixon and Lucy Vasserman and nithum}, + publisher = {Kaggle}, + title = {Jigsaw Unintended Bias in Toxicity Classification}, + url = 
{https://kaggle.com/competitions/jigsaw-unintended-bias-in-toxicity-classification}, + year = {2019}, +} +""", prompt="Classify the given comments as either toxic or not toxic", ) diff --git a/mteb/tasks/Classification/eng/TweetSentimentExtractionClassification.py b/mteb/tasks/Classification/eng/TweetSentimentExtractionClassification.py index d77c44936e..a4ab4b5c70 100644 --- a/mteb/tasks/Classification/eng/TweetSentimentExtractionClassification.py +++ b/mteb/tasks/Classification/eng/TweetSentimentExtractionClassification.py @@ -29,13 +29,15 @@ class TweetSentimentExtractionClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{tweet-sentiment-extraction, - author = {Maggie, Phil Culliton, Wei Chen}, - title = {Tweet Sentiment Extraction}, - publisher = {Kaggle}, - year = {2020}, - url = {https://kaggle.com/competitions/tweet-sentiment-extraction} -}""", + bibtex_citation=r""" +@misc{tweet-sentiment-extraction, + author = {Maggie, Phil Culliton, Wei Chen}, + publisher = {Kaggle}, + title = {Tweet Sentiment Extraction}, + url = {https://kaggle.com/competitions/tweet-sentiment-extraction}, + year = {2020}, +} +""", prompt="Classify the sentiment of a given tweet as either positive, negative, or neutral", ) diff --git a/mteb/tasks/Classification/eng/TweetTopicSingleClassification.py b/mteb/tasks/Classification/eng/TweetTopicSingleClassification.py index 6c7d4e2bbb..cad250dac0 100644 --- a/mteb/tasks/Classification/eng/TweetTopicSingleClassification.py +++ b/mteb/tasks/Classification/eng/TweetTopicSingleClassification.py @@ -32,22 +32,22 @@ class TweetTopicSingleClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{dimosthenis-etal-2022-twitter, - title = "{T}witter {T}opic {C}lassification", - author = "Antypas, Dimosthenis and - Ushio, Asahi and - Camacho-Collados, Jose and - 
Neves, Leonardo and - Silva, Vitor and - Barbieri, Francesco", - booktitle = "Proceedings of the 29th International Conference on Computational Linguistics", - month = oct, - year = "2022", - address = "Gyeongju, Republic of Korea", - publisher = "International Committee on Computational Linguistics" - } - """, + bibtex_citation=r""" +@inproceedings{dimosthenis-etal-2022-twitter, + address = {Gyeongju, Republic of Korea}, + author = {Antypas, Dimosthenis and +Ushio, Asahi and +Camacho-Collados, Jose and +Neves, Leonardo and +Silva, Vitor and +Barbieri, Francesco}, + booktitle = {Proceedings of the 29th International Conference on Computational Linguistics}, + month = oct, + publisher = {International Committee on Computational Linguistics}, + title = {{T}witter {T}opic {C}lassification}, + year = {2022}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py b/mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py index 3b494f46f6..c6ade13b66 100644 --- a/mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py @@ -26,12 +26,12 @@ class WikipediaBioMetChemClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv 
preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/WikipediaBiolumNeurochemClassification.py b/mteb/tasks/Classification/eng/WikipediaBiolumNeurochemClassification.py index 623ec8fc66..cd103aacf8 100644 --- a/mteb/tasks/Classification/eng/WikipediaBiolumNeurochemClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaBiolumNeurochemClassification.py @@ -26,12 +26,12 @@ class WikipediaBiolumNeurochemClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/WikipediaChemEngSpecialtiesClassification.py b/mteb/tasks/Classification/eng/WikipediaChemEngSpecialtiesClassification.py index c95abcd4f2..4a61cb4665 100644 --- a/mteb/tasks/Classification/eng/WikipediaChemEngSpecialtiesClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaChemEngSpecialtiesClassification.py @@ -26,12 +26,12 @@ class 
WikipediaChemEngSpecialtiesClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/WikipediaChemFieldsClassification.py b/mteb/tasks/Classification/eng/WikipediaChemFieldsClassification.py index 7c0179fb1e..940bcdc44e 100644 --- a/mteb/tasks/Classification/eng/WikipediaChemFieldsClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaChemFieldsClassification.py @@ -26,12 +26,12 @@ class WikipediaChemFieldsClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and 
Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/WikipediaChemistryTopicsClassification.py b/mteb/tasks/Classification/eng/WikipediaChemistryTopicsClassification.py index 02751b1a32..b6e0d362bb 100644 --- a/mteb/tasks/Classification/eng/WikipediaChemistryTopicsClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaChemistryTopicsClassification.py @@ -26,12 +26,12 @@ class WikipediaChemistryTopicsClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/WikipediaCompChemSpectroscopyClassification.py b/mteb/tasks/Classification/eng/WikipediaCompChemSpectroscopyClassification.py index 28a42ac044..8ee7c5b145 100644 --- a/mteb/tasks/Classification/eng/WikipediaCompChemSpectroscopyClassification.py +++ 
b/mteb/tasks/Classification/eng/WikipediaCompChemSpectroscopyClassification.py @@ -26,12 +26,12 @@ class WikipediaCompChemSpectroscopyClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/WikipediaCryobiologySeparationClassification.py b/mteb/tasks/Classification/eng/WikipediaCryobiologySeparationClassification.py index 0e01454298..4f64eb2ff8 100644 --- a/mteb/tasks/Classification/eng/WikipediaCryobiologySeparationClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaCryobiologySeparationClassification.py @@ -26,12 +26,12 @@ class WikipediaCryobiologySeparationClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv 
preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/WikipediaCrystallographyAnalyticalClassification.py b/mteb/tasks/Classification/eng/WikipediaCrystallographyAnalyticalClassification.py index 724ffc4249..9bc991261a 100644 --- a/mteb/tasks/Classification/eng/WikipediaCrystallographyAnalyticalClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaCrystallographyAnalyticalClassification.py @@ -26,12 +26,12 @@ class WikipediaCrystallographyAnalyticalClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/WikipediaGreenhouseEnantiopureClassification.py 
b/mteb/tasks/Classification/eng/WikipediaGreenhouseEnantiopureClassification.py index b701584a70..32f8bc949b 100644 --- a/mteb/tasks/Classification/eng/WikipediaGreenhouseEnantiopureClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaGreenhouseEnantiopureClassification.py @@ -26,12 +26,12 @@ class WikipediaGreenhouseEnantiopureClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/WikipediaIsotopesFissionClassification.py b/mteb/tasks/Classification/eng/WikipediaIsotopesFissionClassification.py index 252ad85ed9..f3cb8f52c3 100644 --- a/mteb/tasks/Classification/eng/WikipediaIsotopesFissionClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaIsotopesFissionClassification.py @@ -26,12 +26,12 @@ class WikipediaIsotopesFissionClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - 
author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/WikipediaLuminescenceClassification.py b/mteb/tasks/Classification/eng/WikipediaLuminescenceClassification.py index 8e115b59d4..e414138cd6 100644 --- a/mteb/tasks/Classification/eng/WikipediaLuminescenceClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaLuminescenceClassification.py @@ -26,12 +26,12 @@ class WikipediaLuminescenceClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) 
diff --git a/mteb/tasks/Classification/eng/WikipediaOrganicInorganicClassification.py b/mteb/tasks/Classification/eng/WikipediaOrganicInorganicClassification.py index 0ad784b69b..bde49beefa 100644 --- a/mteb/tasks/Classification/eng/WikipediaOrganicInorganicClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaOrganicInorganicClassification.py @@ -26,12 +26,12 @@ class WikipediaOrganicInorganicClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/WikipediaSaltsSemiconductorsClassification.py b/mteb/tasks/Classification/eng/WikipediaSaltsSemiconductorsClassification.py index a409f87c8d..55ef384d0e 100644 --- a/mteb/tasks/Classification/eng/WikipediaSaltsSemiconductorsClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaSaltsSemiconductorsClassification.py @@ -26,12 +26,12 @@ class WikipediaSaltsSemiconductorsClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, 
an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/WikipediaSolidStateColloidalClassification.py b/mteb/tasks/Classification/eng/WikipediaSolidStateColloidalClassification.py index 43f95c50f3..eab8e2cdcc 100644 --- a/mteb/tasks/Classification/eng/WikipediaSolidStateColloidalClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaSolidStateColloidalClassification.py @@ -26,12 +26,12 @@ class WikipediaSolidStateColloidalClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding 
Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/WikipediaTheoreticalAppliedClassification.py b/mteb/tasks/Classification/eng/WikipediaTheoreticalAppliedClassification.py index f33b02f4bb..f1d97d3a70 100644 --- a/mteb/tasks/Classification/eng/WikipediaTheoreticalAppliedClassification.py +++ b/mteb/tasks/Classification/eng/WikipediaTheoreticalAppliedClassification.py @@ -26,12 +26,12 @@ class WikipediaTheoreticalAppliedClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/eng/YahooAnswersTopicsClassification.py b/mteb/tasks/Classification/eng/YahooAnswersTopicsClassification.py index 9369b0f6b1..7a699bfaaf 100644 --- a/mteb/tasks/Classification/eng/YahooAnswersTopicsClassification.py +++ b/mteb/tasks/Classification/eng/YahooAnswersTopicsClassification.py @@ -27,18 +27,19 @@ class YahooAnswersTopicsClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" 
- @inproceedings{NIPS2015_250cf8b5, - author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann}, - booktitle = {Advances in Neural Information Processing Systems}, - editor = {C. Cortes and N. Lawrence and D. Lee and M. Sugiyama and R. Garnett}, - pages = {}, - publisher = {Curran Associates, Inc.}, - title = {Character-level Convolutional Networks for Text Classification}, - url = {https://proceedings.neurips.cc/paper_files/paper/2015/file/250cf8b51c773f3f8dc8b4be867a9a02-Paper.pdf}, - volume = {28}, - year = {2015} - }""", + bibtex_citation=r""" +@inproceedings{NIPS2015_250cf8b5, + author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {C. Cortes and N. Lawrence and D. Lee and M. Sugiyama and R. Garnett}, + pages = {}, + publisher = {Curran Associates, Inc.}, + title = {Character-level Convolutional Networks for Text Classification}, + url = {https://proceedings.neurips.cc/paper_files/paper/2015/file/250cf8b51c773f3f8dc8b4be867a9a02-Paper.pdf}, + volume = {28}, + year = {2015}, +} +""", ) samples_per_label = 32 diff --git a/mteb/tasks/Classification/eng/YelpReviewFullClassification.py b/mteb/tasks/Classification/eng/YelpReviewFullClassification.py index 584d5b5266..2c088af31a 100644 --- a/mteb/tasks/Classification/eng/YelpReviewFullClassification.py +++ b/mteb/tasks/Classification/eng/YelpReviewFullClassification.py @@ -26,19 +26,19 @@ class YelpReviewFullClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{NIPS2015_250cf8b5, - author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann}, - booktitle = {Advances in Neural Information Processing Systems}, - editor = {C. Cortes and N. Lawrence and D. Lee and M. Sugiyama and R. 
Garnett}, - pages = {}, - publisher = {Curran Associates, Inc.}, - title = {Character-level Convolutional Networks for Text Classification}, - url = {https://proceedings.neurips.cc/paper_files/paper/2015/file/250cf8b51c773f3f8dc8b4be867a9a02-Paper.pdf}, - volume = {28}, - year = {2015} - } - """, + bibtex_citation=r""" +@inproceedings{NIPS2015_250cf8b5, + author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {C. Cortes and N. Lawrence and D. Lee and M. Sugiyama and R. Garnett}, + pages = {}, + publisher = {Curran Associates, Inc.}, + title = {Character-level Convolutional Networks for Text Classification}, + url = {https://proceedings.neurips.cc/paper_files/paper/2015/file/250cf8b51c773f3f8dc8b4be867a9a02-Paper.pdf}, + volume = {28}, + year = {2015}, +} +""", ) samples_per_label = 128 diff --git a/mteb/tasks/Classification/est/estonian_valence.py b/mteb/tasks/Classification/est/estonian_valence.py index 3f15ee7925..11561aa385 100644 --- a/mteb/tasks/Classification/est/estonian_valence.py +++ b/mteb/tasks/Classification/est/estonian_valence.py @@ -29,15 +29,16 @@ class EstonianValenceClassification(AbsTaskClassification): license="cc-by-4.0", annotations_creators="human-annotated", sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @article{Pajupuu2023, - author = "Hille Pajupuu and Jaan Pajupuu and Rene Altrov and Kairi Tamuri", - title = "{Estonian Valence Corpus / Eesti valentsikorpus}", - year = "2023", - month = "11", - url = "https://figshare.com/articles/dataset/Estonian_Valence_Corpus_Eesti_valentsikorpus/24517054", - doi = "10.6084/m9.figshare.24517054.v1" -}""", + author = {Hille Pajupuu and Jaan Pajupuu and Rene Altrov and Kairi Tamuri}, + doi = {10.6084/m9.figshare.24517054.v1}, + month = {11}, + title = {{Estonian Valence Corpus / Eesti valentsikorpus}}, + url = {https://figshare.com/articles/dataset/Estonian_Valence_Corpus_Eesti_valentsikorpus/24517054}, 
+ year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/fas/PersianFoodSentimentClassification.py b/mteb/tasks/Classification/fas/PersianFoodSentimentClassification.py index f7389e57bc..5b66d39efc 100644 --- a/mteb/tasks/Classification/fas/PersianFoodSentimentClassification.py +++ b/mteb/tasks/Classification/fas/PersianFoodSentimentClassification.py @@ -28,15 +28,15 @@ class PersianFoodSentimentClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @article{ParsBERT, - title={ParsBERT: Transformer-based Model for Persian Language Understanding}, - author={Mehrdad Farahani, Mohammad Gharachorloo, Marzieh Farahani, Mohammad Manthouri}, - journal={ArXiv}, - year={2020}, - volume={abs/2005.12515} - } - """, + bibtex_citation=r""" +@article{ParsBERT, + author = {Mehrdad Farahani, Mohammad Gharachorloo, Marzieh Farahani, Mohammad Manthouri}, + journal = {ArXiv}, + title = {ParsBERT: Transformer-based Model for Persian Language Understanding}, + volume = {abs/2005.12515}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py b/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py index 3715103ca2..047e91df8e 100644 --- a/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py +++ b/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py @@ -29,17 +29,17 @@ class FilipinoHateSpeechClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @article{Cabasag-2019-hate-speech, - title={Hate speech in Philippine election-related tweets: Automatic detection and classification using natural language processing.}, - author={Neil Vicente Cabasag, Vicente Raphael Chan, Sean Christian Lim, Mark Edward Gonzales, and Charibeth Cheng}, - journal={Philippine Computing Journal}, - 
volume={XIV}, - number={1}, - month={August}, - year={2019} - } - """, + bibtex_citation=r""" +@article{Cabasag-2019-hate-speech, + author = {Neil Vicente Cabasag, Vicente Raphael Chan, Sean Christian Lim, Mark Edward Gonzales, and Charibeth Cheng}, + journal = {Philippine Computing Journal}, + month = {August}, + number = {1}, + title = {Hate speech in Philippine election-related tweets: Automatic detection and classification using natural language processing.}, + volume = {XIV}, + year = {2019}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/fil/FilipinoShopeeReviewsClassification.py b/mteb/tasks/Classification/fil/FilipinoShopeeReviewsClassification.py index 373526cf0c..8c35e86cea 100644 --- a/mteb/tasks/Classification/fil/FilipinoShopeeReviewsClassification.py +++ b/mteb/tasks/Classification/fil/FilipinoShopeeReviewsClassification.py @@ -26,15 +26,16 @@ class FilipinoShopeeReviewsClassification(AbsTaskClassification): sample_creation="found", date=("2022-05-13", "2023-05-13"), main_score="accuracy", - bibtex_citation=""" - @article{riegoenhancement, - title={Enhancement to Low-Resource Text Classification via Sequential Transfer Learning}, - author={Riego, Neil Christian R. and Villarba, Danny Bell and Sison, Ariel Antwaun Rolando C. and Pineda, Fernandez C. and Lagunzad, Herminiño C.}, - journal={United International Journal for Research & Technology}, - volume={04}, - issue={08}, - pages={72--82} - }""", + bibtex_citation=r""" +@article{riegoenhancement, + author = {Riego, Neil Christian R. and Villarba, Danny Bell and Sison, Ariel Antwaun Rolando C. and Pineda, Fernandez C. 
and Lagunzad, Herminiño C.}, + issue = {08}, + journal = {United International Journal for Research & Technology}, + pages = {72--82}, + title = {Enhancement to Low-Resource Text Classification via Sequential Transfer Learning}, + volume = {04}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/fin/FinToxicityClassification.py b/mteb/tasks/Classification/fin/FinToxicityClassification.py index 2b582c0143..d847dac5a8 100644 --- a/mteb/tasks/Classification/fin/FinToxicityClassification.py +++ b/mteb/tasks/Classification/fin/FinToxicityClassification.py @@ -30,18 +30,19 @@ class FinToxicityClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="machine-translated", - bibtex_citation=""" - @inproceedings{eskelinen-etal-2023-toxicity, - title = "Toxicity Detection in {F}innish Using Machine Translation", - author = "Eskelinen, Anni and - Silvala, Laura and - Ginter, Filip and - Pyysalo, Sampo and - Laippala, Veronika", - booktitle = "Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)", - month = may, - year = "2023", - }""", + bibtex_citation=r""" +@inproceedings{eskelinen-etal-2023-toxicity, + author = {Eskelinen, Anni and +Silvala, Laura and +Ginter, Filip and +Pyysalo, Sampo and +Laippala, Veronika}, + booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)}, + month = may, + title = {Toxicity Detection in {F}innish Using Machine Translation}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/fra/MovieReviewSentimentClassification.py b/mteb/tasks/Classification/fra/MovieReviewSentimentClassification.py index ea1971a715..b488661093 100644 --- a/mteb/tasks/Classification/fra/MovieReviewSentimentClassification.py +++ b/mteb/tasks/Classification/fra/MovieReviewSentimentClassification.py @@ -26,10 +26,10 @@ class MovieReviewSentimentClassification(AbsTaskClassification): 
annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @software{blard2020, - title = {French sentiment analysis with BERT}, author = {Théophile Blard}, + title = {French sentiment analysis with BERT}, url = {https://github.com/TheophileBlard/french-sentiment-analysis-with-bert}, year = {2020}, } diff --git a/mteb/tasks/Classification/heb/HebrewSentimentAnalysis.py b/mteb/tasks/Classification/heb/HebrewSentimentAnalysis.py index a4162801b3..5f70955710 100644 --- a/mteb/tasks/Classification/heb/HebrewSentimentAnalysis.py +++ b/mteb/tasks/Classification/heb/HebrewSentimentAnalysis.py @@ -30,19 +30,19 @@ class HebrewSentimentAnalysis(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""" - @inproceedings{amram-etal-2018-representations, - title = "Representations and Architectures in Neural Sentiment Analysis for Morphologically Rich Languages: A Case Study from {M}odern {H}ebrew", - author = "Amram, Adam and Ben David, Anat and Tsarfaty, Reut", - booktitle = "Proceedings of the 27th International Conference on Computational Linguistics", - month = aug, - year = "2018", - address = "Santa Fe, New Mexico, USA", - publisher = "Association for Computational Linguistics", - url = "https://www.aclweb.org/anthology/C18-1190", - pages = "2242--2252" - } - """, + bibtex_citation=r""" +@inproceedings{amram-etal-2018-representations, + address = {Santa Fe, New Mexico, USA}, + author = {Amram, Adam and Ben David, Anat and Tsarfaty, Reut}, + booktitle = {Proceedings of the 27th International Conference on Computational Linguistics}, + month = aug, + pages = {2242--2252}, + publisher = {Association for Computational Linguistics}, + title = {Representations and Architectures in Neural Sentiment Analysis for Morphologically Rich Languages: A Case Study from {M}odern {H}ebrew}, + url = {https://www.aclweb.org/anthology/C18-1190}, + year = {2018}, +} +""", ) 
def dataset_transform(self): diff --git a/mteb/tasks/Classification/hin/HindiDiscourseClassification.py b/mteb/tasks/Classification/hin/HindiDiscourseClassification.py index 936fabe2cd..52fc83a720 100644 --- a/mteb/tasks/Classification/hin/HindiDiscourseClassification.py +++ b/mteb/tasks/Classification/hin/HindiDiscourseClassification.py @@ -27,29 +27,30 @@ class HindiDiscourseClassification(AbsTaskClassification): license="mit", annotations_creators="expert-annotated", sample_creation="found", - bibtex_citation=""" - @inproceedings{dhanwal-etal-2020-annotated, - title = "An Annotated Dataset of Discourse Modes in {H}indi Stories", - author = "Dhanwal, Swapnil and - Dutta, Hritwik and - Nankani, Hitesh and - Shrivastava, Nilay and - Kumar, Yaman and - Li, Junyi Jessy and - Mahata, Debanjan and - Gosangi, Rakesh and - Zhang, Haimin and - Shah, Rajiv Ratn and - Stent, Amanda", - booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference", - month = may, - year = "2020", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://www.aclweb.org/anthology/2020.lrec-1.149", - language = "English", - ISBN = "979-10-95546-34-4", -}""", + bibtex_citation=r""" +@inproceedings{dhanwal-etal-2020-annotated, + address = {Marseille, France}, + author = {Dhanwal, Swapnil and +Dutta, Hritwik and +Nankani, Hitesh and +Shrivastava, Nilay and +Kumar, Yaman and +Li, Junyi Jessy and +Mahata, Debanjan and +Gosangi, Rakesh and +Zhang, Haimin and +Shah, Rajiv Ratn and +Stent, Amanda}, + booktitle = {Proceedings of the 12th Language Resources and Evaluation Conference}, + isbn = {979-10-95546-34-4}, + language = {English}, + month = may, + publisher = {European Language Resources Association}, + title = {An Annotated Dataset of Discourse Modes in {H}indi Stories}, + url = {https://www.aclweb.org/anthology/2020.lrec-1.149}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git 
a/mteb/tasks/Classification/hin/SentimentAnalysisHindi.py b/mteb/tasks/Classification/hin/SentimentAnalysisHindi.py index 39daf9b14a..c922567b8f 100644 --- a/mteb/tasks/Classification/hin/SentimentAnalysisHindi.py +++ b/mteb/tasks/Classification/hin/SentimentAnalysisHindi.py @@ -26,13 +26,16 @@ class SentimentAnalysisHindi(AbsTaskClassification): license="cc-by-nc-sa-4.0", annotations_creators="derived", sample_creation="found", - bibtex_citation="""@misc{OdiaGenAI, - author = {Shantipriya Parida and Sambit Sekhar and Soumendra Kumar Sahoo and Swateek Jena and Abhijeet Parida and Satya Ranjan Dash and Guneet Singh Kohli}, - title = {OdiaGenAI: Generative AI and LLM Initiative for the Odia Language}, - year = {2023}, - publisher = {Hugging Face}, - journal = {Hugging Face repository}, - howpublished = {{https://huggingface.co/OdiaGenAI}}, } """, + bibtex_citation=r""" +@misc{OdiaGenAI, + author = {Shantipriya Parida and Sambit Sekhar and Soumendra Kumar Sahoo and Swateek Jena and Abhijeet Parida and Satya Ranjan Dash and Guneet Singh Kohli}, + howpublished = {{https://huggingface.co/OdiaGenAI}}, + journal = {Hugging Face repository}, + publisher = {Hugging Face}, + title = {OdiaGenAI: Generative AI and LLM Initiative for the Odia Language}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/hrv/FrenkHrClassification.py b/mteb/tasks/Classification/hrv/FrenkHrClassification.py index e89ad42eb3..440e0c90ad 100644 --- a/mteb/tasks/Classification/hrv/FrenkHrClassification.py +++ b/mteb/tasks/Classification/hrv/FrenkHrClassification.py @@ -27,13 +27,15 @@ class FrenkHrClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{ljubešić2019frenk, - title={The FRENK Datasets of Socially Unacceptable Discourse in Slovene and English}, - author={Nikola Ljubešić and Darja Fišer and Tomaž Erjavec}, - year={2019}, - eprint={1906.02045}, - archivePrefix={arXiv}, 
- primaryClass={cs.CL}, - url={https://arxiv.org/abs/1906.02045} - }""", + bibtex_citation=r""" +@misc{ljubešić2019frenk, + archiveprefix = {arXiv}, + author = {Nikola Ljubešić and Darja Fišer and Tomaž Erjavec}, + eprint = {1906.02045}, + primaryclass = {cs.CL}, + title = {The FRENK Datasets of Socially Unacceptable Discourse in Slovene and English}, + url = {https://arxiv.org/abs/1906.02045}, + year = {2019}, +} +""", ) diff --git a/mteb/tasks/Classification/ind/IndonesianIdClickbaitClassification.py b/mteb/tasks/Classification/ind/IndonesianIdClickbaitClassification.py index 9fece9e214..adcfbd57df 100644 --- a/mteb/tasks/Classification/ind/IndonesianIdClickbaitClassification.py +++ b/mteb/tasks/Classification/ind/IndonesianIdClickbaitClassification.py @@ -26,19 +26,19 @@ class IndonesianIdClickbaitClassification(AbsTaskClassification): license="cc-by-4.0", annotations_creators="expert-annotated", sample_creation="found", - bibtex_citation=""" - @article{WILLIAM2020106231, -title = "CLICK-ID: A novel dataset for Indonesian clickbait headlines", -journal = "Data in Brief", -volume = "32", -pages = "106231", -year = "2020", -issn = "2352-3409", -doi = "https://doi.org/10.1016/j.dib.2020.106231", -url = "http://www.sciencedirect.com/science/article/pii/S2352340920311252", -author = "Andika William and Yunita Sari", -keywords = "Indonesian, Natural Language Processing, News articles, Clickbait, Text-classification", -abstract = "News analysis is a popular task in Natural Language Processing (NLP). In particular, the problem of clickbait in news analysis has gained attention in recent years [1, 2]. However, the majority of the tasks has been focused on English news, in which there is already a rich representative resource. For other languages, such as Indonesian, there is still a lack of resource for clickbait tasks. Therefore, we introduce the CLICK-ID dataset of Indonesian news headlines extracted from 12 Indonesian online news publishers. 
It is comprised of 15,000 annotated headlines with clickbait and non-clickbait labels. Using the CLICK-ID dataset, we then developed an Indonesian clickbait classification model achieving favourable performance. We believe that this corpus will be useful for replicable experiments in clickbait detection or other experiments in NLP areas." + bibtex_citation=r""" +@article{WILLIAM2020106231, + abstract = {News analysis is a popular task in Natural Language Processing (NLP). In particular, the problem of clickbait in news analysis has gained attention in recent years [1, 2]. However, the majority of the tasks has been focused on English news, in which there is already a rich representative resource. For other languages, such as Indonesian, there is still a lack of resource for clickbait tasks. Therefore, we introduce the CLICK-ID dataset of Indonesian news headlines extracted from 12 Indonesian online news publishers. It is comprised of 15,000 annotated headlines with clickbait and non-clickbait labels. Using the CLICK-ID dataset, we then developed an Indonesian clickbait classification model achieving favourable performance. 
We believe that this corpus will be useful for replicable experiments in clickbait detection or other experiments in NLP areas.}, + author = {Andika William and Yunita Sari}, + doi = {https://doi.org/10.1016/j.dib.2020.106231}, + issn = {2352-3409}, + journal = {Data in Brief}, + keywords = {Indonesian, Natural Language Processing, News articles, Clickbait, Text-classification}, + pages = {106231}, + title = {CLICK-ID: A novel dataset for Indonesian clickbait headlines}, + url = {http://www.sciencedirect.com/science/article/pii/S2352340920311252}, + volume = {32}, + year = {2020}, } """, ) diff --git a/mteb/tasks/Classification/ind/IndonesianMongabayConservationClassification.py b/mteb/tasks/Classification/ind/IndonesianMongabayConservationClassification.py index 91e54bc137..cef0f33fac 100644 --- a/mteb/tasks/Classification/ind/IndonesianMongabayConservationClassification.py +++ b/mteb/tasks/Classification/ind/IndonesianMongabayConservationClassification.py @@ -31,29 +31,29 @@ class IndonesianMongabayConservationClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{fransiska-etal-2023-utilizing, - title = "Utilizing Weak Supervision to Generate {I}ndonesian Conservation Datasets", - author = "Fransiska, Mega and - Pitaloka, Diah and - Saripudin, Saripudin and - Putra, Satrio and - Sutawika*, Lintang", - editor = "Wijaya, Derry and - Aji, Alham Fikri and - Vania, Clara and - Winata, Genta Indra and - Purwarianti, Ayu", - booktitle = "Proceedings of the First Workshop in South East Asian Language Processing", - month = nov, - year = "2023", - address = "Nusa Dua, Bali, Indonesia", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2023.sealp-1.4", - doi = "10.18653/v1/2023.sealp-1.4", - pages = "30--54", - } - """, + bibtex_citation=r""" +@inproceedings{fransiska-etal-2023-utilizing, + address = {Nusa Dua, Bali, Indonesia}, + author = 
{Fransiska, Mega and +Pitaloka, Diah and +Saripudin, Saripudin and +Putra, Satrio and +Sutawika*, Lintang}, + booktitle = {Proceedings of the First Workshop in South East Asian Language Processing}, + doi = {10.18653/v1/2023.sealp-1.4}, + editor = {Wijaya, Derry and +Aji, Alham Fikri and +Vania, Clara and +Winata, Genta Indra and +Purwarianti, Ayu}, + month = nov, + pages = {30--54}, + publisher = {Association for Computational Linguistics}, + title = {Utilizing Weak Supervision to Generate {I}ndonesian Conservation Datasets}, + url = {https://aclanthology.org/2023.sealp-1.4}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/ita/ItaCaseholdClassification.py b/mteb/tasks/Classification/ita/ItaCaseholdClassification.py index 837383ff69..b2024d6e79 100644 --- a/mteb/tasks/Classification/ita/ItaCaseholdClassification.py +++ b/mteb/tasks/Classification/ita/ItaCaseholdClassification.py @@ -26,25 +26,25 @@ class ItaCaseholdClassification(AbsTaskClassification): license="apache-2.0", annotations_creators="expert-annotated", sample_creation="found", - bibtex_citation=""" - @inproceedings{10.1145/3594536.3595177, - author = {Licari, Daniele and Bushipaka, Praveen and Marino, Gabriele and Comand\'{e}, Giovanni and Cucinotta, Tommaso}, - title = {Legal Holding Extraction from Italian Case Documents using Italian-LEGAL-BERT Text Summarization}, - year = {2023}, - isbn = {9798400701979}, - publisher = {Association for Computing Machinery}, - address = {New York, NY, USA}, - url = {https://doi.org/10.1145/3594536.3595177}, - doi = {10.1145/3594536.3595177}, - abstract = {Legal holdings are used in Italy as a critical component of the legal system, serving to establish legal precedents, provide guidance for future legal decisions, and ensure consistency and predictability in the interpretation and application of the law. 
They are written by domain experts who describe in a clear and concise manner the principle of law applied in the judgments.We introduce a legal holding extraction method based on Italian-LEGAL-BERT to automatically extract legal holdings from Italian cases. In addition, we present ITA-CaseHold, a benchmark dataset for Italian legal summarization. We conducted several experiments using this dataset, as a valuable baseline for future research on this topic.}, - booktitle = {Proceedings of the Nineteenth International Conference on Artificial Intelligence and Law}, - pages = {148–156}, - numpages = {9}, - keywords = {Italian-LEGAL-BERT, Holding Extraction, Extractive Text Summarization, Benchmark Dataset}, - location = {, Braga, Portugal, }, - series = {ICAIL '23} - } - """, + bibtex_citation=r""" +@inproceedings{10.1145/3594536.3595177, + abstract = {Legal holdings are used in Italy as a critical component of the legal system, serving to establish legal precedents, provide guidance for future legal decisions, and ensure consistency and predictability in the interpretation and application of the law. They are written by domain experts who describe in a clear and concise manner the principle of law applied in the judgments.We introduce a legal holding extraction method based on Italian-LEGAL-BERT to automatically extract legal holdings from Italian cases. In addition, we present ITA-CaseHold, a benchmark dataset for Italian legal summarization. 
We conducted several experiments using this dataset, as a valuable baseline for future research on this topic.}, + address = {New York, NY, USA}, + author = {Licari, Daniele and Bushipaka, Praveen and Marino, Gabriele and Comand\'{e}, Giovanni and Cucinotta, Tommaso}, + booktitle = {Proceedings of the Nineteenth International Conference on Artificial Intelligence and Law}, + doi = {10.1145/3594536.3595177}, + isbn = {9798400701979}, + keywords = {Italian-LEGAL-BERT, Holding Extraction, Extractive Text Summarization, Benchmark Dataset}, + location = {, Braga, Portugal, }, + numpages = {9}, + pages = {148–156}, + publisher = {Association for Computing Machinery}, + series = {ICAIL '23}, + title = {Legal Holding Extraction from Italian Case Documents using Italian-LEGAL-BERT Text Summarization}, + url = {https://doi.org/10.1145/3594536.3595177}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/ita/ItalianLinguistAcceptabilityClassification.py b/mteb/tasks/Classification/ita/ItalianLinguistAcceptabilityClassification.py index 9509f4d9ed..db6f371494 100644 --- a/mteb/tasks/Classification/ita/ItalianLinguistAcceptabilityClassification.py +++ b/mteb/tasks/Classification/ita/ItalianLinguistAcceptabilityClassification.py @@ -27,23 +27,23 @@ class ItalianLinguisticAcceptabilityClassification(AbsTaskClassification): license="not specified", annotations_creators="expert-annotated", sample_creation="found", - bibtex_citation=""" - @inproceedings{trotta-etal-2021-monolingual-cross, - title = "Monolingual and Cross-Lingual Acceptability Judgments with the {I}talian {C}o{LA} corpus", - author = "Trotta, Daniela and - Guarasci, Raffaele and - Leonardelli, Elisa and - Tonelli, Sara", - booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021", - month = nov, - year = "2021", - address = "Punta Cana, Dominican Republic", - publisher = "Association for Computational Linguistics", - url = 
"https://aclanthology.org/2021.findings-emnlp.250", - doi = "10.18653/v1/2021.findings-emnlp.250", - pages = "2929--2940" + bibtex_citation=r""" +@inproceedings{trotta-etal-2021-monolingual-cross, + address = {Punta Cana, Dominican Republic}, + author = {Trotta, Daniela and +Guarasci, Raffaele and +Leonardelli, Elisa and +Tonelli, Sara}, + booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021}, + doi = {10.18653/v1/2021.findings-emnlp.250}, + month = nov, + pages = {2929--2940}, + publisher = {Association for Computational Linguistics}, + title = {Monolingual and Cross-Lingual Acceptability Judgments with the {I}talian {C}o{LA} corpus}, + url = {https://aclanthology.org/2021.findings-emnlp.250}, + year = {2021}, } - """, +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/jav/JavaneseIMDBClassification.py b/mteb/tasks/Classification/jav/JavaneseIMDBClassification.py index bc79f0b851..b0fa0144bd 100644 --- a/mteb/tasks/Classification/jav/JavaneseIMDBClassification.py +++ b/mteb/tasks/Classification/jav/JavaneseIMDBClassification.py @@ -27,16 +27,16 @@ class JavaneseIMDBClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{wongso2021causal, - title={Causal and Masked Language Modeling of Javanese Language using Transformer-based Architectures}, - author={Wongso, Wilson and Setiawan, David Samuel and Suhartono, Derwin}, - booktitle={2021 International Conference on Advanced Computer Science and Information Systems (ICACSIS)}, - pages={1--7}, - year={2021}, - organization={IEEE} - } - """, + bibtex_citation=r""" +@inproceedings{wongso2021causal, + author = {Wongso, Wilson and Setiawan, David Samuel and Suhartono, Derwin}, + booktitle = {2021 International Conference on Advanced Computer Science and Information Systems (ICACSIS)}, + organization = {IEEE}, + pages = {1--7}, + title = {Causal and Masked Language Modeling 
of Javanese Language using Transformer-based Architectures}, + year = {2021}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/jpn/WRIMEClassification.py b/mteb/tasks/Classification/jpn/WRIMEClassification.py index 623a266177..893b092167 100644 --- a/mteb/tasks/Classification/jpn/WRIMEClassification.py +++ b/mteb/tasks/Classification/jpn/WRIMEClassification.py @@ -28,32 +28,34 @@ class WRIMEClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{kajiwara-etal-2021-wrime, - title = "{WRIME}: A New Dataset for Emotional Intensity Estimation with Subjective and Objective Annotations", - author = "Kajiwara, Tomoyuki and - Chu, Chenhui and - Takemura, Noriko and - Nakashima, Yuta and - Nagahara, Hajime", - editor = "Toutanova, Kristina and - Rumshisky, Anna and - Zettlemoyer, Luke and - Hakkani-Tur, Dilek and - Beltagy, Iz and - Bethard, Steven and - Cotterell, Ryan and - Chakraborty, Tanmoy and - Zhou, Yichao", - booktitle = "Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies", - month = jun, - year = "2021", - address = "Online", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.naacl-main.169", - doi = "10.18653/v1/2021.naacl-main.169", - pages = "2095--2104", - abstract = "We annotate 17,000 SNS posts with both the writer{'}s subjective emotional intensity and the reader{'}s objective one to construct a Japanese emotion analysis dataset. In this study, we explore the difference between the emotional intensity of the writer and that of the readers with this dataset. We found that the reader cannot fully detect the emotions of the writer, especially anger and trust. 
In addition, experimental results in estimating the emotional intensity show that it is more difficult to estimate the writer{'}s subjective labels than the readers{'}. The large gap between the subjective and objective emotions imply the complexity of the mapping from a post to the subjective emotion intensities, which also leads to a lower performance with machine learning models.", -}""", + bibtex_citation=r""" +@inproceedings{kajiwara-etal-2021-wrime, + abstract = {We annotate 17,000 SNS posts with both the writer{'}s subjective emotional intensity and the reader{'}s objective one to construct a Japanese emotion analysis dataset. In this study, we explore the difference between the emotional intensity of the writer and that of the readers with this dataset. We found that the reader cannot fully detect the emotions of the writer, especially anger and trust. In addition, experimental results in estimating the emotional intensity show that it is more difficult to estimate the writer{'}s subjective labels than the readers{'}. 
The large gap between the subjective and objective emotions imply the complexity of the mapping from a post to the subjective emotion intensities, which also leads to a lower performance with machine learning models.}, + address = {Online}, + author = {Kajiwara, Tomoyuki and +Chu, Chenhui and +Takemura, Noriko and +Nakashima, Yuta and +Nagahara, Hajime}, + booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies}, + doi = {10.18653/v1/2021.naacl-main.169}, + editor = {Toutanova, Kristina and +Rumshisky, Anna and +Zettlemoyer, Luke and +Hakkani-Tur, Dilek and +Beltagy, Iz and +Bethard, Steven and +Cotterell, Ryan and +Chakraborty, Tanmoy and +Zhou, Yichao}, + month = jun, + pages = {2095--2104}, + publisher = {Association for Computational Linguistics}, + title = {{WRIME}: A New Dataset for Emotional Intensity Estimation with Subjective and Objective Annotations}, + url = {https://aclanthology.org/2021.naacl-main.169}, + year = {2021}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/kan/KannadaNewsClassification.py b/mteb/tasks/Classification/kan/KannadaNewsClassification.py index f005e56518..4d3edcc6ca 100644 --- a/mteb/tasks/Classification/kan/KannadaNewsClassification.py +++ b/mteb/tasks/Classification/kan/KannadaNewsClassification.py @@ -26,13 +26,14 @@ class KannadaNewsClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @article{kunchukuttan2020indicnlpcorpus, - title={AI4Bharat-IndicNLP Corpus: Monolingual Corpora and Word Embeddings for Indic Languages}, - author={Anoop Kunchukuttan and Divyanshu Kakwani and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. 
Khapra and Pratyush Kumar}, - year={2020}, - journal={arXiv preprint arXiv:2005.00085}, -}""", + bibtex_citation=r""" +@article{kunchukuttan2020indicnlpcorpus, + author = {Anoop Kunchukuttan and Divyanshu Kakwani and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. Khapra and Pratyush Kumar}, + journal = {arXiv preprint arXiv:2005.00085}, + title = {AI4Bharat-IndicNLP Corpus: Monolingual Corpora and Word Embeddings for Indic Languages}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/kat/GeorgianSentimentClassification.py b/mteb/tasks/Classification/kat/GeorgianSentimentClassification.py index e34d148a36..aa7458c97b 100644 --- a/mteb/tasks/Classification/kat/GeorgianSentimentClassification.py +++ b/mteb/tasks/Classification/kat/GeorgianSentimentClassification.py @@ -26,33 +26,33 @@ class GeorgianSentimentClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{stefanovitch-etal-2022-resources, - title = "Resources and Experiments on Sentiment Classification for {G}eorgian", - author = "Stefanovitch, Nicolas and - Piskorski, Jakub and - Kharazi, Sopho", - editor = "Calzolari, Nicoletta and - B{\'e}chet, Fr{\'e}d{\'e}ric and - Blache, Philippe and - Choukri, Khalid and - Cieri, Christopher and - Declerck, Thierry and - Goggi, Sara and - Isahara, Hitoshi and - Maegaard, Bente and - Mariani, Joseph and - Mazo, H{\'e}l{\`e}ne and - Odijk, Jan and - Piperidis, Stelios", - booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference", - month = jun, - year = "2022", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://aclanthology.org/2022.lrec-1.173", - pages = "1613--1621", - abstract = "This paper presents, to the best of our knowledge, the first ever publicly available annotated dataset for sentiment classification and semantic polarity 
dictionary for Georgian. The characteristics of these resources and the process of their creation are described in detail. The results of various experiments on the performance of both lexicon- and machine learning-based models for Georgian sentiment classification are also reported. Both 3-label (positive, neutral, negative) and 4-label settings (same labels + mixed) are considered. The machine learning models explored include, i.a., logistic regression, SVMs, and transformed-based models. We also explore transfer learning- and translation-based (to a well-supported language) approaches. The obtained results for Georgian are on par with the state-of-the-art results in sentiment classification for well studied languages when using training data of comparable size.", + bibtex_citation=r""" +@inproceedings{stefanovitch-etal-2022-resources, + abstract = {This paper presents, to the best of our knowledge, the first ever publicly available annotated dataset for sentiment classification and semantic polarity dictionary for Georgian. The characteristics of these resources and the process of their creation are described in detail. The results of various experiments on the performance of both lexicon- and machine learning-based models for Georgian sentiment classification are also reported. Both 3-label (positive, neutral, negative) and 4-label settings (same labels + mixed) are considered. The machine learning models explored include, i.a., logistic regression, SVMs, and transformed-based models. We also explore transfer learning- and translation-based (to a well-supported language) approaches. 
The obtained results for Georgian are on par with the state-of-the-art results in sentiment classification for well studied languages when using training data of comparable size.}, + address = {Marseille, France}, + author = {Stefanovitch, Nicolas and +Piskorski, Jakub and +Kharazi, Sopho}, + booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference}, + editor = {Calzolari, Nicoletta and +B{\'e}chet, Fr{\'e}d{\'e}ric and +Blache, Philippe and +Choukri, Khalid and +Cieri, Christopher and +Declerck, Thierry and +Goggi, Sara and +Isahara, Hitoshi and +Maegaard, Bente and +Mariani, Joseph and +Mazo, H{\'e}l{\`e}ne and +Odijk, Jan and +Piperidis, Stelios}, + month = jun, + pages = {1613--1621}, + publisher = {European Language Resources Association}, + title = {Resources and Experiments on Sentiment Classification for {G}eorgian}, + url = {https://aclanthology.org/2022.lrec-1.173}, + year = {2022}, } - """, +""", ) diff --git a/mteb/tasks/Classification/kor/KlueTC.py b/mteb/tasks/Classification/kor/KlueTC.py index 55a2d760d9..bf878570ac 100644 --- a/mteb/tasks/Classification/kor/KlueTC.py +++ b/mteb/tasks/Classification/kor/KlueTC.py @@ -27,14 +27,16 @@ class KlueTC(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{park2021klue, - title={KLUE: Korean Language Understanding Evaluation}, - author={Sungjoon Park and Jihyung Moon and Sungdong Kim and Won Ik Cho and Jiyoon Han and Jangwon Park and Chisung Song and Junseong Kim and Yongsook Song and Taehwan Oh and Joohong Lee and Juhyun Oh and Sungwon Lyu and Younghoon Jeong and Inkwon Lee and Sangwoo Seo and Dongjun Lee and Hyunwoo Kim and Myeonghwa Lee and Seongbo Jang and Seungwon Do and Sunkyoung Kim and Kyungtae Lim and Jongwon Lee and Kyumin Park and Jamin Shin and Seonghyun Kim and Lucy Park and Alice Oh and Jungwoo Ha and Kyunghyun Cho}, - year={2021}, - eprint={2105.09680}, - archivePrefix={arXiv}, - 
primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{park2021klue, + archiveprefix = {arXiv}, + author = {Sungjoon Park and Jihyung Moon and Sungdong Kim and Won Ik Cho and Jiyoon Han and Jangwon Park and Chisung Song and Junseong Kim and Yongsook Song and Taehwan Oh and Joohong Lee and Juhyun Oh and Sungwon Lyu and Younghoon Jeong and Inkwon Lee and Sangwoo Seo and Dongjun Lee and Hyunwoo Kim and Myeonghwa Lee and Seongbo Jang and Seungwon Do and Sunkyoung Kim and Kyungtae Lim and Jongwon Lee and Kyumin Park and Jamin Shin and Seonghyun Kim and Lucy Park and Alice Oh and Jungwoo Ha and Kyunghyun Cho}, + eprint = {2105.09680}, + primaryclass = {cs.CL}, + title = {KLUE: Korean Language Understanding Evaluation}, + year = {2021}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/kor/KorFin.py b/mteb/tasks/Classification/kor/KorFin.py index 1fdfb47694..c59de86c92 100644 --- a/mteb/tasks/Classification/kor/KorFin.py +++ b/mteb/tasks/Classification/kor/KorFin.py @@ -31,14 +31,14 @@ class KorFin(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""" - @article{son2023removing, - title={Removing Non-Stationary Knowledge From Pre-Trained Language Models for Entity-Level Sentiment Classification in Finance}, - author={Son, Guijin and Lee, Hanwool and Kang, Nahyeon and Hahm, Moonjeong}, - journal={arXiv preprint arXiv:2301.03136}, - year={2023} - } - """, + bibtex_citation=r""" +@article{son2023removing, + author = {Son, Guijin and Lee, Hanwool and Kang, Nahyeon and Hahm, Moonjeong}, + journal = {arXiv preprint arXiv:2301.03136}, + title = {Removing Non-Stationary Knowledge From Pre-Trained Language Models for Entity-Level Sentiment Classification in Finance}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/kor/KorHateClassification.py b/mteb/tasks/Classification/kor/KorHateClassification.py index 49cce26a25..a9ec38fdef 
100644 --- a/mteb/tasks/Classification/kor/KorHateClassification.py +++ b/mteb/tasks/Classification/kor/KorHateClassification.py @@ -34,15 +34,16 @@ class KorHateClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{moon2020beep, - title={BEEP! Korean Corpus of Online News Comments for Toxic Speech Detection}, - author={Jihyung Moon and Won Ik Cho and Junbum Lee}, - year={2020}, - eprint={2005.12503}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }""", + bibtex_citation=r""" +@misc{moon2020beep, + archiveprefix = {arXiv}, + author = {Jihyung Moon and Won Ik Cho and Junbum Lee}, + eprint = {2005.12503}, + primaryclass = {cs.CL}, + title = {BEEP! Korean Corpus of Online News Comments for Toxic Speech Detection}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/kor/KorSarcasmClassification.py b/mteb/tasks/Classification/kor/KorSarcasmClassification.py index 666ae4ca54..abae7c8222 100644 --- a/mteb/tasks/Classification/kor/KorSarcasmClassification.py +++ b/mteb/tasks/Classification/kor/KorSarcasmClassification.py @@ -34,16 +34,16 @@ class KorSarcasmClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{kim2019kocasm, - author = {Kim, Jiwon and Cho, Won Ik}, - title = {Kocasm: Korean Automatic Sarcasm Detection}, - year = {2019}, - publisher = {GitHub}, - journal = {GitHub repository}, - howpublished = {https://github.com/SpellOnYou/korean-sarcasm} - } - """, + bibtex_citation=r""" +@misc{kim2019kocasm, + author = {Kim, Jiwon and Cho, Won Ik}, + howpublished = {https://github.com/SpellOnYou/korean-sarcasm}, + journal = {GitHub repository}, + publisher = {GitHub}, + title = {Kocasm: Korean Automatic Sarcasm Detection}, + year = {2019}, +} +""", ) def dataset_transform(self): diff --git 
a/mteb/tasks/Classification/kur/KurdishSentimentClassification.py b/mteb/tasks/Classification/kur/KurdishSentimentClassification.py index 2f9564caff..876b7450fd 100644 --- a/mteb/tasks/Classification/kur/KurdishSentimentClassification.py +++ b/mteb/tasks/Classification/kur/KurdishSentimentClassification.py @@ -26,15 +26,15 @@ class KurdishSentimentClassification(AbsTaskClassification): annotations_creators="derived", dialect=["Sorani"], sample_creation="found", - bibtex_citation=""" - @article{article, - author = {Badawi, Soran and Kazemi, Arefeh and Rezaie, Vali}, - year = {2024}, - month = {01}, - pages = {1-20}, - title = {KurdiSent: a corpus for kurdish sentiment analysis}, - journal = {Language Resources and Evaluation}, - doi = {10.1007/s10579-023-09716-6} - } - """, + bibtex_citation=r""" +@article{article, + author = {Badawi, Soran and Kazemi, Arefeh and Rezaie, Vali}, + doi = {10.1007/s10579-023-09716-6}, + journal = {Language Resources and Evaluation}, + month = {01}, + pages = {1-20}, + title = {KurdiSent: a corpus for kurdish sentiment analysis}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Classification/mal/MalayalamNewsClassification.py b/mteb/tasks/Classification/mal/MalayalamNewsClassification.py index e454700717..689e7688ac 100644 --- a/mteb/tasks/Classification/mal/MalayalamNewsClassification.py +++ b/mteb/tasks/Classification/mal/MalayalamNewsClassification.py @@ -26,12 +26,14 @@ class MalayalamNewsClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{kunchukuttan2020indicnlpcorpus, - title={AI4Bharat-IndicNLP Corpus: Monolingual Corpora and Word Embeddings for Indic Languages}, - author={Anoop Kunchukuttan and Divyanshu Kakwani and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. 
Khapra and Pratyush Kumar}, - year={2020}, - journal={arXiv preprint arXiv:2005.00085}, -}""", + bibtex_citation=r""" +@article{kunchukuttan2020indicnlpcorpus, + author = {Anoop Kunchukuttan and Divyanshu Kakwani and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. Khapra and Pratyush Kumar}, + journal = {arXiv preprint arXiv:2005.00085}, + title = {AI4Bharat-IndicNLP Corpus: Monolingual Corpora and Word Embeddings for Indic Languages}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/mar/MarathiNewsClassification.py b/mteb/tasks/Classification/mar/MarathiNewsClassification.py index 7fa104c444..4f652e2373 100644 --- a/mteb/tasks/Classification/mar/MarathiNewsClassification.py +++ b/mteb/tasks/Classification/mar/MarathiNewsClassification.py @@ -26,12 +26,14 @@ class MarathiNewsClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{kunchukuttan2020indicnlpcorpus, - title={AI4Bharat-IndicNLP Corpus: Monolingual Corpora and Word Embeddings for Indic Languages}, - author={Anoop Kunchukuttan and Divyanshu Kakwani and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. Khapra and Pratyush Kumar}, - year={2020}, - journal={arXiv preprint arXiv:2005.00085}, -}""", + bibtex_citation=r""" +@article{kunchukuttan2020indicnlpcorpus, + author = {Anoop Kunchukuttan and Divyanshu Kakwani and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. 
Khapra and Pratyush Kumar}, + journal = {arXiv preprint arXiv:2005.00085}, + title = {AI4Bharat-IndicNLP Corpus: Monolingual Corpora and Word Embeddings for Indic Languages}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/mkd/MacedonianTweetSentimentClassification.py b/mteb/tasks/Classification/mkd/MacedonianTweetSentimentClassification.py index 58a555c6b1..3eb6b2dc81 100644 --- a/mteb/tasks/Classification/mkd/MacedonianTweetSentimentClassification.py +++ b/mteb/tasks/Classification/mkd/MacedonianTweetSentimentClassification.py @@ -26,20 +26,22 @@ class MacedonianTweetSentimentClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{jovanoski-etal-2015-sentiment, - title = "Sentiment Analysis in {T}witter for {M}acedonian", - author = "Jovanoski, Dame and - Pachovski, Veno and - Nakov, Preslav", - editor = "Mitkov, Ruslan and - Angelova, Galia and - Bontcheva, Kalina", - booktitle = "Proceedings of the International Conference Recent Advances in Natural Language Processing", - month = sep, - year = "2015", - address = "Hissar, Bulgaria", - publisher = "INCOMA Ltd. Shoumen, BULGARIA", - url = "https://aclanthology.org/R15-1034", - pages = "249--257", -}""", + bibtex_citation=r""" +@inproceedings{jovanoski-etal-2015-sentiment, + address = {Hissar, Bulgaria}, + author = {Jovanoski, Dame and +Pachovski, Veno and +Nakov, Preslav}, + booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing}, + editor = {Mitkov, Ruslan and +Angelova, Galia and +Bontcheva, Kalina}, + month = sep, + pages = {249--257}, + publisher = {INCOMA Ltd. 
Shoumen, BULGARIA}, + title = {Sentiment Analysis in {T}witter for {M}acedonian}, + url = {https://aclanthology.org/R15-1034}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/Classification/multilingual/AfriSentiClassification.py b/mteb/tasks/Classification/multilingual/AfriSentiClassification.py index 46332e1bee..ff3e6575c9 100644 --- a/mteb/tasks/Classification/multilingual/AfriSentiClassification.py +++ b/mteb/tasks/Classification/multilingual/AfriSentiClassification.py @@ -52,11 +52,13 @@ class AfriSentiClassification(MultilingualTask, AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{Muhammad2023AfriSentiAT, - title={AfriSenti: A Twitter Sentiment Analysis Benchmark for African Languages}, - author={Shamsuddeen Hassan Muhammad and Idris Abdulmumin and Abinew Ali Ayele and Nedjma Ousidhoum and David Ifeoluwa Adelani and Seid Muhie Yimam and Ibrahim Sa'id Ahmad and Meriem Beloucif and Saif Mohammad and Sebastian Ruder and Oumaima Hourrane and Pavel Brazdil and Felermino D'ario M'ario Ant'onio Ali and Davis Davis and Salomey Osei and Bello Shehu Bello and Falalu Ibrahim and Tajuddeen Gwadabe and Samuel Rutunda and Tadesse Belay and Wendimu Baye Messelle and Hailu Beshada Balcha and Sisay Adugna Chala and Hagos Tesfahun Gebremichael and Bernard Opoku and Steven Arthur}, - year={2023} - }""", + bibtex_citation=r""" +@inproceedings{Muhammad2023AfriSentiAT, + author = {Shamsuddeen Hassan Muhammad and Idris Abdulmumin and Abinew Ali Ayele and Nedjma Ousidhoum and David Ifeoluwa Adelani and Seid Muhie Yimam and Ibrahim Sa'id Ahmad and Meriem Beloucif and Saif Mohammad and Sebastian Ruder and Oumaima Hourrane and Pavel Brazdil and Felermino D'ario M'ario Ant'onio Ali and Davis Davis and Salomey Osei and Bello Shehu Bello and Falalu Ibrahim and Tajuddeen Gwadabe and Samuel Rutunda and Tadesse Belay and Wendimu Baye Messelle and Hailu Beshada Balcha and Sisay Adugna Chala and Hagos Tesfahun 
Gebremichael and Bernard Opoku and Steven Arthur}, + title = {AfriSenti: A Twitter Sentiment Analysis Benchmark for African Languages}, + year = {2023}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py b/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py index 112d4e0b27..165a5ea54d 100644 --- a/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py +++ b/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py @@ -38,27 +38,29 @@ class AmazonCounterfactualClassification(MultilingualTask, AbsTaskClassification annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{oneill-etal-2021-wish, - title = "{I} Wish {I} Would Have Loved This One, But {I} Didn{'}t {--} A Multilingual Dataset for Counterfactual Detection in Product Review", - author = "O{'}Neill, James and - Rozenshtein, Polina and - Kiryo, Ryuichi and - Kubota, Motoko and - Bollegala, Danushka", - editor = "Moens, Marie-Francine and - Huang, Xuanjing and - Specia, Lucia and - Yih, Scott Wen-tau", - booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing", - month = nov, - year = "2021", - address = "Online and Punta Cana, Dominican Republic", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.emnlp-main.568", - doi = "10.18653/v1/2021.emnlp-main.568", - pages = "7092--7108", - abstract = "Counterfactual statements describe events that did not or cannot take place. We consider the problem of counterfactual detection (CFD) in product reviews. For this purpose, we annotate a multilingual CFD dataset from Amazon product reviews covering counterfactual statements written in English, German, and Japanese languages. 
The dataset is unique as it contains counterfactuals in multiple languages, covers a new application area of e-commerce reviews, and provides high quality professional annotations. We train CFD models using different text representation methods and classifiers. We find that these models are robust against the selectional biases introduced due to cue phrase-based sentence selection. Moreover, our CFD dataset is compatible with prior datasets and can be merged to learn accurate CFD models. Applying machine translation on English counterfactual examples to create multilingual data performs poorly, demonstrating the language-specificity of this problem, which has been ignored so far.", -}""", + bibtex_citation=r""" +@inproceedings{oneill-etal-2021-wish, + abstract = {Counterfactual statements describe events that did not or cannot take place. We consider the problem of counterfactual detection (CFD) in product reviews. For this purpose, we annotate a multilingual CFD dataset from Amazon product reviews covering counterfactual statements written in English, German, and Japanese languages. The dataset is unique as it contains counterfactuals in multiple languages, covers a new application area of e-commerce reviews, and provides high quality professional annotations. We train CFD models using different text representation methods and classifiers. We find that these models are robust against the selectional biases introduced due to cue phrase-based sentence selection. Moreover, our CFD dataset is compatible with prior datasets and can be merged to learn accurate CFD models. 
Applying machine translation on English counterfactual examples to create multilingual data performs poorly, demonstrating the language-specificity of this problem, which has been ignored so far.}, + address = {Online and Punta Cana, Dominican Republic}, + author = {O{'}Neill, James and +Rozenshtein, Polina and +Kiryo, Ryuichi and +Kubota, Motoko and +Bollegala, Danushka}, + booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing}, + doi = {10.18653/v1/2021.emnlp-main.568}, + editor = {Moens, Marie-Francine and +Huang, Xuanjing and +Specia, Lucia and +Yih, Scott Wen-tau}, + month = nov, + pages = {7092--7108}, + publisher = {Association for Computational Linguistics}, + title = {{I} Wish {I} Would Have Loved This One, But {I} Didn{'}t {--} A Multilingual Dataset for Counterfactual Detection in Product Review}, + url = {https://aclanthology.org/2021.emnlp-main.568}, + year = {2021}, +} +""", prompt="Classify a given Amazon customer review text as either counterfactual or not-counterfactual", ) diff --git a/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py b/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py index ca9fab41e0..d2250701f1 100644 --- a/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py +++ b/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py @@ -35,13 +35,15 @@ class AmazonReviewsClassification(MultilingualTask, AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{keung2020multilingual, - title={The Multilingual Amazon Reviews Corpus}, - author={Phillip Keung and Yichao Lu and György Szarvas and Noah A. Smith}, - year={2020}, - eprint={2010.02573}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{keung2020multilingual, + archiveprefix = {arXiv}, + author = {Phillip Keung and Yichao Lu and György Szarvas and Noah A. 
Smith}, + eprint = {2010.02573}, + primaryclass = {cs.CL}, + title = {The Multilingual Amazon Reviews Corpus}, + year = {2020}, +} +""", prompt="Classify the given Amazon review into its appropriate rating category", ) diff --git a/mteb/tasks/Classification/multilingual/CataloniaTweetClassification.py b/mteb/tasks/Classification/multilingual/CataloniaTweetClassification.py index c21fee9cfa..f65861e3fb 100644 --- a/mteb/tasks/Classification/multilingual/CataloniaTweetClassification.py +++ b/mteb/tasks/Classification/multilingual/CataloniaTweetClassification.py @@ -37,33 +37,35 @@ class CataloniaTweetClassification(MultilingualTask, AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{zotova-etal-2020-multilingual, - title = "Multilingual Stance Detection in Tweets: The {C}atalonia Independence Corpus", - author = "Zotova, Elena and - Agerri, Rodrigo and - Nu{\~n}ez, Manuel and - Rigau, German", - editor = "Calzolari, Nicoletta and - B{\'e}chet, Fr{\'e}d{\'e}ric and - Blache, Philippe and - Choukri, Khalid and - Cieri, Christopher and - Declerck, Thierry and - Goggi, Sara and - Isahara, Hitoshi and - Maegaard, Bente and - Mariani, Joseph and - Mazo, H{\'e}l{\`e}ne and - Moreno, Asuncion and - Odijk, Jan and - Piperidis, Stelios", - booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference", - month = may, - year = "2020", - publisher = "European Language Resources Association", - pages = "1368--1375", - ISBN = "979-10-95546-34-4", - }""", + bibtex_citation=r""" +@inproceedings{zotova-etal-2020-multilingual, + author = {Zotova, Elena and +Agerri, Rodrigo and +Nu{\~n}ez, Manuel and +Rigau, German}, + booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference}, + editor = {Calzolari, Nicoletta and +B{\'e}chet, Fr{\'e}d{\'e}ric and +Blache, Philippe and +Choukri, Khalid and +Cieri, Christopher and +Declerck, Thierry and +Goggi, Sara 
and +Isahara, Hitoshi and +Maegaard, Bente and +Mariani, Joseph and +Mazo, H{\'e}l{\`e}ne and +Moreno, Asuncion and +Odijk, Jan and +Piperidis, Stelios}, + isbn = {979-10-95546-34-4}, + month = may, + pages = {1368--1375}, + publisher = {European Language Resources Association}, + title = {Multilingual Stance Detection in Tweets: The {C}atalonia Independence Corpus}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/multilingual/CyrillicTurkicLangClassification.py b/mteb/tasks/Classification/multilingual/CyrillicTurkicLangClassification.py index 3c0d2ca2a2..6aebfe32c9 100644 --- a/mteb/tasks/Classification/multilingual/CyrillicTurkicLangClassification.py +++ b/mteb/tasks/Classification/multilingual/CyrillicTurkicLangClassification.py @@ -36,14 +36,14 @@ class CyrillicTurkicLangClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{goldhahn2012building, - title={Building Large Monolingual Dictionaries at the Leipzig Corpora Collection: From 100 to 200 Languages}, - author={Goldhahn, Dirk and Eckart, Thomas and Quasthoff, Uwe}, - booktitle={Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)}, - year={2012} - } - """, + bibtex_citation=r""" +@inproceedings{goldhahn2012building, + author = {Goldhahn, Dirk and Eckart, Thomas and Quasthoff, Uwe}, + booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)}, + title = {Building Large Monolingual Dictionaries at the Leipzig Corpora Collection: From 100 to 200 Languages}, + year = {2012}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/multilingual/HinDialectClassification.py b/mteb/tasks/Classification/multilingual/HinDialectClassification.py index c9d6b36669..08258c3387 100644 --- a/mteb/tasks/Classification/multilingual/HinDialectClassification.py +++ 
b/mteb/tasks/Classification/multilingual/HinDialectClassification.py @@ -50,15 +50,16 @@ class HinDialectClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{11234/1-4839, - title = {{HinDialect} 1.1: 26 Hindi-related languages and dialects of the Indic Continuum in North India}, - author = {Bafna, Niyati and {\v Z}abokrtsk{\'y}, Zden{\v e}k and Espa{\~n}a-Bonet, Cristina and van Genabith, Josef and Kumar, Lalit "Samyak Lalit" and Suman, Sharda and Shivay, Rahul}, - url = {http://hdl.handle.net/11234/1-4839}, - note = {{LINDAT}/{CLARIAH}-{CZ} digital library at the Institute of Formal and Applied Linguistics ({{\'U}FAL}), Faculty of Mathematics and Physics, Charles University}, - copyright = {Creative Commons - Attribution-{NonCommercial}-{ShareAlike} 4.0 International ({CC} {BY}-{NC}-{SA} 4.0)}, - year = {2022} } - """, + bibtex_citation=r""" +@misc{11234/1-4839, + author = {Bafna, Niyati and {\v Z}abokrtsk{\'y}, Zden{\v e}k and Espa{\~n}a-Bonet, Cristina and van Genabith, Josef and Kumar, Lalit "Samyak Lalit" and Suman, Sharda and Shivay, Rahul}, + copyright = {Creative Commons - Attribution-{NonCommercial}-{ShareAlike} 4.0 International ({CC} {BY}-{NC}-{SA} 4.0)}, + note = {{LINDAT}/{CLARIAH}-{CZ} digital library at the Institute of Formal and Applied Linguistics ({{\'U}FAL}), Faculty of Mathematics and Physics, Charles University}, + title = {{HinDialect} 1.1: 26 Hindi-related languages and dialects of the Indic Continuum in North India}, + url = {http://hdl.handle.net/11234/1-4839}, + year = {2022}, +} +""", ) def dataset_transform(self) -> None: diff --git a/mteb/tasks/Classification/multilingual/IndicLangClassification.py b/mteb/tasks/Classification/multilingual/IndicLangClassification.py index 47564cf501..51d029061b 100644 --- a/mteb/tasks/Classification/multilingual/IndicLangClassification.py +++ b/mteb/tasks/Classification/multilingual/IndicLangClassification.py 
@@ -84,23 +84,25 @@ class IndicLangClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{madhani-etal-2023-bhasa, - title = "Bhasa-Abhijnaanam: Native-script and romanized Language Identification for 22 {I}ndic languages", - author = "Madhani, Yash and - Khapra, Mitesh M. and - Kunchukuttan, Anoop", - editor = "Rogers, Anna and - Boyd-Graber, Jordan and - Okazaki, Naoaki", - booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)", - month = jul, - year = "2023", - address = "Toronto, Canada", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2023.acl-short.71", - doi = "10.18653/v1/2023.acl-short.71", - pages = "816--826" -}""", + bibtex_citation=r""" +@inproceedings{madhani-etal-2023-bhasa, + address = {Toronto, Canada}, + author = {Madhani, Yash and +Khapra, Mitesh M. and +Kunchukuttan, Anoop}, + booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)}, + doi = {10.18653/v1/2023.acl-short.71}, + editor = {Rogers, Anna and +Boyd-Graber, Jordan and +Okazaki, Naoaki}, + month = jul, + pages = {816--826}, + publisher = {Association for Computational Linguistics}, + title = {Bhasa-Abhijnaanam: Native-script and romanized Language Identification for 22 {I}ndic languages}, + url = {https://aclanthology.org/2023.acl-short.71}, + year = {2023}, +} +""", ) def load_data(self, **kwargs: Any) -> None: diff --git a/mteb/tasks/Classification/multilingual/IndicNLPNewsClassification.py b/mteb/tasks/Classification/multilingual/IndicNLPNewsClassification.py index 3995917696..24e3dc8a9b 100644 --- a/mteb/tasks/Classification/multilingual/IndicNLPNewsClassification.py +++ b/mteb/tasks/Classification/multilingual/IndicNLPNewsClassification.py @@ -38,13 +38,14 @@ class 
IndicNLPNewsClassification(MultilingualTask, AbsTaskClassification): license="cc-by-nc-4.0", annotations_creators="expert-annotated", sample_creation="found", - bibtex_citation=""" - @article{kunchukuttan2020indicnlpcorpus, - title={AI4Bharat-IndicNLP Corpus: Monolingual Corpora and Word Embeddings for Indic Languages}, - author={Anoop Kunchukuttan and Divyanshu Kakwani and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. Khapra and Pratyush Kumar}, - year={2020}, - journal={arXiv preprint arXiv:2005.00085} -}""", + bibtex_citation=r""" +@article{kunchukuttan2020indicnlpcorpus, + author = {Anoop Kunchukuttan and Divyanshu Kakwani and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. Khapra and Pratyush Kumar}, + journal = {arXiv preprint arXiv:2005.00085}, + title = {AI4Bharat-IndicNLP Corpus: Monolingual Corpora and Word Embeddings for Indic Languages}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/multilingual/IndicSentimentClassification.py b/mteb/tasks/Classification/multilingual/IndicSentimentClassification.py index 2687422935..1401bd8f87 100644 --- a/mteb/tasks/Classification/multilingual/IndicSentimentClassification.py +++ b/mteb/tasks/Classification/multilingual/IndicSentimentClassification.py @@ -44,13 +44,15 @@ class IndicSentimentClassification(MultilingualTask, AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="machine-translated and verified", - bibtex_citation="""@article{doddapaneni2022towards, - title = {Towards Leaving No Indic Language Behind: Building Monolingual Corpora, Benchmark and Models for Indic Languages}, - author = {Sumanth Doddapaneni and Rahul Aralikatte and Gowtham Ramesh and Shreyansh Goyal and Mitesh M. 
Khapra and Anoop Kunchukuttan and Pratyush Kumar}, - journal = {Annual Meeting of the Association for Computational Linguistics}, - year = {2022}, - doi = {10.18653/v1/2023.acl-long.693} -}""", + bibtex_citation=r""" +@article{doddapaneni2022towards, + author = {Sumanth Doddapaneni and Rahul Aralikatte and Gowtham Ramesh and Shreyansh Goyal and Mitesh M. Khapra and Anoop Kunchukuttan and Pratyush Kumar}, + doi = {10.18653/v1/2023.acl-long.693}, + journal = {Annual Meeting of the Association for Computational Linguistics}, + title = {Towards Leaving No Indic Language Behind: Building Monolingual Corpora, Benchmark and Models for Indic Languages}, + year = {2022}, +} +""", ) def dataset_transform(self) -> None: diff --git a/mteb/tasks/Classification/multilingual/LanguageClassification.py b/mteb/tasks/Classification/multilingual/LanguageClassification.py index 9ebcfc7406..8398bddea0 100644 --- a/mteb/tasks/Classification/multilingual/LanguageClassification.py +++ b/mteb/tasks/Classification/multilingual/LanguageClassification.py @@ -49,21 +49,23 @@ class LanguageClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@InProceedings{conneau2018xnli, + bibtex_citation=r""" +@inproceedings{conneau2018xnli, author = {Conneau, Alexis - and Rinott, Ruty - and Lample, Guillaume - and Williams, Adina - and Bowman, Samuel R. - and Schwenk, Holger - and Stoyanov, Veselin}, - title = {XNLI: Evaluating Cross-lingual Sentence Representations}, +and Rinott, Ruty +and Lample, Guillaume +and Williams, Adina +and Bowman, Samuel R. 
+and Schwenk, Holger +and Stoyanov, Veselin}, booktitle = {Proceedings of the 2018 Conference on Empirical Methods - in Natural Language Processing}, - year = {2018}, - publisher = {Association for Computational Linguistics}, +in Natural Language Processing}, location = {Brussels, Belgium}, -}""", + publisher = {Association for Computational Linguistics}, + title = {XNLI: Evaluating Cross-lingual Sentence Representations}, + year = {2018}, +} +""", ) def dataset_transform(self) -> None: diff --git a/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py b/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py index eb8713fd6d..34684005c1 100644 --- a/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py +++ b/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py @@ -37,26 +37,27 @@ class MTOPDomainClassification(MultilingualTask, AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{li-etal-2021-mtop, - title = "{MTOP}: A Comprehensive Multilingual Task-Oriented Semantic Parsing Benchmark", - author = "Li, Haoran and - Arora, Abhinav and - Chen, Shuohui and - Gupta, Anchit and - Gupta, Sonal and - Mehdad, Yashar", - editor = "Merlo, Paola and - Tiedemann, Jorg and - Tsarfaty, Reut", - booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume", - month = apr, - year = "2021", - address = "Online", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.eacl-main.257", - doi = "10.18653/v1/2021.eacl-main.257", - pages = "2950--2962", - abstract = "Scaling semantic parsing models for task-oriented dialog systems to new languages is often expensive and time-consuming due to the lack of available datasets. 
Available datasets suffer from several shortcomings: a) they contain few languages b) they contain small amounts of labeled examples per language c) they are based on the simple intent and slot detection paradigm for non-compositional queries. In this paper, we present a new multilingual dataset, called MTOP, comprising of 100k annotated utterances in 6 languages across 11 domains. We use this dataset and other publicly available datasets to conduct a comprehensive benchmarking study on using various state-of-the-art multilingual pre-trained models for task-oriented semantic parsing. We achieve an average improvement of +6.3 points on Slot F1 for the two existing multilingual datasets, over best results reported in their experiments. Furthermore, we demonstrate strong zero-shot performance using pre-trained models combined with automatic translation and alignment, and a proposed distant supervision method to reduce the noise in slot label projection.", + bibtex_citation=r""" +@inproceedings{li-etal-2021-mtop, + abstract = {Scaling semantic parsing models for task-oriented dialog systems to new languages is often expensive and time-consuming due to the lack of available datasets. Available datasets suffer from several shortcomings: a) they contain few languages b) they contain small amounts of labeled examples per language c) they are based on the simple intent and slot detection paradigm for non-compositional queries. In this paper, we present a new multilingual dataset, called MTOP, comprising of 100k annotated utterances in 6 languages across 11 domains. We use this dataset and other publicly available datasets to conduct a comprehensive benchmarking study on using various state-of-the-art multilingual pre-trained models for task-oriented semantic parsing. We achieve an average improvement of +6.3 points on Slot F1 for the two existing multilingual datasets, over best results reported in their experiments. 
Furthermore, we demonstrate strong zero-shot performance using pre-trained models combined with automatic translation and alignment, and a proposed distant supervision method to reduce the noise in slot label projection.}, + address = {Online}, + author = {Li, Haoran and +Arora, Abhinav and +Chen, Shuohui and +Gupta, Anchit and +Gupta, Sonal and +Mehdad, Yashar}, + booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume}, + doi = {10.18653/v1/2021.eacl-main.257}, + editor = {Merlo, Paola and +Tiedemann, Jorg and +Tsarfaty, Reut}, + month = apr, + pages = {2950--2962}, + publisher = {Association for Computational Linguistics}, + title = {{MTOP}: A Comprehensive Multilingual Task-Oriented Semantic Parsing Benchmark}, + url = {https://aclanthology.org/2021.eacl-main.257}, + year = {2021}, } """, prompt="Classify the intent domain of the given utterance in task-oriented conversation", diff --git a/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py b/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py index 52863107b6..0450acfb36 100644 --- a/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py +++ b/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py @@ -37,26 +37,27 @@ class MTOPIntentClassification(MultilingualTask, AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{li-etal-2021-mtop, - title = "{MTOP}: A Comprehensive Multilingual Task-Oriented Semantic Parsing Benchmark", - author = "Li, Haoran and - Arora, Abhinav and - Chen, Shuohui and - Gupta, Anchit and - Gupta, Sonal and - Mehdad, Yashar", - editor = "Merlo, Paola and - Tiedemann, Jorg and - Tsarfaty, Reut", - booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume", - month = apr, - year = "2021", - address = 
"Online", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.eacl-main.257", - doi = "10.18653/v1/2021.eacl-main.257", - pages = "2950--2962", - abstract = "Scaling semantic parsing models for task-oriented dialog systems to new languages is often expensive and time-consuming due to the lack of available datasets. Available datasets suffer from several shortcomings: a) they contain few languages b) they contain small amounts of labeled examples per language c) they are based on the simple intent and slot detection paradigm for non-compositional queries. In this paper, we present a new multilingual dataset, called MTOP, comprising of 100k annotated utterances in 6 languages across 11 domains. We use this dataset and other publicly available datasets to conduct a comprehensive benchmarking study on using various state-of-the-art multilingual pre-trained models for task-oriented semantic parsing. We achieve an average improvement of +6.3 points on Slot F1 for the two existing multilingual datasets, over best results reported in their experiments. Furthermore, we demonstrate strong zero-shot performance using pre-trained models combined with automatic translation and alignment, and a proposed distant supervision method to reduce the noise in slot label projection.", + bibtex_citation=r""" +@inproceedings{li-etal-2021-mtop, + abstract = {Scaling semantic parsing models for task-oriented dialog systems to new languages is often expensive and time-consuming due to the lack of available datasets. Available datasets suffer from several shortcomings: a) they contain few languages b) they contain small amounts of labeled examples per language c) they are based on the simple intent and slot detection paradigm for non-compositional queries. In this paper, we present a new multilingual dataset, called MTOP, comprising of 100k annotated utterances in 6 languages across 11 domains. 
We use this dataset and other publicly available datasets to conduct a comprehensive benchmarking study on using various state-of-the-art multilingual pre-trained models for task-oriented semantic parsing. We achieve an average improvement of +6.3 points on Slot F1 for the two existing multilingual datasets, over best results reported in their experiments. Furthermore, we demonstrate strong zero-shot performance using pre-trained models combined with automatic translation and alignment, and a proposed distant supervision method to reduce the noise in slot label projection.}, + address = {Online}, + author = {Li, Haoran and +Arora, Abhinav and +Chen, Shuohui and +Gupta, Anchit and +Gupta, Sonal and +Mehdad, Yashar}, + booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume}, + doi = {10.18653/v1/2021.eacl-main.257}, + editor = {Merlo, Paola and +Tiedemann, Jorg and +Tsarfaty, Reut}, + month = apr, + pages = {2950--2962}, + publisher = {Association for Computational Linguistics}, + title = {{MTOP}: A Comprehensive Multilingual Task-Oriented Semantic Parsing Benchmark}, + url = {https://aclanthology.org/2021.eacl-main.257}, + year = {2021}, } """, prompt="Classify the intent of the given utterance in task-oriented conversation", diff --git a/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py b/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py index 66cfe71b17..4969c6794a 100644 --- a/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py +++ b/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py @@ -46,14 +46,16 @@ class MasakhaNEWSClassification(AbsTaskClassification, MultilingualTask): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{adelani2023masakhanews, - title={MasakhaNEWS: News Topic Classification for African languages}, - author={David Ifeoluwa Adelani and Marek 
Masiak and Israel Abebe Azime and Jesujoba Alabi and Atnafu Lambebo Tonja and Christine Mwase and Odunayo Ogundepo and Bonaventure F. P. Dossou and Akintunde Oladipo and Doreen Nixdorf and Chris Chinenye Emezue and sana al-azzawi and Blessing Sibanda and Davis David and Lolwethu Ndolela and Jonathan Mukiibi and Tunde Ajayi and Tatiana Moteu and Brian Odhiambo and Abraham Owodunni and Nnaemeka Obiefuna and Muhidin Mohamed and Shamsuddeen Hassan Muhammad and Teshome Mulugeta Ababu and Saheed Abdullahi Salahudeen and Mesay Gemeda Yigezu and Tajuddeen Gwadabe and Idris Abdulmumin and Mahlet Taye and Oluwabusayo Awoyomi and Iyanuoluwa Shode and Tolulope Adelani and Habiba Abdulganiyu and Abdul-Hakeem Omotayo and Adetola Adeeko and Abeeb Afolabi and Anuoluwapo Aremu and Olanrewaju Samuel and Clemencia Siro and Wangari Kimotho and Onyekachi Ogbu and Chinedu Mbonu and Chiamaka Chukwuneke and Samuel Fanijo and Jessica Ojo and Oyinkansola Awosan and Tadesse Kebede and Toadoum Sari Sakayo and Pamela Nyatsine and Freedmore Sidume and Oreen Yousuf and Mardiyyah Oduwole and Tshinu Tshinu and Ussen Kimanuka and Thina Diko and Siyanda Nxakama and Sinodos Nigusse and Abdulmejid Johar and Shafie Mohamed and Fuad Mire Hassan and Moges Ahmed Mehamed and Evrard Ngabire and Jules Jules and Ivan Ssenkungu and Pontus Stenetorp}, - year={2023}, - eprint={2304.09972}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{adelani2023masakhanews, + archiveprefix = {arXiv}, + author = {David Ifeoluwa Adelani and Marek Masiak and Israel Abebe Azime and Jesujoba Alabi and Atnafu Lambebo Tonja and Christine Mwase and Odunayo Ogundepo and Bonaventure F. P. 
Dossou and Akintunde Oladipo and Doreen Nixdorf and Chris Chinenye Emezue and sana al-azzawi and Blessing Sibanda and Davis David and Lolwethu Ndolela and Jonathan Mukiibi and Tunde Ajayi and Tatiana Moteu and Brian Odhiambo and Abraham Owodunni and Nnaemeka Obiefuna and Muhidin Mohamed and Shamsuddeen Hassan Muhammad and Teshome Mulugeta Ababu and Saheed Abdullahi Salahudeen and Mesay Gemeda Yigezu and Tajuddeen Gwadabe and Idris Abdulmumin and Mahlet Taye and Oluwabusayo Awoyomi and Iyanuoluwa Shode and Tolulope Adelani and Habiba Abdulganiyu and Abdul-Hakeem Omotayo and Adetola Adeeko and Abeeb Afolabi and Anuoluwapo Aremu and Olanrewaju Samuel and Clemencia Siro and Wangari Kimotho and Onyekachi Ogbu and Chinedu Mbonu and Chiamaka Chukwuneke and Samuel Fanijo and Jessica Ojo and Oyinkansola Awosan and Tadesse Kebede and Toadoum Sari Sakayo and Pamela Nyatsine and Freedmore Sidume and Oreen Yousuf and Mardiyyah Oduwole and Tshinu Tshinu and Ussen Kimanuka and Thina Diko and Siyanda Nxakama and Sinodos Nigusse and Abdulmejid Johar and Shafie Mohamed and Fuad Mire Hassan and Moges Ahmed Mehamed and Evrard Ngabire and Jules Jules and Ivan Ssenkungu and Pontus Stenetorp}, + eprint = {2304.09972}, + primaryclass = {cs.CL}, + title = {MasakhaNEWS: News Topic Classification for African languages}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py b/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py index 4538c3d4f7..b68eed3785 100644 --- a/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py +++ b/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py @@ -82,13 +82,15 @@ class MassiveIntentClassification(MultilingualTask, AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="human-translated and localized", # with the exception of the English data - 
bibtex_citation="""@misc{fitzgerald2022massive, - title={MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages}, - author={Jack FitzGerald and Christopher Hench and Charith Peris and Scott Mackie and Kay Rottmann and Ana Sanchez and Aaron Nash and Liam Urbach and Vishesh Kakarala and Richa Singh and Swetha Ranganath and Laurie Crist and Misha Britan and Wouter Leeuwis and Gokhan Tur and Prem Natarajan}, - year={2022}, - eprint={2204.08582}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{fitzgerald2022massive, + archiveprefix = {arXiv}, + author = {Jack FitzGerald and Christopher Hench and Charith Peris and Scott Mackie and Kay Rottmann and Ana Sanchez and Aaron Nash and Liam Urbach and Vishesh Kakarala and Richa Singh and Swetha Ranganath and Laurie Crist and Misha Britan and Wouter Leeuwis and Gokhan Tur and Prem Natarajan}, + eprint = {2204.08582}, + primaryclass = {cs.CL}, + title = {MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages}, + year = {2022}, +} +""", prompt="Given a user utterance as query, find the user intents", ) diff --git a/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py b/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py index d48394318c..3ddc95dced 100644 --- a/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py +++ b/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py @@ -82,13 +82,15 @@ class MassiveScenarioClassification(MultilingualTask, AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="human-translated and localized", # with the exception of the English data - bibtex_citation="""@misc{fitzgerald2022massive, - title={MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages}, - author={Jack FitzGerald 
and Christopher Hench and Charith Peris and Scott Mackie and Kay Rottmann and Ana Sanchez and Aaron Nash and Liam Urbach and Vishesh Kakarala and Richa Singh and Swetha Ranganath and Laurie Crist and Misha Britan and Wouter Leeuwis and Gokhan Tur and Prem Natarajan}, - year={2022}, - eprint={2204.08582}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{fitzgerald2022massive, + archiveprefix = {arXiv}, + author = {Jack FitzGerald and Christopher Hench and Charith Peris and Scott Mackie and Kay Rottmann and Ana Sanchez and Aaron Nash and Liam Urbach and Vishesh Kakarala and Richa Singh and Swetha Ranganath and Laurie Crist and Misha Britan and Wouter Leeuwis and Gokhan Tur and Prem Natarajan}, + eprint = {2204.08582}, + primaryclass = {cs.CL}, + title = {MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages}, + year = {2022}, +} +""", prompt="Given a user utterance as query, find the user scenarios", ) diff --git a/mteb/tasks/Classification/multilingual/MultiHateClassification.py b/mteb/tasks/Classification/multilingual/MultiHateClassification.py index f20ba592c1..ab6c35db09 100644 --- a/mteb/tasks/Classification/multilingual/MultiHateClassification.py +++ b/mteb/tasks/Classification/multilingual/MultiHateClassification.py @@ -45,53 +45,53 @@ class MultiHateClassification(MultilingualTask, AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="created", - bibtex_citation=""" - @inproceedings{rottger-etal-2021-hatecheck, - title = "{H}ate{C}heck: Functional Tests for Hate Speech Detection Models", - author = {R{\"o}ttger, Paul and - Vidgen, Bertie and - Nguyen, Dong and - Waseem, Zeerak and - Margetts, Helen and - Pierrehumbert, Janet}, - editor = "Zong, Chengqing and - Xia, Fei and - Li, Wenjie and - Navigli, Roberto", - booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th 
International Joint Conference on Natural Language Processing (Volume 1: Long Papers)", - month = aug, - year = "2021", - address = "Online", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.acl-long.4", - doi = "10.18653/v1/2021.acl-long.4", - pages = "41--58", - abstract = "Detecting online hate is a difficult task that even state-of-the-art models struggle with. Typically, hate speech detection models are evaluated by measuring their performance on held-out test data using metrics such as accuracy and F1 score. However, this approach makes it difficult to identify specific model weak points. It also risks overestimating generalisable model performance due to increasingly well-evidenced systematic gaps and biases in hate speech datasets. To enable more targeted diagnostic insights, we introduce HateCheck, a suite of functional tests for hate speech detection models. We specify 29 model functionalities motivated by a review of previous research and a series of interviews with civil society stakeholders. We craft test cases for each functionality and validate their quality through a structured annotation process. To illustrate HateCheck{'}s utility, we test near-state-of-the-art transformer models as well as two popular commercial models, revealing critical model weaknesses.", - } + bibtex_citation=r""" +@inproceedings{rottger-etal-2021-hatecheck, + abstract = {Detecting online hate is a difficult task that even state-of-the-art models struggle with. Typically, hate speech detection models are evaluated by measuring their performance on held-out test data using metrics such as accuracy and F1 score. However, this approach makes it difficult to identify specific model weak points. It also risks overestimating generalisable model performance due to increasingly well-evidenced systematic gaps and biases in hate speech datasets. 
To enable more targeted diagnostic insights, we introduce HateCheck, a suite of functional tests for hate speech detection models. We specify 29 model functionalities motivated by a review of previous research and a series of interviews with civil society stakeholders. We craft test cases for each functionality and validate their quality through a structured annotation process. To illustrate HateCheck{'}s utility, we test near-state-of-the-art transformer models as well as two popular commercial models, revealing critical model weaknesses.}, + address = {Online}, + author = {R{\"o}ttger, Paul and +Vidgen, Bertie and +Nguyen, Dong and +Waseem, Zeerak and +Margetts, Helen and +Pierrehumbert, Janet}, + booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)}, + doi = {10.18653/v1/2021.acl-long.4}, + editor = {Zong, Chengqing and +Xia, Fei and +Li, Wenjie and +Navigli, Roberto}, + month = aug, + pages = {41--58}, + publisher = {Association for Computational Linguistics}, + title = {{H}ate{C}heck: Functional Tests for Hate Speech Detection Models}, + url = {https://aclanthology.org/2021.acl-long.4}, + year = {2021}, +} - @inproceedings{rottger-etal-2022-multilingual, - title = "Multilingual {H}ate{C}heck: Functional Tests for Multilingual Hate Speech Detection Models", - author = {R{\"o}ttger, Paul and - Seelawi, Haitham and - Nozza, Debora and - Talat, Zeerak and - Vidgen, Bertie}, - editor = "Narang, Kanika and - Mostafazadeh Davani, Aida and - Mathias, Lambert and - Vidgen, Bertie and - Talat, Zeerak", - booktitle = "Proceedings of the Sixth Workshop on Online Abuse and Harms (WOAH)", - month = jul, - year = "2022", - address = "Seattle, Washington (Hybrid)", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2022.woah-1.15", - doi = "10.18653/v1/2022.woah-1.15", - pages = "154--169", 
- abstract = "Hate speech detection models are typically evaluated on held-out test sets. However, this risks painting an incomplete and potentially misleading picture of model performance because of increasingly well-documented systematic gaps and biases in hate speech datasets. To enable more targeted diagnostic insights, recent research has thus introduced functional tests for hate speech detection models. However, these tests currently only exist for English-language content, which means that they cannot support the development of more effective models in other languages spoken by billions across the world. To help address this issue, we introduce Multilingual HateCheck (MHC), a suite of functional tests for multilingual hate speech detection models. MHC covers 34 functionalities across ten languages, which is more languages than any other hate speech dataset. To illustrate MHC{'}s utility, we train and test a high-performing multilingual hate speech detection model, and reveal critical model weaknesses for monolingual and cross-lingual applications.", - } - """, +@inproceedings{rottger-etal-2022-multilingual, + abstract = {Hate speech detection models are typically evaluated on held-out test sets. However, this risks painting an incomplete and potentially misleading picture of model performance because of increasingly well-documented systematic gaps and biases in hate speech datasets. To enable more targeted diagnostic insights, recent research has thus introduced functional tests for hate speech detection models. However, these tests currently only exist for English-language content, which means that they cannot support the development of more effective models in other languages spoken by billions across the world. To help address this issue, we introduce Multilingual HateCheck (MHC), a suite of functional tests for multilingual hate speech detection models. 
MHC covers 34 functionalities across ten languages, which is more languages than any other hate speech dataset. To illustrate MHC{'}s utility, we train and test a high-performing multilingual hate speech detection model, and reveal critical model weaknesses for monolingual and cross-lingual applications.}, + address = {Seattle, Washington (Hybrid)}, + author = {R{\"o}ttger, Paul and +Seelawi, Haitham and +Nozza, Debora and +Talat, Zeerak and +Vidgen, Bertie}, + booktitle = {Proceedings of the Sixth Workshop on Online Abuse and Harms (WOAH)}, + doi = {10.18653/v1/2022.woah-1.15}, + editor = {Narang, Kanika and +Mostafazadeh Davani, Aida and +Mathias, Lambert and +Vidgen, Bertie and +Talat, Zeerak}, + month = jul, + pages = {154--169}, + publisher = {Association for Computational Linguistics}, + title = {Multilingual {H}ate{C}heck: Functional Tests for Multilingual Hate Speech Detection Models}, + url = {https://aclanthology.org/2022.woah-1.15}, + year = {2022}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/multilingual/MultilingualSentimentClassification.py b/mteb/tasks/Classification/multilingual/MultilingualSentimentClassification.py index 1108dd7cf8..b6529c87d5 100644 --- a/mteb/tasks/Classification/multilingual/MultilingualSentimentClassification.py +++ b/mteb/tasks/Classification/multilingual/MultilingualSentimentClassification.py @@ -64,31 +64,31 @@ class MultilingualSentimentClassification(AbsTaskClassification, MultilingualTas annotations_creators="derived", dialect=["ar-dz"], sample_creation="found", - bibtex_citation=""" - @inproceedings{mollanorozy-etal-2023-cross, - title = "Cross-lingual Transfer Learning with \{P\}ersian", - author = "Mollanorozy, Sepideh and - Tanti, Marc and - Nissim, Malvina", - editor = "Beinborn, Lisa and - Goswami, Koustava and - Murado{\\u{g}}lu, Saliha and - Sorokin, Alexey and - Kumar, Ritesh and - Shcherbakov, Andreas and - Ponti, Edoardo M. 
and - Cotterell, Ryan and - Vylomova, Ekaterina", - booktitle = "Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP", - month = may, - year = "2023", - address = "Dubrovnik, Croatia", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2023.sigtyp-1.9", - doi = "10.18653/v1/2023.sigtyp-1.9", - pages = "89--95", - } - """, + bibtex_citation=r""" +@inproceedings{mollanorozy-etal-2023-cross, + address = {Dubrovnik, Croatia}, + author = {Mollanorozy, Sepideh and +Tanti, Marc and +Nissim, Malvina}, + booktitle = {Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP}, + doi = {10.18653/v1/2023.sigtyp-1.9}, + editor = {Beinborn, Lisa and +Goswami, Koustava and +Murado{\u{g}}lu, Saliha and +Sorokin, Alexey and +Kumar, Ritesh and +Shcherbakov, Andreas and +Ponti, Edoardo M. and +Cotterell, Ryan and +Vylomova, Ekaterina}, + month = may, + pages = {89--95}, + publisher = {Association for Computational Linguistics}, + title = {Cross-lingual Transfer Learning with {P}ersian}, + url = {https://aclanthology.org/2023.sigtyp-1.9}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/multilingual/NaijaSenti.py b/mteb/tasks/Classification/multilingual/NaijaSenti.py index b31333236e..f9767baa71 100644 --- a/mteb/tasks/Classification/multilingual/NaijaSenti.py +++ b/mteb/tasks/Classification/multilingual/NaijaSenti.py @@ -37,29 +37,30 @@ class NaijaSenti(AbsTaskClassification, MultilingualTask): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{muhammad-etal-2022-naijasenti, - title = "{N}aija{S}enti: A {N}igerian {T}witter Sentiment Corpus for Multilingual Sentiment Analysis", - author = "Muhammad, Shamsuddeen Hassan and - Adelani, David Ifeoluwa and - Ruder, Sebastian and - Ahmad, Ibrahim Sa{'}id and - Abdulmumin, Idris and - Bello,
Bello Shehu and - Choudhury, Monojit and - Emezue, Chris Chinenye and - Abdullahi, Saheed Salahudeen and - Aremu, Anuoluwapo and - Jorge, Al{\'\i}pio and - Brazdil, Pavel", - booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference", - month = jun, - year = "2022", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://aclanthology.org/2022.lrec-1.63", - pages = "590--602", - }""", + bibtex_citation=r""" +@inproceedings{muhammad-etal-2022-naijasenti, + address = {Marseille, France}, + author = {Muhammad, Shamsuddeen Hassan and +Adelani, David Ifeoluwa and +Ruder, Sebastian and +Ahmad, Ibrahim Sa{'}id and +Abdulmumin, Idris and +Bello, Bello Shehu and +Choudhury, Monojit and +Emezue, Chris Chinenye and +Abdullahi, Saheed Salahudeen and +Aremu, Anuoluwapo and +Jorge, Al{\'\i}pio and +Brazdil, Pavel}, + booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference}, + month = jun, + pages = {590--602}, + publisher = {European Language Resources Association}, + title = {{N}aija{S}enti: A {N}igerian {T}witter Sentiment Corpus for Multilingual Sentiment Analysis}, + url = {https://aclanthology.org/2022.lrec-1.63}, + year = {2022}, +} +""", ) def load_data(self, **kwargs: Any) -> None: diff --git a/mteb/tasks/Classification/multilingual/NordicLangClassification.py b/mteb/tasks/Classification/multilingual/NordicLangClassification.py index 2a89e44a23..7854eca4fb 100644 --- a/mteb/tasks/Classification/multilingual/NordicLangClassification.py +++ b/mteb/tasks/Classification/multilingual/NordicLangClassification.py @@ -35,24 +35,25 @@ class NordicLangClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{haas-derczynski-2021-discriminating, - title = "Discriminating Between Similar {N}ordic Languages", - author = "Haas, Ren{\'e} and - Derczynski, Leon", - editor = {Zampieri, Marcos 
and - Nakov, Preslav and - Ljube{\v{s}}i{\'c}, Nikola and - Tiedemann, J{\"o}rg and - Scherrer, Yves and - Jauhiainen, Tommi}, - booktitle = "Proceedings of the Eighth Workshop on NLP for Similar Languages, Varieties and Dialects", - month = apr, - year = "2021", - address = "Kiyv, Ukraine", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.vardial-1.8", - pages = "67--75", - abstract = "Automatic language identification is a challenging problem. Discriminating between closely related languages is especially difficult. This paper presents a machine learning approach for automatic language identification for the Nordic languages, which often suffer miscategorisation by existing state-of-the-art tools. Concretely we will focus on discrimination between six Nordic languages: Danish, Swedish, Norwegian (Nynorsk), Norwegian (Bokm{\aa}l), Faroese and Icelandic.", + bibtex_citation=r""" +@inproceedings{haas-derczynski-2021-discriminating, + abstract = {Automatic language identification is a challenging problem. Discriminating between closely related languages is especially difficult. This paper presents a machine learning approach for automatic language identification for the Nordic languages, which often suffer miscategorisation by existing state-of-the-art tools. 
Concretely we will focus on discrimination between six Nordic languages: Danish, Swedish, Norwegian (Nynorsk), Norwegian (Bokm{\aa}l), Faroese and Icelandic.}, + address = {Kiyv, Ukraine}, + author = {Haas, Ren{\'e} and +Derczynski, Leon}, + booktitle = {Proceedings of the Eighth Workshop on NLP for Similar Languages, Varieties and Dialects}, + editor = {Zampieri, Marcos and +Nakov, Preslav and +Ljube{\v{s}}i{\'c}, Nikola and +Tiedemann, J{\"o}rg and +Scherrer, Yves and +Jauhiainen, Tommi}, + month = apr, + pages = {67--75}, + publisher = {Association for Computational Linguistics}, + title = {Discriminating Between Similar {N}ordic Languages}, + url = {https://aclanthology.org/2021.vardial-1.8}, + year = {2021}, } """, prompt="Classify texts based on language", diff --git a/mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py b/mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py index fca11b365c..857fb161df 100644 --- a/mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py +++ b/mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py @@ -40,18 +40,18 @@ class NusaParagraphEmotionClassification(MultilingualTask, AbsTaskClassification annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{cahyawijaya-etal-2023-nusawrites, - title = "NusaWrites: Constructing High-Quality Corpora for Underrepresented and Extremely Low-Resource Languages", - author = "Cahyawijaya, Samuel and Lovenia, Holy and Koto, Fajri and Adhista, Dea and Dave, Emmanuel and Oktavianti, Sarah and Akbar, Salsabil and Lee, Jhonson and Shadieq, Nuur and Cenggoro, Tjeng Wawan and Linuwih, Hanung and Wilie, Bryan and Muridan, Galih and Winata, Genta and Moeljadi, David and Aji, Alham Fikri and Purwarianti, Ayu and Fung, Pascale", - editor = "Park, Jong C. 
and Arase, Yuki and Hu, Baotian and Lu, Wei and Wijaya, Derry and Purwarianti, Ayu and Krisnadhi, Adila Alfa", - booktitle = "Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)", - month = nov, - year = "2023", - address = "Nusa Dua, Bali", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2023.ijcnlp-main.60", - pages = "921--945", - } - """, + bibtex_citation=r""" +@inproceedings{cahyawijaya-etal-2023-nusawrites, + address = {Nusa Dua, Bali}, + author = {Cahyawijaya, Samuel and Lovenia, Holy and Koto, Fajri and Adhista, Dea and Dave, Emmanuel and Oktavianti, Sarah and Akbar, Salsabil and Lee, Jhonson and Shadieq, Nuur and Cenggoro, Tjeng Wawan and Linuwih, Hanung and Wilie, Bryan and Muridan, Galih and Winata, Genta and Moeljadi, David and Aji, Alham Fikri and Purwarianti, Ayu and Fung, Pascale}, + booktitle = {Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)}, + editor = {Park, Jong C. 
and Arase, Yuki and Hu, Baotian and Lu, Wei and Wijaya, Derry and Purwarianti, Ayu and Krisnadhi, Adila Alfa}, + month = nov, + pages = {921--945}, + publisher = {Association for Computational Linguistics}, + title = {NusaWrites: Constructing High-Quality Corpora for Underrepresented and Extremely Low-Resource Languages}, + url = {https://aclanthology.org/2023.ijcnlp-main.60}, + year = {2023}, +} +""", ) diff --git a/mteb/tasks/Classification/multilingual/NusaParagraphTopicClassification.py b/mteb/tasks/Classification/multilingual/NusaParagraphTopicClassification.py index effd257709..cddeced561 100644 --- a/mteb/tasks/Classification/multilingual/NusaParagraphTopicClassification.py +++ b/mteb/tasks/Classification/multilingual/NusaParagraphTopicClassification.py @@ -40,18 +40,18 @@ class NusaParagraphTopicClassification(MultilingualTask, AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{cahyawijaya-etal-2023-nusawrites, - title = "NusaWrites: Constructing High-Quality Corpora for Underrepresented and Extremely Low-Resource Languages", - author = "Cahyawijaya, Samuel and Lovenia, Holy and Koto, Fajri and Adhista, Dea and Dave, Emmanuel and Oktavianti, Sarah and Akbar, Salsabil and Lee, Jhonson and Shadieq, Nuur and Cenggoro, Tjeng Wawan and Linuwih, Hanung and Wilie, Bryan and Muridan, Galih and Winata, Genta and Moeljadi, David and Aji, Alham Fikri and Purwarianti, Ayu and Fung, Pascale", - editor = "Park, Jong C. 
and Arase, Yuki and Hu, Baotian and Lu, Wei and Wijaya, Derry and Purwarianti, Ayu and Krisnadhi, Adila Alfa", - booktitle = "Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)", - month = nov, - year = "2023", - address = "Nusa Dua, Bali", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2023.ijcnlp-main.60", - pages = "921--945", - } - """, + bibtex_citation=r""" +@inproceedings{cahyawijaya-etal-2023-nusawrites, + address = {Nusa Dua, Bali}, + author = {Cahyawijaya, Samuel and Lovenia, Holy and Koto, Fajri and Adhista, Dea and Dave, Emmanuel and Oktavianti, Sarah and Akbar, Salsabil and Lee, Jhonson and Shadieq, Nuur and Cenggoro, Tjeng Wawan and Linuwih, Hanung and Wilie, Bryan and Muridan, Galih and Winata, Genta and Moeljadi, David and Aji, Alham Fikri and Purwarianti, Ayu and Fung, Pascale}, + booktitle = {Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)}, + editor = {Park, Jong C. 
and Arase, Yuki and Hu, Baotian and Lu, Wei and Wijaya, Derry and Purwarianti, Ayu and Krisnadhi, Adila Alfa}, + month = nov, + pages = {921--945}, + publisher = {Association for Computational Linguistics}, + title = {NusaWrites: Constructing High-Quality Corpora for Underrepresented and Extremely Low-Resource Languages}, + url = {https://aclanthology.org/2023.ijcnlp-main.60}, + year = {2023}, +} +""", ) diff --git a/mteb/tasks/Classification/multilingual/NusaXSenti.py b/mteb/tasks/Classification/multilingual/NusaXSenti.py index 1b9fa2460a..368abb1e6e 100644 --- a/mteb/tasks/Classification/multilingual/NusaXSenti.py +++ b/mteb/tasks/Classification/multilingual/NusaXSenti.py @@ -41,18 +41,18 @@ class NusaXSentiClassification(AbsTaskClassification, MultilingualTask): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{winata2022nusax, - title={NusaX: Multilingual Parallel Sentiment Dataset for 10 Indonesian Local Languages}, - author={Winata, Genta Indra and Aji, Alham Fikri and Cahyawijaya, - Samuel and Mahendra, Rahmad and Koto, Fajri and Romadhony, - Ade and Kurniawan, Kemal and Moeljadi, David and Prasojo, - Radityo Eko and Fung, Pascale and Baldwin, Timothy and Lau, - Jey Han and Sennrich, Rico and Ruder, Sebastian}, - year={2022}, - eprint={2205.15960}, - archivePrefix={arXiv}, - primaryClass={cs.CL} + bibtex_citation=r""" +@misc{winata2022nusax, + archiveprefix = {arXiv}, + author = {Winata, Genta Indra and Aji, Alham Fikri and Cahyawijaya, +Samuel and Mahendra, Rahmad and Koto, Fajri and Romadhony, +Ade and Kurniawan, Kemal and Moeljadi, David and Prasojo, +Radityo Eko and Fung, Pascale and Baldwin, Timothy and Lau, +Jey Han and Sennrich, Rico and Ruder, Sebastian}, + eprint = {2205.15960}, + primaryclass = {cs.CL}, + title = {NusaX: Multilingual Parallel Sentiment Dataset for 10 Indonesian Local Languages}, + year = {2022}, } """, ) diff --git 
a/mteb/tasks/Classification/multilingual/SIB200Classification.py b/mteb/tasks/Classification/multilingual/SIB200Classification.py index 88e5d4b9c8..bbfb20e40b 100644 --- a/mteb/tasks/Classification/multilingual/SIB200Classification.py +++ b/mteb/tasks/Classification/multilingual/SIB200Classification.py @@ -232,12 +232,14 @@ class SIB200Classification(MultilingualTask, AbsTaskClassification): annotations_creators="expert-annotated", # expert annotated for English --> human translations dialect=[], sample_creation="human-translated and localized", - bibtex_citation="""@article{adelani2023sib, - title={SIB-200: A simple, inclusive, and big evaluation dataset for topic classification in 200+ languages and dialects}, - author={Adelani, David Ifeoluwa and Liu, Hannah and Shen, Xiaoyu and Vassilyev, Nikita and Alabi, Jesujoba O and Mao, Yanke and Gao, Haonan and Lee, Annie En-Shiun}, - journal={arXiv preprint arXiv:2309.07445}, - year={2023} - }""", + bibtex_citation=r""" +@article{adelani2023sib, + author = {Adelani, David Ifeoluwa and Liu, Hannah and Shen, Xiaoyu and Vassilyev, Nikita and Alabi, Jesujoba O and Mao, Yanke and Gao, Haonan and Lee, Annie En-Shiun}, + journal = {arXiv preprint arXiv:2309.07445}, + title = {SIB-200: A simple, inclusive, and big evaluation dataset for topic classification in 200+ languages and dialects}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/multilingual/ScalaClassification.py b/mteb/tasks/Classification/multilingual/ScalaClassification.py index fec0b48b18..d037ac9c9d 100644 --- a/mteb/tasks/Classification/multilingual/ScalaClassification.py +++ b/mteb/tasks/Classification/multilingual/ScalaClassification.py @@ -38,19 +38,21 @@ class ScalaClassification(AbsTaskClassification, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{nielsen-2023-scandeval, - title = "{S}cand{E}val: A Benchmark for 
{S}candinavian Natural Language Processing", - author = "Nielsen, Dan", - editor = {Alum{\"a}e, Tanel and - Fishel, Mark}, - booktitle = "Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)", - month = may, - year = "2023", - address = "T{\'o}rshavn, Faroe Islands", - publisher = "University of Tartu Library", - url = "https://aclanthology.org/2023.nodalida-1.20", - pages = "185--201", - }""", + bibtex_citation=r""" +@inproceedings{nielsen-2023-scandeval, + address = {T{\'o}rshavn, Faroe Islands}, + author = {Nielsen, Dan}, + booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)}, + editor = {Alum{\"a}e, Tanel and +Fishel, Mark}, + month = may, + pages = {185--201}, + publisher = {University of Tartu Library}, + title = {{S}cand{E}val: A Benchmark for {S}candinavian Natural Language Processing}, + url = {https://aclanthology.org/2023.nodalida-1.20}, + year = {2023}, +} +""", prompt="Classify passages in Scandinavian Languages based on linguistic acceptability", ) diff --git a/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py b/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py index 217d300ec0..14c3fa3f85 100644 --- a/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py +++ b/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py @@ -40,13 +40,15 @@ class SouthAfricanLangClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{south-african-language-identification, - author = {ExploreAI Academy, Joanne M}, - title = {South African Language Identification}, - publisher = {Kaggle}, - year = {2022}, - url = {https://kaggle.com/competitions/south-african-language-identification} -}""", + bibtex_citation=r""" +@misc{south-african-language-identification, + author = {ExploreAI Academy, Joanne M}, + publisher = {Kaggle}, + title = 
{South African Language Identification}, + url = {https://kaggle.com/competitions/south-african-language-identification}, + year = {2022}, +} +""", ) def dataset_transform(self) -> None: diff --git a/mteb/tasks/Classification/multilingual/SwissJudgementClassification.py b/mteb/tasks/Classification/multilingual/SwissJudgementClassification.py index 92aa43268c..09650dac8b 100644 --- a/mteb/tasks/Classification/multilingual/SwissJudgementClassification.py +++ b/mteb/tasks/Classification/multilingual/SwissJudgementClassification.py @@ -34,13 +34,14 @@ class SwissJudgementClassification(MultilingualTask, AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{niklaus2022empirical, - title={An Empirical Study on Cross-X Transfer for Legal Judgment Prediction}, - author={Joel Niklaus and Matthias Stürmer and Ilias Chalkidis}, - year={2022}, - eprint={2209.12325}, - archivePrefix={arXiv}, - primaryClass={cs.CL} + bibtex_citation=r""" +@misc{niklaus2022empirical, + archiveprefix = {arXiv}, + author = {Joel Niklaus and Matthias Stürmer and Ilias Chalkidis}, + eprint = {2209.12325}, + primaryclass = {cs.CL}, + title = {An Empirical Study on Cross-X Transfer for Legal Judgment Prediction}, + year = {2022}, } """, ) diff --git a/mteb/tasks/Classification/multilingual/TweetSentimentClassification.py b/mteb/tasks/Classification/multilingual/TweetSentimentClassification.py index 4105f975a9..1d05a0f885 100644 --- a/mteb/tasks/Classification/multilingual/TweetSentimentClassification.py +++ b/mteb/tasks/Classification/multilingual/TweetSentimentClassification.py @@ -39,22 +39,22 @@ class TweetSentimentClassification(MultilingualTask, AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{barbieri-etal-2022-xlm, - title = "{XLM}-{T}: Multilingual Language Models in {T}witter for Sentiment Analysis and Beyond", - author = 
"Barbieri, Francesco and - Espinosa Anke, Luis and - Camacho-Collados, Jose", - booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference", - month = jun, - year = "2022", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://aclanthology.org/2022.lrec-1.27", - pages = "258--266", - abstract = "Language models are ubiquitous in current NLP, and their multilingual capacity has recently attracted considerable attention. However, current analyses have almost exclusively focused on (multilingual variants of) standard benchmarks, and have relied on clean pre-training and task-specific corpora as multilingual signals. In this paper, we introduce XLM-T, a model to train and evaluate multilingual language models in Twitter. In this paper we provide: (1) a new strong multilingual baseline consisting of an XLM-R (Conneau et al. 2020) model pre-trained on millions of tweets in over thirty languages, alongside starter code to subsequently fine-tune on a target task; and (2) a set of unified sentiment analysis Twitter datasets in eight different languages and a XLM-T model trained on this dataset.", - } - """, + bibtex_citation=r""" +@inproceedings{barbieri-etal-2022-xlm, + abstract = {Language models are ubiquitous in current NLP, and their multilingual capacity has recently attracted considerable attention. However, current analyses have almost exclusively focused on (multilingual variants of) standard benchmarks, and have relied on clean pre-training and task-specific corpora as multilingual signals. In this paper, we introduce XLM-T, a model to train and evaluate multilingual language models in Twitter. In this paper we provide: (1) a new strong multilingual baseline consisting of an XLM-R (Conneau et al. 
2020) model pre-trained on millions of tweets in over thirty languages, alongside starter code to subsequently fine-tune on a target task; and (2) a set of unified sentiment analysis Twitter datasets in eight different languages and a XLM-T model trained on this dataset.}, + address = {Marseille, France}, + author = {Barbieri, Francesco and +Espinosa Anke, Luis and +Camacho-Collados, Jose}, + booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference}, + month = jun, + pages = {258--266}, + publisher = {European Language Resources Association}, + title = {{XLM}-{T}: Multilingual Language Models in {T}witter for Sentiment Analysis and Beyond}, + url = {https://aclanthology.org/2022.lrec-1.27}, + year = {2022}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/mya/MyanmarNews.py b/mteb/tasks/Classification/mya/MyanmarNews.py index 8418e20533..0d8daa8e1a 100644 --- a/mteb/tasks/Classification/mya/MyanmarNews.py +++ b/mteb/tasks/Classification/mya/MyanmarNews.py @@ -27,15 +27,16 @@ class MyanmarNews(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""" - @inproceedings{Khine2017, - author = {A. H. Khine and K. T. Nwet and K. M. Soe}, - title = {Automatic Myanmar News Classification}, - booktitle = {15th Proceedings of International Conference on Computer Applications}, - year = {2017}, - month = {February}, - pages = {401--408} - }""", + bibtex_citation=r""" +@inproceedings{Khine2017, + author = {A. H. Khine and K. T. Nwet and K. M. 
Soe}, + booktitle = {15th Proceedings of International Conference on Computer Applications}, + month = {February}, + pages = {401--408}, + title = {Automatic Myanmar News Classification}, + year = {2017}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/nep/NepaliNewsClassification.py b/mteb/tasks/Classification/nep/NepaliNewsClassification.py index 85cc8d9661..d266e38a6b 100644 --- a/mteb/tasks/Classification/nep/NepaliNewsClassification.py +++ b/mteb/tasks/Classification/nep/NepaliNewsClassification.py @@ -26,27 +26,27 @@ class NepaliNewsClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{arora-2020-inltk, - title = "i{NLTK}: Natural Language Toolkit for Indic Languages", - author = "Arora, Gaurav", - editor = "Park, Eunjeong L. and - Hagiwara, Masato and - Milajevs, Dmitrijs and - Liu, Nelson F. and - Chauhan, Geeticka and - Tan, Liling", - booktitle = "Proceedings of Second Workshop for NLP Open Source Software (NLP-OSS)", - month = nov, - year = "2020", - address = "Online", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2020.nlposs-1.10", - doi = "10.18653/v1/2020.nlposs-1.10", - pages = "66--71", - abstract = "We present iNLTK, an open-source NLP library consisting of pre-trained language models and out-of-the-box support for Data Augmentation, Textual Similarity, Sentence Embeddings, Word Embeddings, Tokenization and Text Generation in 13 Indic Languages. By using pre-trained models from iNLTK for text classification on publicly available datasets, we significantly outperform previously reported results. On these datasets, we also show that by using pre-trained models and data augmentation from iNLTK, we can achieve more than 95{\%} of the previous best performance by using less than 10{\%} of the training data. 
iNLTK is already being widely used by the community and has 40,000+ downloads, 600+ stars and 100+ forks on GitHub.", - } - """, + bibtex_citation=r""" +@inproceedings{arora-2020-inltk, + abstract = {We present iNLTK, an open-source NLP library consisting of pre-trained language models and out-of-the-box support for Data Augmentation, Textual Similarity, Sentence Embeddings, Word Embeddings, Tokenization and Text Generation in 13 Indic Languages. By using pre-trained models from iNLTK for text classification on publicly available datasets, we significantly outperform previously reported results. On these datasets, we also show that by using pre-trained models and data augmentation from iNLTK, we can achieve more than 95{\%} of the previous best performance by using less than 10{\%} of the training data. iNLTK is already being widely used by the community and has 40,000+ downloads, 600+ stars and 100+ forks on GitHub.}, + address = {Online}, + author = {Arora, Gaurav}, + booktitle = {Proceedings of Second Workshop for NLP Open Source Software (NLP-OSS)}, + doi = {10.18653/v1/2020.nlposs-1.10}, + editor = {Park, Eunjeong L. and +Hagiwara, Masato and +Milajevs, Dmitrijs and +Liu, Nelson F. 
and +Chauhan, Geeticka and +Tan, Liling}, + month = nov, + pages = {66--71}, + publisher = {Association for Computational Linguistics}, + title = {i{NLTK}: Natural Language Toolkit for Indic Languages}, + url = {https://aclanthology.org/2020.nlposs-1.10}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/nld/DutchBookReviewSentimentClassification.py b/mteb/tasks/Classification/nld/DutchBookReviewSentimentClassification.py index f0ee1b07dc..882562f5f4 100644 --- a/mteb/tasks/Classification/nld/DutchBookReviewSentimentClassification.py +++ b/mteb/tasks/Classification/nld/DutchBookReviewSentimentClassification.py @@ -27,20 +27,21 @@ class DutchBookReviewSentimentClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{DBLP:journals/corr/abs-1910-00896, - author = {Benjamin, van der Burgh and - Suzan, Verberne}, - title = {The merits of Universal Language Model Fine-tuning for Small Datasets - - a case with Dutch book reviews}, - journal = {CoRR}, - volume = {abs/1910.00896}, - year = {2019}, - url = {http://arxiv.org/abs/1910.00896}, - archivePrefix = {arXiv}, - eprint = {1910.00896}, + bibtex_citation=r""" +@article{DBLP:journals/corr/abs-1910-00896, + archiveprefix = {arXiv}, + author = {Benjamin, van der Burgh and +Suzan, Verberne}, + bibsource = {dblp computer science bibliography, https://dblp.org}, + biburl = {https://dblp.org/rec/journals/corr/abs-1910-00896.bib}, + eprint = {1910.00896}, + journal = {CoRR}, timestamp = {Fri, 04 Oct 2019 12:28:06 +0200}, - biburl = {https://dblp.org/rec/journals/corr/abs-1910-00896.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} + title = {The merits of Universal Language Model Fine-tuning for Small Datasets +- a case with Dutch book reviews}, + url = {http://arxiv.org/abs/1910.00896}, + volume = {abs/1910.00896}, + year = {2019}, } """, ) diff --git 
a/mteb/tasks/Classification/nob/NoRecClassification.py b/mteb/tasks/Classification/nob/NoRecClassification.py index 8391c880c0..3dfa084b0e 100644 --- a/mteb/tasks/Classification/nob/NoRecClassification.py +++ b/mteb/tasks/Classification/nob/NoRecClassification.py @@ -27,34 +27,35 @@ class NoRecClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{velldal-etal-2018-norec, - title = "{N}o{R}e{C}: The {N}orwegian Review Corpus", - author = "Velldal, Erik and - {\\O}vrelid, Lilja and - Bergem, Eivind Alexander and - Stadsnes, Cathrine and - Touileb, Samia and - J{\\o}rgensen, Fredrik", - editor = "Calzolari, Nicoletta and - Choukri, Khalid and - Cieri, Christopher and - Declerck, Thierry and - Goggi, Sara and - Hasida, Koiti and - Isahara, Hitoshi and - Maegaard, Bente and - Mariani, Joseph and - Mazo, H{\\'e}l{\\`e}ne and - Moreno, Asuncion and - Odijk, Jan and - Piperidis, Stelios and - Tokunaga, Takenobu", - booktitle = "Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)", - month = may, - year = "2018", - address = "Miyazaki, Japan", - publisher = "European Language Resources Association (ELRA)", - url = "https://aclanthology.org/L18-1661", + bibtex_citation=r""" +@inproceedings{velldal-etal-2018-norec, + address = {Miyazaki, Japan}, + author = {Velldal, Erik and +{\O}vrelid, Lilja and +Bergem, Eivind Alexander and +Stadsnes, Cathrine and +Touileb, Samia and +J{\o}rgensen, Fredrik}, + booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)}, + editor = {Calzolari, Nicoletta and +Choukri, Khalid and +Cieri, Christopher and +Declerck, Thierry and +Goggi, Sara and +Hasida, Koiti and +Isahara, Hitoshi and +Maegaard, Bente and +Mariani, Joseph and +Mazo, H{\'e}l{\`e}ne and +Moreno, Asuncion and +Odijk, Jan and +Piperidis, Stelios and +Tokunaga, Takenobu}, + month =
may, + publisher = {European Language Resources Association (ELRA)}, + title = {{N}o{R}e{C}: The {N}orwegian Review Corpus}, + url = {https://aclanthology.org/L18-1661}, + year = {2018}, } """, prompt="Classify Norwegian reviews by sentiment", diff --git a/mteb/tasks/Classification/nob/NorwegianParliamentClassification.py b/mteb/tasks/Classification/nob/NorwegianParliamentClassification.py index e46ae6612a..b91c704063 100644 --- a/mteb/tasks/Classification/nob/NorwegianParliamentClassification.py +++ b/mteb/tasks/Classification/nob/NorwegianParliamentClassification.py @@ -28,22 +28,24 @@ class NorwegianParliamentClassification(AbsTaskClassification): annotations_creators="derived", # based on the speaker affiliation dialect=[], # unknown sample_creation="found", - bibtex_citation="""@inproceedings{kummervold-etal-2021-operationalizing, - title = "Operationalizing a National Digital Library: The Case for a {N}orwegian Transformer Model", - author = "Kummervold, Per E and - De la Rosa, Javier and - Wetjen, Freddy and - Brygfjeld, Svein Arne", - editor = "Dobnik, Simon and - {\O}vrelid, Lilja", - booktitle = "Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)", - month = may # " 31--2 " # jun, - year = "2021", - address = "Reykjavik, Iceland (Online)", - publisher = {Link{\"o}ping University Electronic Press, Sweden}, - url = "https://aclanthology.org/2021.nodalida-main.3", - pages = "20--29", - abstract = "In this work, we show the process of building a large-scale training set from digital and digitized collections at a national library. The resulting Bidirectional Encoder Representations from Transformers (BERT)-based language model for Norwegian outperforms multilingual BERT (mBERT) models in several token and sequence classification tasks for both Norwegian Bokm{\aa}l and Norwegian Nynorsk. Our model also improves the mBERT performance for other languages present in the corpus such as English, Swedish, and Danish. 
For languages not included in the corpus, the weights degrade moderately while keeping strong multilingual properties. Therefore, we show that building high-quality models within a memory institution using somewhat noisy optical character recognition (OCR) content is feasible, and we hope to pave the way for other memory institutions to follow.", -}""", + bibtex_citation=r""" +@inproceedings{kummervold-etal-2021-operationalizing, + abstract = {In this work, we show the process of building a large-scale training set from digital and digitized collections at a national library. The resulting Bidirectional Encoder Representations from Transformers (BERT)-based language model for Norwegian outperforms multilingual BERT (mBERT) models in several token and sequence classification tasks for both Norwegian Bokm{\aa}l and Norwegian Nynorsk. Our model also improves the mBERT performance for other languages present in the corpus such as English, Swedish, and Danish. For languages not included in the corpus, the weights degrade moderately while keeping strong multilingual properties. 
Therefore, we show that building high-quality models within a memory institution using somewhat noisy optical character recognition (OCR) content is feasible, and we hope to pave the way for other memory institutions to follow.}, + address = {Reykjavik, Iceland (Online)}, + author = {Kummervold, Per E and +De la Rosa, Javier and +Wetjen, Freddy and +Brygfjeld, Svein Arne}, + booktitle = {Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)}, + editor = {Dobnik, Simon and +{\O}vrelid, Lilja}, + month = may # { 31--2 } # jun, + pages = {20--29}, + publisher = {Link{\"o}ping University Electronic Press, Sweden}, + title = {Operationalizing a National Digital Library: The Case for a {N}orwegian Transformer Model}, + url = {https://aclanthology.org/2021.nodalida-main.3}, + year = {2021}, +} +""", prompt="Classify parliament speeches in Norwegian based on political affiliation", ) diff --git a/mteb/tasks/Classification/ory/OdiaNewsClassification.py b/mteb/tasks/Classification/ory/OdiaNewsClassification.py index 6e89c50ab1..214b8f67f3 100644 --- a/mteb/tasks/Classification/ory/OdiaNewsClassification.py +++ b/mteb/tasks/Classification/ory/OdiaNewsClassification.py @@ -26,12 +26,14 @@ class OdiaNewsClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{kunchukuttan2020indicnlpcorpus, - title={AI4Bharat-IndicNLP Corpus: Monolingual Corpora and Word Embeddings for Indic Languages}, - author={Anoop Kunchukuttan and Divyanshu Kakwani and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. Khapra and Pratyush Kumar}, - year={2020}, - journal={arXiv preprint arXiv:2005.00085}, -}""", + bibtex_citation=r""" +@article{kunchukuttan2020indicnlpcorpus, + author = {Anoop Kunchukuttan and Divyanshu Kakwani and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. 
Khapra and Pratyush Kumar}, + journal = {arXiv preprint arXiv:2005.00085}, + title = {AI4Bharat-IndicNLP Corpus: Monolingual Corpora and Word Embeddings for Indic Languages}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/pan/PunjabiNewsClassification.py b/mteb/tasks/Classification/pan/PunjabiNewsClassification.py index fb948d7746..bc4b812dd1 100644 --- a/mteb/tasks/Classification/pan/PunjabiNewsClassification.py +++ b/mteb/tasks/Classification/pan/PunjabiNewsClassification.py @@ -26,12 +26,14 @@ class PunjabiNewsClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{kunchukuttan2020indicnlpcorpus, - title={AI4Bharat-IndicNLP Corpus: Monolingual Corpora and Word Embeddings for Indic Languages}, - author={Anoop Kunchukuttan and Divyanshu Kakwani and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. Khapra and Pratyush Kumar}, - year={2020}, - journal={arXiv preprint arXiv:2005.00085}, -}""", + bibtex_citation=r""" +@article{kunchukuttan2020indicnlpcorpus, + author = {Anoop Kunchukuttan and Divyanshu Kakwani and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. 
Khapra and Pratyush Kumar}, + journal = {arXiv preprint arXiv:2005.00085}, + title = {AI4Bharat-IndicNLP Corpus: Monolingual Corpora and Word Embeddings for Indic Languages}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/pol/PolishClassification.py b/mteb/tasks/Classification/pol/PolishClassification.py index 7b1148f90b..805ac4b424 100644 --- a/mteb/tasks/Classification/pol/PolishClassification.py +++ b/mteb/tasks/Classification/pol/PolishClassification.py @@ -26,15 +26,17 @@ class CbdClassification(AbsTaskClassification): annotations_creators="human-annotated", # guess dialect=[], sample_creation="found", - bibtex_citation="""@proceedings{ogr:kob:19:poleval, - editor = {Maciej Ogrodniczuk and Łukasz Kobyliński}, - title = {{Proceedings of the PolEval 2019 Workshop}}, - year = {2019}, - address = {Warsaw, Poland}, + bibtex_citation=r""" +@proceedings{ogr:kob:19:poleval, + address = {Warsaw, Poland}, + editor = {Maciej Ogrodniczuk and Łukasz Kobyliński}, + isbn = {978-83-63159-28-3}, publisher = {Institute of Computer Science, Polish Academy of Sciences}, - url = {http://2019.poleval.pl/files/poleval2019.pdf}, - isbn = "978-83-63159-28-3"} -}""", + title = {{Proceedings of the PolEval 2019 Workshop}}, + url = {http://2019.poleval.pl/files/poleval2019.pdf}, + year = {2019}, +} +""", ) @@ -61,21 +63,23 @@ class PolEmo2InClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{kocon-etal-2019-multi, - title = "Multi-Level Sentiment Analysis of {P}ol{E}mo 2.0: Extended Corpus of Multi-Domain Consumer Reviews", - author = "Koco{\'n}, Jan and - Mi{\l}kowski, Piotr and - Za{\'s}ko-Zieli{\'n}ska, Monika", - booktitle = "Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)", - month = nov, - year = "2019", - address = "Hong Kong, China", - publisher = "Association for Computational Linguistics", - url = 
"https://aclanthology.org/K19-1092", - doi = "10.18653/v1/K19-1092", - pages = "980--991", - abstract = "In this article we present an extended version of PolEmo {--} a corpus of consumer reviews from 4 domains: medicine, hotels, products and school. Current version (PolEmo 2.0) contains 8,216 reviews having 57,466 sentences. Each text and sentence was manually annotated with sentiment in 2+1 scheme, which gives a total of 197,046 annotations. We obtained a high value of Positive Specific Agreement, which is 0.91 for texts and 0.88 for sentences. PolEmo 2.0 is publicly available under a Creative Commons copyright license. We explored recent deep learning approaches for the recognition of sentiment, such as Bi-directional Long Short-Term Memory (BiLSTM) and Bidirectional Encoder Representations from Transformers (BERT).", -}""", + bibtex_citation=r""" +@inproceedings{kocon-etal-2019-multi, + abstract = {In this article we present an extended version of PolEmo {--} a corpus of consumer reviews from 4 domains: medicine, hotels, products and school. Current version (PolEmo 2.0) contains 8,216 reviews having 57,466 sentences. Each text and sentence was manually annotated with sentiment in 2+1 scheme, which gives a total of 197,046 annotations. We obtained a high value of Positive Specific Agreement, which is 0.91 for texts and 0.88 for sentences. PolEmo 2.0 is publicly available under a Creative Commons copyright license. 
We explored recent deep learning approaches for the recognition of sentiment, such as Bi-directional Long Short-Term Memory (BiLSTM) and Bidirectional Encoder Representations from Transformers (BERT).}, + address = {Hong Kong, China}, + author = {Koco{\'n}, Jan and +Mi{\l}kowski, Piotr and +Za{\'s}ko-Zieli{\'n}ska, Monika}, + booktitle = {Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)}, + doi = {10.18653/v1/K19-1092}, + month = nov, + pages = {980--991}, + publisher = {Association for Computational Linguistics}, + title = {Multi-Level Sentiment Analysis of {P}ol{E}mo 2.0: Extended Corpus of Multi-Domain Consumer Reviews}, + url = {https://aclanthology.org/K19-1092}, + year = {2019}, +} +""", ) @@ -156,13 +160,15 @@ class PacClassification(AbsTaskClassification): annotations_creators=None, dialect=[], sample_creation=None, - bibtex_citation="""@misc{augustyniak2022waydesigningcompilinglepiszcze, - title={This is the way: designing and compiling LEPISZCZE, a comprehensive NLP benchmark for Polish}, - author={Łukasz Augustyniak and Kamil Tagowski and Albert Sawczyn and Denis Janiak and Roman Bartusiak and Adrian Szymczak and Marcin Wątroba and Arkadiusz Janz and Piotr Szymański and Mikołaj Morzy and Tomasz Kajdanowicz and Maciej Piasecki}, - year={2022}, - eprint={2211.13112}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2211.13112}, -}""", + bibtex_citation=r""" +@misc{augustyniak2022waydesigningcompilinglepiszcze, + archiveprefix = {arXiv}, + author = {Łukasz Augustyniak and Kamil Tagowski and Albert Sawczyn and Denis Janiak and Roman Bartusiak and Adrian Szymczak and Marcin Wątroba and Arkadiusz Janz and Piotr Szymański and Mikołaj Morzy and Tomasz Kajdanowicz and Maciej Piasecki}, + eprint = {2211.13112}, + primaryclass = {cs.CL}, + title = {This is the way: designing and compiling LEPISZCZE, a comprehensive NLP benchmark for Polish}, + url = {https://arxiv.org/abs/2211.13112}, + year = 
{2022}, +} +""", ) diff --git a/mteb/tasks/Classification/por/HateSpeechPortugueseClassification.py b/mteb/tasks/Classification/por/HateSpeechPortugueseClassification.py index a7abf6b0f9..8920f8043e 100644 --- a/mteb/tasks/Classification/por/HateSpeechPortugueseClassification.py +++ b/mteb/tasks/Classification/por/HateSpeechPortugueseClassification.py @@ -27,28 +27,28 @@ class HateSpeechPortugueseClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{fortuna-etal-2019-hierarchically, - title = "A Hierarchically-Labeled {P}ortuguese Hate Speech Dataset", - author = "Fortuna, Paula and - Rocha da Silva, Jo{\~a}o and - Soler-Company, Juan and - Wanner, Leo and - Nunes, S{\'e}rgio", - editor = "Roberts, Sarah T. and - Tetreault, Joel and - Prabhakaran, Vinodkumar and - Waseem, Zeerak", - booktitle = "Proceedings of the Third Workshop on Abusive Language Online", - month = aug, - year = "2019", - address = "Florence, Italy", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/W19-3510", - doi = "10.18653/v1/W19-3510", - pages = "94--104", - } - """, + bibtex_citation=r""" +@inproceedings{fortuna-etal-2019-hierarchically, + address = {Florence, Italy}, + author = {Fortuna, Paula and +Rocha da Silva, Jo{\~a}o and +Soler-Company, Juan and +Wanner, Leo and +Nunes, S{\'e}rgio}, + booktitle = {Proceedings of the Third Workshop on Abusive Language Online}, + doi = {10.18653/v1/W19-3510}, + editor = {Roberts, Sarah T. 
and +Tetreault, Joel and +Prabhakaran, Vinodkumar and +Waseem, Zeerak}, + month = aug, + pages = {94--104}, + publisher = {Association for Computational Linguistics}, + title = {A Hierarchically-Labeled {P}ortuguese Hate Speech Dataset}, + url = {https://aclanthology.org/W19-3510}, + year = {2019}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/ron/Moroco.py b/mteb/tasks/Classification/ron/Moroco.py index 2324f16ef8..479dd5a1d4 100644 --- a/mteb/tasks/Classification/ron/Moroco.py +++ b/mteb/tasks/Classification/ron/Moroco.py @@ -32,15 +32,15 @@ class Moroco(AbsTaskClassification): "ron-Latn-mol", ], # Moldavian, or the Romanian dialect used in Moldova, does not have an ISO 639-1 code assigned to it. However, it has been given the three-letter code "mol" under ISO 639-3 sample_creation="found", - bibtex_citation="""" - @inproceedings{ Butnaru-ACL-2019, - author = {Andrei M. Butnaru and Radu Tudor Ionescu}, - title = "{MOROCO: The Moldavian and Romanian Dialectal Corpus}", - booktitle = {Proceedings of ACL}, - year = {2019}, - pages={688--698}, - } - """, + bibtex_citation=r""" +@inproceedings{Butnaru-ACL-2019, + author = {Andrei M. Butnaru and Radu Tudor Ionescu}, + booktitle = {Proceedings of ACL}, + pages = {688--698}, + title = {{MOROCO: The Moldavian and Romanian Dialectal Corpus}}, + year = {2019}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/ron/RomanianReviewsSentiment.py b/mteb/tasks/Classification/ron/RomanianReviewsSentiment.py index 6666d615a3..be06de0fa5 100644 --- a/mteb/tasks/Classification/ron/RomanianReviewsSentiment.py +++ b/mteb/tasks/Classification/ron/RomanianReviewsSentiment.py @@ -27,13 +27,12 @@ class RomanianReviewsSentiment(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" -@article{ - tache2101clustering, - title={Clustering Word Embeddings with Self-Organizing Maps. 
Application on LaRoSeDa -- A Large Romanian Sentiment Data Set}, - author={Anca Maria Tache and Mihaela Gaman and Radu Tudor Ionescu}, - journal={ArXiv}, - year = {2021} + bibtex_citation=r""" +@article{tache2101clustering, + author = {Anca Maria Tache and Mihaela Gaman and Radu Tudor Ionescu}, + journal = {ArXiv}, + title = {Clustering Word Embeddings with Self-Organizing Maps. Application on LaRoSeDa -- A Large Romanian Sentiment Data Set}, + year = {2021}, } """, ) diff --git a/mteb/tasks/Classification/ron/RomanianSentimentClassification.py b/mteb/tasks/Classification/ron/RomanianSentimentClassification.py index 1bcfd0052c..3622620d50 100644 --- a/mteb/tasks/Classification/ron/RomanianSentimentClassification.py +++ b/mteb/tasks/Classification/ron/RomanianSentimentClassification.py @@ -29,11 +29,12 @@ class RomanianSentimentClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{dumitrescu2020birth, - title={The birth of Romanian BERT}, - author={Dumitrescu, Stefan Daniel and Avram, Andrei-Marius and Pyysalo, Sampo}, - journal={arXiv preprint arXiv:2009.08712}, - year={2020} + bibtex_citation=r""" +@article{dumitrescu2020birth, + author = {Dumitrescu, Stefan Daniel and Avram, Andrei-Marius and Pyysalo, Sampo}, + journal = {arXiv preprint arXiv:2009.08712}, + title = {The birth of Romanian BERT}, + year = {2020}, } """, ) diff --git a/mteb/tasks/Classification/rus/HeadlineClassification.py b/mteb/tasks/Classification/rus/HeadlineClassification.py index ca16fd6a85..9def591d0c 100644 --- a/mteb/tasks/Classification/rus/HeadlineClassification.py +++ b/mteb/tasks/Classification/rus/HeadlineClassification.py @@ -26,30 +26,32 @@ class HeadlineClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{gudkov-etal-2020-automatically, - title = "Automatically Ranked {R}ussian Paraphrase Corpus for 
Text Generation", - author = "Gudkov, Vadim and - Mitrofanova, Olga and - Filippskikh, Elizaveta", - editor = "Birch, Alexandra and - Finch, Andrew and - Hayashi, Hiroaki and - Heafield, Kenneth and - Junczys-Dowmunt, Marcin and - Konstas, Ioannis and - Li, Xian and - Neubig, Graham and - Oda, Yusuke", - booktitle = "Proceedings of the Fourth Workshop on Neural Generation and Translation", - month = jul, - year = "2020", - address = "Online", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2020.ngt-1.6", - doi = "10.18653/v1/2020.ngt-1.6", - pages = "54--59", - abstract = "The article is focused on automatic development and ranking of a large corpus for Russian paraphrase generation which proves to be the first corpus of such type in Russian computational linguistics. Existing manually annotated paraphrase datasets for Russian are limited to small-sized ParaPhraser corpus and ParaPlag which are suitable for a set of NLP tasks, such as paraphrase and plagiarism detection, sentence similarity and relatedness estimation, etc. Due to size restrictions, these datasets can hardly be applied in end-to-end text generation solutions. Meanwhile, paraphrase generation requires a large amount of training data. In our study we propose a solution to the problem: we collect, rank and evaluate a new publicly available headline paraphrase corpus (ParaPhraser Plus), and then perform text generation experiments with manual evaluation on automatically ranked corpora using the Universal Transformer architecture.", - }""", + bibtex_citation=r""" +@inproceedings{gudkov-etal-2020-automatically, + abstract = {The article is focused on automatic development and ranking of a large corpus for Russian paraphrase generation which proves to be the first corpus of such type in Russian computational linguistics. 
Existing manually annotated paraphrase datasets for Russian are limited to small-sized ParaPhraser corpus and ParaPlag which are suitable for a set of NLP tasks, such as paraphrase and plagiarism detection, sentence similarity and relatedness estimation, etc. Due to size restrictions, these datasets can hardly be applied in end-to-end text generation solutions. Meanwhile, paraphrase generation requires a large amount of training data. In our study we propose a solution to the problem: we collect, rank and evaluate a new publicly available headline paraphrase corpus (ParaPhraser Plus), and then perform text generation experiments with manual evaluation on automatically ranked corpora using the Universal Transformer architecture.}, + address = {Online}, + author = {Gudkov, Vadim and +Mitrofanova, Olga and +Filippskikh, Elizaveta}, + booktitle = {Proceedings of the Fourth Workshop on Neural Generation and Translation}, + doi = {10.18653/v1/2020.ngt-1.6}, + editor = {Birch, Alexandra and +Finch, Andrew and +Hayashi, Hiroaki and +Heafield, Kenneth and +Junczys-Dowmunt, Marcin and +Konstas, Ioannis and +Li, Xian and +Neubig, Graham and +Oda, Yusuke}, + month = jul, + pages = {54--59}, + publisher = {Association for Computational Linguistics}, + title = {Automatically Ranked {R}ussian Paraphrase Corpus for Text Generation}, + url = {https://aclanthology.org/2020.ngt-1.6}, + year = {2020}, +} +""", prompt="Classify the topic or theme of the given news headline", ) diff --git a/mteb/tasks/Classification/rus/InappropriatenessClassification.py b/mteb/tasks/Classification/rus/InappropriatenessClassification.py index d65ca5eb3c..7ff0ed11b2 100644 --- a/mteb/tasks/Classification/rus/InappropriatenessClassification.py +++ b/mteb/tasks/Classification/rus/InappropriatenessClassification.py @@ -26,34 +26,36 @@ class InappropriatenessClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - 
bibtex_citation="""@inproceedings{babakov-etal-2021-detecting, - title = "Detecting Inappropriate Messages on Sensitive Topics that Could Harm a Company{'}s Reputation", - author = "Babakov, Nikolay and - Logacheva, Varvara and - Kozlova, Olga and - Semenov, Nikita and - Panchenko, Alexander", - editor = "Babych, Bogdan and - Kanishcheva, Olga and - Nakov, Preslav and - Piskorski, Jakub and - Pivovarova, Lidia and - Starko, Vasyl and - Steinberger, Josef and - Yangarber, Roman and - Marci{\'n}czuk, Micha{\l} and - Pollak, Senja and - P{\v{r}}ib{\'a}{\v{n}}, Pavel and - Robnik-{\v{S}}ikonja, Marko", - booktitle = "Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing", - month = apr, - year = "2021", - address = "Kiyv, Ukraine", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.bsnlp-1.4", - pages = "26--36", - abstract = "Not all topics are equally {``}flammable{''} in terms of toxicity: a calm discussion of turtles or fishing less often fuels inappropriate toxic dialogues than a discussion of politics or sexual minorities. We define a set of sensitive topics that can yield inappropriate and toxic messages and describe the methodology of collecting and labelling a dataset for appropriateness. While toxicity in user-generated data is well-studied, we aim at defining a more fine-grained notion of inappropriateness. The core of inappropriateness is that it can harm the reputation of a speaker. This is different from toxicity in two respects: (i) inappropriateness is topic-related, and (ii) inappropriate message is not toxic but still unacceptable. We collect and release two datasets for Russian: a topic-labelled dataset and an appropriateness-labelled dataset. 
We also release pre-trained classification models trained on this data.", - }""", + bibtex_citation=r""" +@inproceedings{babakov-etal-2021-detecting, + abstract = {Not all topics are equally {``}flammable{''} in terms of toxicity: a calm discussion of turtles or fishing less often fuels inappropriate toxic dialogues than a discussion of politics or sexual minorities. We define a set of sensitive topics that can yield inappropriate and toxic messages and describe the methodology of collecting and labelling a dataset for appropriateness. While toxicity in user-generated data is well-studied, we aim at defining a more fine-grained notion of inappropriateness. The core of inappropriateness is that it can harm the reputation of a speaker. This is different from toxicity in two respects: (i) inappropriateness is topic-related, and (ii) inappropriate message is not toxic but still unacceptable. We collect and release two datasets for Russian: a topic-labelled dataset and an appropriateness-labelled dataset. 
We also release pre-trained classification models trained on this data.}, + address = {Kiyv, Ukraine}, + author = {Babakov, Nikolay and +Logacheva, Varvara and +Kozlova, Olga and +Semenov, Nikita and +Panchenko, Alexander}, + booktitle = {Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing}, + editor = {Babych, Bogdan and +Kanishcheva, Olga and +Nakov, Preslav and +Piskorski, Jakub and +Pivovarova, Lidia and +Starko, Vasyl and +Steinberger, Josef and +Yangarber, Roman and +Marci{\'n}czuk, Micha{\l} and +Pollak, Senja and +P{\v{r}}ib{\'a}{\v{n}}, Pavel and +Robnik-{\v{S}}ikonja, Marko}, + month = apr, + pages = {26--36}, + publisher = {Association for Computational Linguistics}, + title = {Detecting Inappropriate Messages on Sensitive Topics that Could Harm a Company{'}s Reputation}, + url = {https://aclanthology.org/2021.bsnlp-1.4}, + year = {2021}, +} +""", prompt="Classify the given message as either sensitive topic or not", ) @@ -85,33 +87,35 @@ class InappropriatenessClassificationv2(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{babakov-etal-2021-detecting, - title = "Detecting Inappropriate Messages on Sensitive Topics that Could Harm a Company{'}s Reputation", - author = "Babakov, Nikolay and - Logacheva, Varvara and - Kozlova, Olga and - Semenov, Nikita and - Panchenko, Alexander", - editor = "Babych, Bogdan and - Kanishcheva, Olga and - Nakov, Preslav and - Piskorski, Jakub and - Pivovarova, Lidia and - Starko, Vasyl and - Steinberger, Josef and - Yangarber, Roman and - Marci{\'n}czuk, Micha{\l} and - Pollak, Senja and - P{\v{r}}ib{\'a}{\v{n}}, Pavel and - Robnik-{\v{S}}ikonja, Marko", - booktitle = "Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing", - month = apr, - year = "2021", - address = "Kiyv, Ukraine", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.bsnlp-1.4", 
- pages = "26--36", - abstract = "Not all topics are equally {``}flammable{''} in terms of toxicity: a calm discussion of turtles or fishing less often fuels inappropriate toxic dialogues than a discussion of politics or sexual minorities. We define a set of sensitive topics that can yield inappropriate and toxic messages and describe the methodology of collecting and labelling a dataset for appropriateness. While toxicity in user-generated data is well-studied, we aim at defining a more fine-grained notion of inappropriateness. The core of inappropriateness is that it can harm the reputation of a speaker. This is different from toxicity in two respects: (i) inappropriateness is topic-related, and (ii) inappropriate message is not toxic but still unacceptable. We collect and release two datasets for Russian: a topic-labelled dataset and an appropriateness-labelled dataset. We also release pre-trained classification models trained on this data.", - }""", + bibtex_citation=r""" +@inproceedings{babakov-etal-2021-detecting, + abstract = {Not all topics are equally {``}flammable{''} in terms of toxicity: a calm discussion of turtles or fishing less often fuels inappropriate toxic dialogues than a discussion of politics or sexual minorities. We define a set of sensitive topics that can yield inappropriate and toxic messages and describe the methodology of collecting and labelling a dataset for appropriateness. While toxicity in user-generated data is well-studied, we aim at defining a more fine-grained notion of inappropriateness. The core of inappropriateness is that it can harm the reputation of a speaker. This is different from toxicity in two respects: (i) inappropriateness is topic-related, and (ii) inappropriate message is not toxic but still unacceptable. We collect and release two datasets for Russian: a topic-labelled dataset and an appropriateness-labelled dataset. 
We also release pre-trained classification models trained on this data.}, + address = {Kiyv, Ukraine}, + author = {Babakov, Nikolay and +Logacheva, Varvara and +Kozlova, Olga and +Semenov, Nikita and +Panchenko, Alexander}, + booktitle = {Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing}, + editor = {Babych, Bogdan and +Kanishcheva, Olga and +Nakov, Preslav and +Piskorski, Jakub and +Pivovarova, Lidia and +Starko, Vasyl and +Steinberger, Josef and +Yangarber, Roman and +Marci{\'n}czuk, Micha{\l} and +Pollak, Senja and +P{\v{r}}ib{\'a}{\v{n}}, Pavel and +Robnik-{\v{S}}ikonja, Marko}, + month = apr, + pages = {26--36}, + publisher = {Association for Computational Linguistics}, + title = {Detecting Inappropriate Messages on Sensitive Topics that Could Harm a Company{'}s Reputation}, + url = {https://aclanthology.org/2021.bsnlp-1.4}, + year = {2021}, +} +""", prompt="Classify the given message as either sensitive topic or not", ) diff --git a/mteb/tasks/Classification/rus/KinopoiskClassification.py b/mteb/tasks/Classification/rus/KinopoiskClassification.py index 2fa32a7fdf..cdde48716b 100644 --- a/mteb/tasks/Classification/rus/KinopoiskClassification.py +++ b/mteb/tasks/Classification/rus/KinopoiskClassification.py @@ -26,14 +26,16 @@ class KinopoiskClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{blinov2013research, - title={Research of lexical approach and machine learning methods for sentiment analysis}, - author={Blinov, PD and Klekovkina, Maria and Kotelnikov, Eugeny and Pestov, Oleg}, - journal={Computational Linguistics and Intellectual Technologies}, - volume={2}, - number={12}, - pages={48--58}, - year={2013} - }""", + bibtex_citation=r""" +@article{blinov2013research, + author = {Blinov, PD and Klekovkina, Maria and Kotelnikov, Eugeny and Pestov, Oleg}, + journal = {Computational Linguistics and Intellectual Technologies}, + number = {12}, + 
pages = {48--58}, + title = {Research of lexical approach and machine learning methods for sentiment analysis}, + volume = {2}, + year = {2013}, +} +""", prompt="Classify the sentiment expressed in the given movie review text", ) diff --git a/mteb/tasks/Classification/rus/RuReviewsClassification.py b/mteb/tasks/Classification/rus/RuReviewsClassification.py index 7303f3f85d..37f9e83af3 100644 --- a/mteb/tasks/Classification/rus/RuReviewsClassification.py +++ b/mteb/tasks/Classification/rus/RuReviewsClassification.py @@ -26,18 +26,20 @@ class RuReviewsClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@INPROCEEDINGS{Smetanin-SA-2019, - author={Sergey Smetanin and Michail Komarov}, - booktitle={2019 IEEE 21st Conference on Business Informatics (CBI)}, - title={Sentiment Analysis of Product Reviews in Russian using Convolutional Neural Networks}, - year={2019}, - volume={01}, - number={}, - pages={482-486}, - doi={10.1109/CBI.2019.00062}, - ISSN={2378-1963}, - month={July} - }""", + bibtex_citation=r""" +@inproceedings{Smetanin-SA-2019, + author = {Sergey Smetanin and Michail Komarov}, + booktitle = {2019 IEEE 21st Conference on Business Informatics (CBI)}, + doi = {10.1109/CBI.2019.00062}, + issn = {2378-1963}, + month = {July}, + number = {}, + pages = {482-486}, + title = {Sentiment Analysis of Product Reviews in Russian using Convolutional Neural Networks}, + volume = {01}, + year = {2019}, +} +""", prompt="Classify product reviews into positive, negative or neutral sentiment", ) diff --git a/mteb/tasks/Classification/rus/ru_nlu_intent_classification.py b/mteb/tasks/Classification/rus/ru_nlu_intent_classification.py index ec2876118f..b6e11e8979 100644 --- a/mteb/tasks/Classification/rus/ru_nlu_intent_classification.py +++ b/mteb/tasks/Classification/rus/ru_nlu_intent_classification.py @@ -38,13 +38,15 @@ class RuNLUIntentClassification(AbsTaskClassification, MultilingualTask): 
annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{liu2019benchmarkingnaturallanguageunderstanding, - title={Benchmarking Natural Language Understanding Services for building Conversational Agents}, - author={Xingkun Liu and Arash Eshghi and Pawel Swietojanski and Verena Rieser}, - year={2019}, - eprint={1903.05566}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/1903.05566}, -}""", + bibtex_citation=r""" +@misc{liu2019benchmarkingnaturallanguageunderstanding, + archiveprefix = {arXiv}, + author = {Xingkun Liu and Arash Eshghi and Pawel Swietojanski and Verena Rieser}, + eprint = {1903.05566}, + primaryclass = {cs.CL}, + title = {Benchmarking Natural Language Understanding Services for building Conversational Agents}, + url = {https://arxiv.org/abs/1903.05566}, + year = {2019}, +} +""", ) diff --git a/mteb/tasks/Classification/rus/senti_ru_eval.py b/mteb/tasks/Classification/rus/senti_ru_eval.py index db5bd0e174..a935dd8c76 100644 --- a/mteb/tasks/Classification/rus/senti_ru_eval.py +++ b/mteb/tasks/Classification/rus/senti_ru_eval.py @@ -28,12 +28,13 @@ class SentiRuEval2016Classification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{loukachevitch2016sentirueval, - title={SentiRuEval-2016: overcoming time gap and data sparsity in tweet sentiment analysis}, - author={Loukachevitch, NV and Rubtsova, Yu V}, - booktitle={Computational Linguistics and Intellectual Technologies}, - pages={416--426}, - year={2016} + bibtex_citation=r""" +@inproceedings{loukachevitch2016sentirueval, + author = {Loukachevitch, NV and Rubtsova, Yu V}, + booktitle = {Computational Linguistics and Intellectual Technologies}, + pages = {416--426}, + title = {SentiRuEval-2016: overcoming time gap and data sparsity in tweet sentiment analysis}, + year = {2016}, } """, ) diff --git 
a/mteb/tasks/Classification/san/SanskritShlokasClassification.py b/mteb/tasks/Classification/san/SanskritShlokasClassification.py index 806e468f00..91b8436e8d 100644 --- a/mteb/tasks/Classification/san/SanskritShlokasClassification.py +++ b/mteb/tasks/Classification/san/SanskritShlokasClassification.py @@ -26,27 +26,27 @@ class SanskritShlokasClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{arora-2020-inltk, - title = "i{NLTK}: Natural Language Toolkit for Indic Languages", - author = "Arora, Gaurav", - editor = "Park, Eunjeong L. and - Hagiwara, Masato and - Milajevs, Dmitrijs and - Liu, Nelson F. and - Chauhan, Geeticka and - Tan, Liling", - booktitle = "Proceedings of Second Workshop for NLP Open Source Software (NLP-OSS)", - month = nov, - year = "2020", - address = "Online", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2020.nlposs-1.10", - doi = "10.18653/v1/2020.nlposs-1.10", - pages = "66--71", - abstract = "We present iNLTK, an open-source NLP library consisting of pre-trained language models and out-of-the-box support for Data Augmentation, Textual Similarity, Sentence Embeddings, Word Embeddings, Tokenization and Text Generation in 13 Indic Languages. By using pre-trained models from iNLTK for text classification on publicly available datasets, we significantly outperform previously reported results. On these datasets, we also show that by using pre-trained models and data augmentation from iNLTK, we can achieve more than 95{\%} of the previous best performance by using less than 10{\%} of the training data. 
iNLTK is already being widely used by the community and has 40,000+ downloads, 600+ stars and 100+ forks on GitHub.", - } - """, + bibtex_citation=r""" +@inproceedings{arora-2020-inltk, + abstract = {We present iNLTK, an open-source NLP library consisting of pre-trained language models and out-of-the-box support for Data Augmentation, Textual Similarity, Sentence Embeddings, Word Embeddings, Tokenization and Text Generation in 13 Indic Languages. By using pre-trained models from iNLTK for text classification on publicly available datasets, we significantly outperform previously reported results. On these datasets, we also show that by using pre-trained models and data augmentation from iNLTK, we can achieve more than 95{\%} of the previous best performance by using less than 10{\%} of the training data. iNLTK is already being widely used by the community and has 40,000+ downloads, 600+ stars and 100+ forks on GitHub.}, + address = {Online}, + author = {Arora, Gaurav}, + booktitle = {Proceedings of Second Workshop for NLP Open Source Software (NLP-OSS)}, + doi = {10.18653/v1/2020.nlposs-1.10}, + editor = {Park, Eunjeong L. and +Hagiwara, Masato and +Milajevs, Dmitrijs and +Liu, Nelson F. 
and +Chauhan, Geeticka and +Tan, Liling}, + month = nov, + pages = {66--71}, + publisher = {Association for Computational Linguistics}, + title = {i{NLTK}: Natural Language Toolkit for Indic Languages}, + url = {https://aclanthology.org/2020.nlposs-1.10}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/sin/SinhalaNewsClassification.py b/mteb/tasks/Classification/sin/SinhalaNewsClassification.py index 98d414b3c0..4b8c54a184 100644 --- a/mteb/tasks/Classification/sin/SinhalaNewsClassification.py +++ b/mteb/tasks/Classification/sin/SinhalaNewsClassification.py @@ -26,18 +26,21 @@ class SinhalaNewsClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{deSilva2015, - author = {Nisansa de Silva}, - title = {Sinhala Text Classification: Observations from the Perspective of a Resource Poor Language}, - journal = {Year of Publication}, - year = {2015}, - } - @article{dhananjaya2022, - author = {Dhananjaya et al.}, - title = {BERTifying Sinhala - A Comprehensive Analysis of Pre-trained Language Models for Sinhala Text Classification}, - journal = {Year of Publication}, - year = {2022}, - }""", + bibtex_citation=r""" +@article{deSilva2015, + author = {Nisansa de Silva}, + journal = {Year of Publication}, + title = {Sinhala Text Classification: Observations from the Perspective of a Resource Poor Language}, + year = {2015}, +} + +@article{dhananjaya2022, + author = {Dhananjaya et al.}, + journal = {Year of Publication}, + title = {BERTifying Sinhala - A Comprehensive Analysis of Pre-trained Language Models for Sinhala Text Classification}, + year = {2022}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/sin/SinhalaNewsSourceClassification.py b/mteb/tasks/Classification/sin/SinhalaNewsSourceClassification.py index a7bd9763a7..a665d1c0c9 100644 --- a/mteb/tasks/Classification/sin/SinhalaNewsSourceClassification.py +++ 
b/mteb/tasks/Classification/sin/SinhalaNewsSourceClassification.py @@ -26,13 +26,14 @@ class SinhalaNewsSourceClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @article{dhananjaya2022, - author = {Dhananjaya et al.}, - title = {BERTifying Sinhala - A Comprehensive Analysis of Pre-trained Language Models for Sinhala Text Classification}, - journal = {Year of Publication}, - year = {2022}, - }""", + bibtex_citation=r""" +@article{dhananjaya2022, + author = {Dhananjaya et al.}, + journal = {Year of Publication}, + title = {BERTifying Sinhala - A Comprehensive Analysis of Pre-trained Language Models for Sinhala Text Classification}, + year = {2022}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/slk/CSFDSKMovieReviewSentimentClassification.py b/mteb/tasks/Classification/slk/CSFDSKMovieReviewSentimentClassification.py index d13305bc9c..6577f7f315 100644 --- a/mteb/tasks/Classification/slk/CSFDSKMovieReviewSentimentClassification.py +++ b/mteb/tasks/Classification/slk/CSFDSKMovieReviewSentimentClassification.py @@ -26,14 +26,14 @@ class CSFDSKMovieReviewSentimentClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @misc{štefánik2023resources, - title={Resources and Few-shot Learners for In-context Learning in Slavic Languages}, - author={Michal Štefánik and Marek Kadlčík and Piotr Gramacki and Petr Sojka}, - year={2023}, - eprint={2304.01922}, - archivePrefix={arXiv}, - primaryClass={cs.CL} + archiveprefix = {arXiv}, + author = {Michal Štefánik and Marek Kadlčík and Piotr Gramacki and Petr Sojka}, + eprint = {2304.01922}, + primaryclass = {cs.CL}, + title = {Resources and Few-shot Learners for In-context Learning in Slavic Languages}, + year = {2023}, } """, ) diff --git a/mteb/tasks/Classification/slv/FrenkSlClassification.py 
b/mteb/tasks/Classification/slv/FrenkSlClassification.py index b5add7e3eb..f88d4ff9ff 100644 --- a/mteb/tasks/Classification/slv/FrenkSlClassification.py +++ b/mteb/tasks/Classification/slv/FrenkSlClassification.py @@ -27,15 +27,17 @@ class FrenkSlClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{ljubešić2019frenk, - title={The FRENK Datasets of Socially Unacceptable Discourse in Slovene and English}, - author={Nikola Ljubešić and Darja Fišer and Tomaž Erjavec}, - year={2019}, - eprint={1906.02045}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/1906.02045} - }""", + bibtex_citation=r""" +@misc{ljubešić2019frenk, + archiveprefix = {arXiv}, + author = {Nikola Ljubešić and Darja Fišer and Tomaž Erjavec}, + eprint = {1906.02045}, + primaryclass = {cs.CL}, + title = {The FRENK Datasets of Socially Unacceptable Discourse in Slovene and English}, + url = {https://arxiv.org/abs/1906.02045}, + year = {2019}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/spa/SpanishSentimentClassification.py b/mteb/tasks/Classification/spa/SpanishSentimentClassification.py index 28e56f87c9..785b131bbc 100644 --- a/mteb/tasks/Classification/spa/SpanishSentimentClassification.py +++ b/mteb/tasks/Classification/spa/SpanishSentimentClassification.py @@ -26,29 +26,29 @@ class SpanishSentimentClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{mollanorozy-etal-2023-cross, - title = "Cross-lingual Transfer Learning with \{P\}ersian", - author = "Mollanorozy, Sepideh and - Tanti, Marc and - Nissim, Malvina", - editor = "Beinborn, Lisa and - Goswami, Koustava and - Murado{\\u{g}}lu, Saliha and - Sorokin, Alexey and - Kumar, Ritesh and - Shcherbakov, Andreas and - Ponti, Edoardo M. 
and - Cotterell, Ryan and - Vylomova, Ekaterina", - booktitle = "Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP", - month = may, - year = "2023", - address = "Dubrovnik, Croatia", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2023.sigtyp-1.9", - doi = "10.18653/v1/2023.sigtyp-1.9", - pages = "89--95", - } - """, + bibtex_citation=r""" +@inproceedings{mollanorozy-etal-2023-cross, + address = {Dubrovnik, Croatia}, + author = {Mollanorozy, Sepideh and +Tanti, Marc and +Nissim, Malvina}, + booktitle = {Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP}, + doi = {10.18653/v1/2023.sigtyp-1.9}, + editor = {Beinborn, Lisa and +Goswami, Koustava and +Murado{\\u{g}}lu, Saliha and +Sorokin, Alexey and +Kumar, Ritesh and +Shcherbakov, Andreas and +Ponti, Edoardo M. and +Cotterell, Ryan and +Vylomova, Ekaterina}, + month = may, + pages = {89--95}, + publisher = {Association for Computational Linguistics}, + title = {Cross-lingual Transfer Learning with \{P\}ersian}, + url = {https://aclanthology.org/2023.sigtyp-1.9}, + year = {2023}, +} +""", ) diff --git a/mteb/tasks/Classification/ssw/SiswatiNewsClassification.py b/mteb/tasks/Classification/ssw/SiswatiNewsClassification.py index d51b42f88d..e5b667e289 100644 --- a/mteb/tasks/Classification/ssw/SiswatiNewsClassification.py +++ b/mteb/tasks/Classification/ssw/SiswatiNewsClassification.py @@ -26,8 +26,17 @@ class SiswatiNewsClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{Madodonga_Marivate_Adendorff_2023, title={Izindaba-Tindzaba: Machine learning news categorisation for Long and Short Text for isiZulu and Siswati}, volume={4}, url={https://upjournals.up.ac.za/index.php/dhasa/article/view/4449}, DOI={10.55492/dhasa.v4i01.4449}, author={Madodonga, Andani and Marivate, Vukosi and 
Adendorff, Matthew}, year={2023}, month={Jan.} } - """, + bibtex_citation=r""" +@article{Madodonga_Marivate_Adendorff_2023, + author = {Madodonga, Andani and Marivate, Vukosi and Adendorff, Matthew}, + doi = {10.55492/dhasa.v4i01.4449}, + month = {Jan.}, + title = {Izindaba-Tindzaba: Machine learning news categorisation for Long and Short Text for isiZulu and Siswati}, + url = {https://upjournals.up.ac.za/index.php/dhasa/article/view/4449}, + volume = {4}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/svk/SlovakMovieReviewSentimentClassification.py b/mteb/tasks/Classification/svk/SlovakMovieReviewSentimentClassification.py index 8918c4a1a4..25df08775d 100644 --- a/mteb/tasks/Classification/svk/SlovakMovieReviewSentimentClassification.py +++ b/mteb/tasks/Classification/svk/SlovakMovieReviewSentimentClassification.py @@ -26,14 +26,14 @@ class SlovakMovieReviewSentimentClassification(AbsTaskClassification): license="cc-by-nc-sa-4.0", annotations_creators="derived", sample_creation="found", - bibtex_citation=""" - @article{vstefanik2023resources, - title={Resources and Few-shot Learners for In-context Learning in Slavic Languages}, - author={{\v{S}}tef{\'a}nik, Michal and Kadl{\v{c}}{\'\i}k, Marek and Gramacki, Piotr and Sojka, Petr}, - journal={arXiv preprint arXiv:2304.01922}, - year={2023} - } - """, + bibtex_citation=r""" +@article{vstefanik2023resources, + author = {{\v{S}}tef{\'a}nik, Michal and Kadl{\v{c}}{\'\i}k, Marek and Gramacki, Piotr and Sojka, Petr}, + journal = {arXiv preprint arXiv:2304.01922}, + title = {Resources and Few-shot Learners for In-context Learning in Slavic Languages}, + year = {2023}, +} +""", ) def dataset_transform(self) -> None: diff --git a/mteb/tasks/Classification/swa/SwahiliNewsClassification.py b/mteb/tasks/Classification/swa/SwahiliNewsClassification.py index 6a4cb6bdc8..518b749de0 100644 --- a/mteb/tasks/Classification/swa/SwahiliNewsClassification.py +++ 
b/mteb/tasks/Classification/swa/SwahiliNewsClassification.py @@ -26,16 +26,16 @@ class SwahiliNewsClassification(AbsTaskClassification): license="cc-by-nc-sa-4.0", annotations_creators="derived", sample_creation="found", - bibtex_citation=""" - @inproceedings{davis2020swahili, - title = "Swahili: News Classification Dataset (0.2)", - author = "Davis, David", - year = "2020", - publisher = "Zenodo", - doi = "10.5281/zenodo.5514203", - url = "https://doi.org/10.5281/zenodo.5514203" - } - """, + bibtex_citation=r""" +@inproceedings{davis2020swahili, + author = {Davis, David}, + doi = {10.5281/zenodo.5514203}, + publisher = {Zenodo}, + title = {Swahili: News Classification Dataset (0.2)}, + url = {https://doi.org/10.5281/zenodo.5514203}, + year = {2020}, +} +""", ) def dataset_transform(self) -> None: diff --git a/mteb/tasks/Classification/swe/DalajClassification.py b/mteb/tasks/Classification/swe/DalajClassification.py index 780fe65dbf..05983d0e4f 100644 --- a/mteb/tasks/Classification/swe/DalajClassification.py +++ b/mteb/tasks/Classification/swe/DalajClassification.py @@ -29,12 +29,14 @@ class DalajClassification(AbsTaskClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@misc{2105.06681, -Author = {Elena Volodina and Yousuf Ali Mohammed and Julia Klezl}, -Title = {DaLAJ - a dataset for linguistic acceptability judgments for Swedish: Format, baseline, sharing}, -Year = {2021}, -Eprint = {arXiv:2105.06681}, -}""", + bibtex_citation=r""" +@misc{2105.06681, + author = {Elena Volodina and Yousuf Ali Mohammed and Julia Klezl}, + eprint = {arXiv:2105.06681}, + title = {DaLAJ - a dataset for linguistic acceptability judgments for Swedish: Format, baseline, sharing}, + year = {2021}, +} +""", prompt="Classify texts based on linguistic acceptability in Swedish", ) diff --git a/mteb/tasks/Classification/swe/SweRecClassification.py b/mteb/tasks/Classification/swe/SweRecClassification.py index 
7083ade1fb..8cc7b8dff8 100644 --- a/mteb/tasks/Classification/swe/SweRecClassification.py +++ b/mteb/tasks/Classification/swe/SweRecClassification.py @@ -26,18 +26,19 @@ class SweRecClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{nielsen-2023-scandeval, - title = "{S}cand{E}val: A Benchmark for {S}candinavian Natural Language Processing", - author = "Nielsen, Dan", - editor = {Alum{\"a}e, Tanel and - Fishel, Mark}, - booktitle = "Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)", - month = may, - year = "2023", - address = "T{\'o}rshavn, Faroe Islands", - publisher = "University of Tartu Library", - url = "https://aclanthology.org/2023.nodalida-1.20", - pages = "185--201", + bibtex_citation=r""" +@inproceedings{nielsen-2023-scandeval, + address = {T{\'o}rshavn, Faroe Islands}, + author = {Nielsen, Dan}, + booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)}, + editor = {Alum{\"a}e, Tanel and +Fishel, Mark}, + month = may, + pages = {185--201}, + publisher = {University of Tartu Library}, + title = {{S}cand{E}val: A Benchmark for {S}candinavian Natural Language Processing}, + url = {https://aclanthology.org/2023.nodalida-1.20}, + year = {2023}, } """, prompt="Classify Swedish reviews by sentiment", diff --git a/mteb/tasks/Classification/tam/TamilNewsClassification.py b/mteb/tasks/Classification/tam/TamilNewsClassification.py index af9698d0b1..3f4505bce8 100644 --- a/mteb/tasks/Classification/tam/TamilNewsClassification.py +++ b/mteb/tasks/Classification/tam/TamilNewsClassification.py @@ -26,12 +26,14 @@ class TamilNewsClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{kunchukuttan2020indicnlpcorpus, - title={AI4Bharat-IndicNLP Corpus: Monolingual Corpora and Word Embeddings for Indic Languages}, - author={Anoop 
Kunchukuttan and Divyanshu Kakwani and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. Khapra and Pratyush Kumar}, - year={2020}, - journal={arXiv preprint arXiv:2005.00085}, -}""", + bibtex_citation=r""" +@article{kunchukuttan2020indicnlpcorpus, + author = {Anoop Kunchukuttan and Divyanshu Kakwani and Satish Golla and Gokul N.C. and Avik Bhattacharyya and Mitesh M. Khapra and Pratyush Kumar}, + journal = {arXiv preprint arXiv:2005.00085}, + title = {AI4Bharat-IndicNLP Corpus: Monolingual Corpora and Word Embeddings for Indic Languages}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/tha/WisesightSentimentClassification.py b/mteb/tasks/Classification/tha/WisesightSentimentClassification.py index 3a76003d5b..7bd83863d2 100644 --- a/mteb/tasks/Classification/tha/WisesightSentimentClassification.py +++ b/mteb/tasks/Classification/tha/WisesightSentimentClassification.py @@ -27,20 +27,20 @@ class WisesightSentimentClassification(AbsTaskClassification): license="cc0-1.0", annotations_creators="expert-annotated", sample_creation="found", - bibtex_citation="""@software{bact_2019_3457447, - author = {Suriyawongkul, Arthit and - Chuangsuwanich, Ekapol and - Chormai, Pattarawat and - Polpanumas, Charin}, - title = {PyThaiNLP/wisesight-sentiment: First release}, - month = sep, - year = 2019, - publisher = {Zenodo}, - version = {v1.0}, - doi = {10.5281/zenodo.3457447}, - url = {https://doi.org/10.5281/zenodo.3457447} + bibtex_citation=r""" +@software{bact_2019_3457447, + author = {Suriyawongkul, Arthit and +Chuangsuwanich, Ekapol and +Chormai, Pattarawat and +Polpanumas, Charin}, + doi = {10.5281/zenodo.3457447}, + month = sep, + publisher = {Zenodo}, + title = {PyThaiNLP/wisesight-sentiment: First release}, + url = {https://doi.org/10.5281/zenodo.3457447}, + version = {v1.0}, + year = {2019}, } - """, ) diff --git a/mteb/tasks/Classification/tha/WongnaiReviewsClassification .py 
b/mteb/tasks/Classification/tha/WongnaiReviewsClassification .py index 4afd64dd21..9a51214759 100644 --- a/mteb/tasks/Classification/tha/WongnaiReviewsClassification .py +++ b/mteb/tasks/Classification/tha/WongnaiReviewsClassification .py @@ -26,17 +26,18 @@ class WongnaiReviewsClassification(AbsTaskClassification): license="lgpl-3.0", annotations_creators="derived", sample_creation="found", - bibtex_citation=""" - @software{cstorm125_2020_3852912, - author = {cstorm125 and lukkiddd}, - title = {PyThaiNLP/classification-benchmarks: v0.1-alpha}, - month = may, - year = 2020, - publisher = {Zenodo}, - version = {v0.1-alpha}, - doi = {10.5281/zenodo.3852912}, - url = {https://doi.org/10.5281/zenodo.3852912} - }""", + bibtex_citation=r""" +@software{cstorm125_2020_3852912, + author = {cstorm125 and lukkiddd}, + doi = {10.5281/zenodo.3852912}, + month = may, + publisher = {Zenodo}, + title = {PyThaiNLP/classification-benchmarks: v0.1-alpha}, + url = {https://doi.org/10.5281/zenodo.3852912}, + version = {v0.1-alpha}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/tsn/TswanaNewsClassification.py b/mteb/tasks/Classification/tsn/TswanaNewsClassification.py index c1eee27779..e9095fd0d3 100644 --- a/mteb/tasks/Classification/tsn/TswanaNewsClassification.py +++ b/mteb/tasks/Classification/tsn/TswanaNewsClassification.py @@ -26,16 +26,16 @@ class TswanaNewsClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{marivate2023puoberta, - title = {PuoBERTa: Training and evaluation of a curated language model for Setswana}, - author = {Vukosi Marivate and Moseli Mots'Oehli and Valencia Wagner and Richard Lastrucci and Isheanesu Dzingirai}, - year = {2023}, - booktitle= {SACAIR 2023 (To Appear)}, - keywords = {NLP}, - preprint_url = {https://arxiv.org/abs/2310.09141}, - dataset_url = {https://github.com/dsfsi/PuoBERTa}, - software_url = 
{https://huggingface.co/dsfsi/PuoBERTa} - } - """, + bibtex_citation=r""" +@inproceedings{marivate2023puoberta, + author = {Vukosi Marivate and Moseli Mots'Oehli and Valencia Wagner and Richard Lastrucci and Isheanesu Dzingirai}, + booktitle = {SACAIR 2023 (To Appear)}, + dataset_url = {https://github.com/dsfsi/PuoBERTa}, + keywords = {NLP}, + preprint_url = {https://arxiv.org/abs/2310.09141}, + software_url = {https://huggingface.co/dsfsi/PuoBERTa}, + title = {PuoBERTa: Training and evaluation of a curated language model for Setswana}, + year = {2023}, +} +""", ) diff --git a/mteb/tasks/Classification/tur/TurkishMovieSentimentClassification.py b/mteb/tasks/Classification/tur/TurkishMovieSentimentClassification.py index 64981c6ec2..680b52009b 100644 --- a/mteb/tasks/Classification/tur/TurkishMovieSentimentClassification.py +++ b/mteb/tasks/Classification/tur/TurkishMovieSentimentClassification.py @@ -26,15 +26,15 @@ class TurkishMovieSentimentClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{Demirtas2013CrosslingualPD, - title={Cross-lingual polarity detection with machine translation}, - author={Erkin Demirtas and Mykola Pechenizkiy}, - booktitle={wisdom}, - year={2013}, - url={https://api.semanticscholar.org/CorpusID:3912960} - } - """, + bibtex_citation=r""" +@inproceedings{Demirtas2013CrosslingualPD, + author = {Erkin Demirtas and Mykola Pechenizkiy}, + booktitle = {wisdom}, + title = {Cross-lingual polarity detection with machine translation}, + url = {https://api.semanticscholar.org/CorpusID:3912960}, + year = {2013}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/tur/TurkishProductSentimentClassification.py b/mteb/tasks/Classification/tur/TurkishProductSentimentClassification.py index c33c537c69..7bfb086d99 100644 --- a/mteb/tasks/Classification/tur/TurkishProductSentimentClassification.py +++ 
b/mteb/tasks/Classification/tur/TurkishProductSentimentClassification.py @@ -26,13 +26,13 @@ class TurkishProductSentimentClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{Demirtas2013CrosslingualPD, - title={Cross-lingual polarity detection with machine translation}, - author={Erkin Demirtas and Mykola Pechenizkiy}, - booktitle={wisdom}, - year={2013}, - url={https://api.semanticscholar.org/CorpusID:3912960} - } - """, + bibtex_citation=r""" +@inproceedings{Demirtas2013CrosslingualPD, + author = {Erkin Demirtas and Mykola Pechenizkiy}, + booktitle = {wisdom}, + title = {Cross-lingual polarity detection with machine translation}, + url = {https://api.semanticscholar.org/CorpusID:3912960}, + year = {2013}, +} +""", ) diff --git a/mteb/tasks/Classification/ukr/UkrFormalityClassification.py b/mteb/tasks/Classification/ukr/UkrFormalityClassification.py index 0a7f08b8e0..fadc60edd8 100644 --- a/mteb/tasks/Classification/ukr/UkrFormalityClassification.py +++ b/mteb/tasks/Classification/ukr/UkrFormalityClassification.py @@ -32,16 +32,18 @@ class UkrFormalityClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="machine-translated", - bibtex_citation="""@inproceedings{rao-tetreault-2018-dear, - title = "Dear Sir or Madam, May {I} Introduce the {GYAFC} Dataset: Corpus, Benchmarks and Metrics for Formality Style Transfer", - author = "Rao, Sudha and - Tetreault, Joel", - booktitle = "Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)", - month = jun, - year = "2018", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/N18-1012", - }""", + bibtex_citation=r""" +@inproceedings{rao-tetreault-2018-dear, + author = {Rao, Sudha and +Tetreault, Joel}, + booktitle = {Proceedings of the 
2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)}, + month = jun, + publisher = {Association for Computational Linguistics}, + title = {Dear Sir or Madam, May {I} Introduce the {GYAFC} Dataset: Corpus, Benchmarks and Metrics for Formality Style Transfer}, + url = {https://aclanthology.org/N18-1012}, + year = {2018}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/urd/UrduRomanSentimentClassification.py b/mteb/tasks/Classification/urd/UrduRomanSentimentClassification.py index 62440ef9c2..ff3d5032df 100644 --- a/mteb/tasks/Classification/urd/UrduRomanSentimentClassification.py +++ b/mteb/tasks/Classification/urd/UrduRomanSentimentClassification.py @@ -27,15 +27,15 @@ class UrduRomanSentimentClassification(AbsTaskClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{misc_roman_urdu_data_set_458, - author = {Sharf,Zareen}, - title = {{Roman Urdu Data Set}}, - year = {2018}, + bibtex_citation=r""" +@misc{misc_roman_urdu_data_set_458, + author = {Sharf,Zareen}, howpublished = {UCI Machine Learning Repository}, - note = {{DOI}: https://doi.org/10.24432/C58325} + note = {{DOI}: https://doi.org/10.24432/C58325}, + title = {{Roman Urdu Data Set}}, + year = {2018}, } - """, +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/vie/VieStudentFeedbackClassification.py b/mteb/tasks/Classification/vie/VieStudentFeedbackClassification.py index 901d2861f9..8d40b89ff8 100644 --- a/mteb/tasks/Classification/vie/VieStudentFeedbackClassification.py +++ b/mteb/tasks/Classification/vie/VieStudentFeedbackClassification.py @@ -29,16 +29,18 @@ class VieStudentFeedbackClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@InProceedings{8573337, - author={Nguyen, Kiet Van and Nguyen, Vu Duc and 
Nguyen, Phu X. V. and Truong, Tham T. H. and Nguyen, Ngan Luu-Thuy}, - booktitle={2018 10th International Conference on Knowledge and Systems Engineering (KSE)}, - title={UIT-VSFC: Vietnamese Students’ Feedback Corpus for Sentiment Analysis}, - year={2018}, - volume={}, - number={}, - pages={19-24}, - doi={10.1109/KSE.2018.8573337} -}""", + bibtex_citation=r""" +@inproceedings{8573337, + author = {Nguyen, Kiet Van and Nguyen, Vu Duc and Nguyen, Phu X. V. and Truong, Tham T. H. and Nguyen, Ngan Luu-Thuy}, + booktitle = {2018 10th International Conference on Knowledge and Systems Engineering (KSE)}, + doi = {10.1109/KSE.2018.8573337}, + number = {}, + pages = {19-24}, + title = {UIT-VSFC: Vietnamese Students’ Feedback Corpus for Sentiment Analysis}, + volume = {}, + year = {2018}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Classification/zho/CMTEBClassification.py b/mteb/tasks/Classification/zho/CMTEBClassification.py index 7e790ecf9a..64fb95298a 100644 --- a/mteb/tasks/Classification/zho/CMTEBClassification.py +++ b/mteb/tasks/Classification/zho/CMTEBClassification.py @@ -26,49 +26,51 @@ class TNews(AbsTaskClassification): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings {xu-etal-2020-clue, - title = "{CLUE}: A {C}hinese Language Understanding Evaluation Benchmark", - author = "Xu, Liang and - Hu, Hai and - Zhang, Xuanwei and - Li, Lu and - Cao, Chenjie and - Li, Yudong and - Xu, Yechen and - Sun, Kai and - Yu, Dian and - Yu, Cong and - Tian, Yin and - Dong, Qianqian and - Liu, Weitang and - Shi, Bo and - Cui, Yiming and - Li, Junyi and - Zeng, Jun and - Wang, Rongzhao and - Xie, Weijian and - Li, Yanting and - Patterson, Yina and - Tian, Zuoyu and - Zhang, Yiwen and - Zhou, He and - Liu, Shaoweihua and - Zhao, Zhe and - Zhao, Qipeng and - Yue, Cong and - Zhang, Xinrui and - Yang, Zhengliang and - Richardson, Kyle and - Lan, Zhenzhong ", - booktitle = "Proceedings of the 28th International 
Conference on Computational Linguistics", - month = dec, - year = "2020", - address = "Barcelona, Spain (Online)", - publisher = "International Committee on Computational Linguistics", - url = "https://aclanthology.org/2020.coling-main.419", - doi = "10.18653/v1/2020.coling-main.419", - pages = "4762--4772", -}""", + bibtex_citation=r""" +@inproceedings{xu-etal-2020-clue, + address = {Barcelona, Spain (Online)}, + author = {Xu, Liang and +Hu, Hai and +Zhang, Xuanwei and +Li, Lu and +Cao, Chenjie and +Li, Yudong and +Xu, Yechen and +Sun, Kai and +Yu, Dian and +Yu, Cong and +Tian, Yin and +Dong, Qianqian and +Liu, Weitang and +Shi, Bo and +Cui, Yiming and +Li, Junyi and +Zeng, Jun and +Wang, Rongzhao and +Xie, Weijian and +Li, Yanting and +Patterson, Yina and +Tian, Zuoyu and +Zhang, Yiwen and +Zhou, He and +Liu, Shaoweihua and +Zhao, Zhe and +Zhao, Qipeng and +Yue, Cong and +Zhang, Xinrui and +Yang, Zhengliang and +Richardson, Kyle and +Lan, Zhenzhong }, + booktitle = {Proceedings of the 28th International Conference on Computational Linguistics}, + doi = {10.18653/v1/2020.coling-main.419}, + month = dec, + pages = {4762--4772}, + publisher = {International Committee on Computational Linguistics}, + title = {{CLUE}: A {C}hinese Language Understanding Evaluation Benchmark}, + url = {https://aclanthology.org/2020.coling-main.419}, + year = {2020}, +} +""", prompt="Classify the fine-grained category of the given news title", ) @@ -97,50 +99,52 @@ class IFlyTek(AbsTaskClassification): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings {xu-etal-2020-clue, - title = "{CLUE}: A {C}hinese Language Understanding Evaluation Benchmark", - author = "Xu, Liang and - Hu, Hai and - Zhang, Xuanwei and - Li, Lu and - Cao, Chenjie and - Li, Yudong and - Xu, Yechen and - Sun, Kai and - Yu, Dian and - Yu, Cong and - Tian, Yin and - Dong, Qianqian and - Liu, Weitang and - Shi, Bo and - Cui, Yiming and - Li, Junyi and - Zeng, Jun and - 
Wang, Rongzhao and - Xie, Weijian and - Li, Yanting and - Patterson, Yina and - Tian, Zuoyu and - Zhang, Yiwen and - Zhou, He and - Liu, Shaoweihua and - Zhao, Zhe and - Zhao, Qipeng and - Yue, Cong and - Zhang, Xinrui and - Yang, Zhengliang and - Richardson, Kyle and - Lan, Zhenzhong ", - booktitle = "Proceedings of the 28th International Conference on Computational Linguistics", - month = dec, - year = "2020", - address = "Barcelona, Spain (Online)", - publisher = "International Committee on Computational Linguistics", - url = "https://aclanthology.org/2020.coling-main.419", - doi = "10.18653/v1/2020.coling-main.419", - pages = "4762--4772", - abstract = "The advent of natural language understanding (NLU) benchmarks for English, such as GLUE and SuperGLUE allows new NLU models to be evaluated across a diverse set of tasks. These comprehensive benchmarks have facilitated a broad range of research and applications in natural language processing (NLP). The problem, however, is that most such benchmarks are limited to English, which has made it difficult to replicate many of the successes in English NLU for other languages. To help remedy this issue, we introduce the first large-scale Chinese Language Understanding Evaluation (CLUE) benchmark. CLUE is an open-ended, community-driven project that brings together 9 tasks spanning several well-established single-sentence/sentence-pair classification tasks, as well as machine reading comprehension, all on original Chinese text. To establish results on these tasks, we report scores using an exhaustive set of current state-of-the-art pre-trained Chinese models (9 in total). We also introduce a number of supplementary datasets and additional tools to help facilitate further progress on Chinese NLU. 
Our benchmark is released at https://www.cluebenchmarks.com", -}""", + bibtex_citation=r""" +@inproceedings{xu-etal-2020-clue, + abstract = {The advent of natural language understanding (NLU) benchmarks for English, such as GLUE and SuperGLUE allows new NLU models to be evaluated across a diverse set of tasks. These comprehensive benchmarks have facilitated a broad range of research and applications in natural language processing (NLP). The problem, however, is that most such benchmarks are limited to English, which has made it difficult to replicate many of the successes in English NLU for other languages. To help remedy this issue, we introduce the first large-scale Chinese Language Understanding Evaluation (CLUE) benchmark. CLUE is an open-ended, community-driven project that brings together 9 tasks spanning several well-established single-sentence/sentence-pair classification tasks, as well as machine reading comprehension, all on original Chinese text. To establish results on these tasks, we report scores using an exhaustive set of current state-of-the-art pre-trained Chinese models (9 in total). We also introduce a number of supplementary datasets and additional tools to help facilitate further progress on Chinese NLU. 
Our benchmark is released at https://www.cluebenchmarks.com}, + address = {Barcelona, Spain (Online)}, + author = {Xu, Liang and +Hu, Hai and +Zhang, Xuanwei and +Li, Lu and +Cao, Chenjie and +Li, Yudong and +Xu, Yechen and +Sun, Kai and +Yu, Dian and +Yu, Cong and +Tian, Yin and +Dong, Qianqian and +Liu, Weitang and +Shi, Bo and +Cui, Yiming and +Li, Junyi and +Zeng, Jun and +Wang, Rongzhao and +Xie, Weijian and +Li, Yanting and +Patterson, Yina and +Tian, Zuoyu and +Zhang, Yiwen and +Zhou, He and +Liu, Shaoweihua and +Zhao, Zhe and +Zhao, Qipeng and +Yue, Cong and +Zhang, Xinrui and +Yang, Zhengliang and +Richardson, Kyle and +Lan, Zhenzhong }, + booktitle = {Proceedings of the 28th International Conference on Computational Linguistics}, + doi = {10.18653/v1/2020.coling-main.419}, + month = dec, + pages = {4762--4772}, + publisher = {International Committee on Computational Linguistics}, + title = {{CLUE}: A {C}hinese Language Understanding Evaluation Benchmark}, + url = {https://aclanthology.org/2020.coling-main.419}, + year = {2020}, +} +""", prompt="Given an App description text, find the appropriate fine-grained category", ) @@ -204,12 +208,14 @@ class JDReview(AbsTaskClassification): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@article{xiao2023c, - title={C-pack: Packaged resources to advance general chinese embedding}, - author={Xiao, Shitao and Liu, Zheng and Zhang, Peitian and Muennighof, Niklas}, - journal={arXiv preprint arXiv:2309.07597}, - year={2023} -}""", + bibtex_citation=r""" +@article{xiao2023c, + author = {Xiao, Shitao and Liu, Zheng and Zhang, Peitian and Muennighof, Niklas}, + journal = {arXiv preprint arXiv:2309.07597}, + title = {C-pack: Packaged resources to advance general chinese embedding}, + year = {2023}, +} +""", prompt="Classify the customer review for iPhone on e-commerce platform into positive or negative", ) @@ -238,12 +244,14 @@ class OnlineShopping(AbsTaskClassification): 
annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@article{xiao2023c, - title={C-pack: Packaged resources to advance general chinese embedding}, - author={Xiao, Shitao and Liu, Zheng and Zhang, Peitian and Muennighof, Niklas}, - journal={arXiv preprint arXiv:2309.07597}, - year={2023} -}""", + bibtex_citation=r""" +@article{xiao2023c, + author = {Xiao, Shitao and Liu, Zheng and Zhang, Peitian and Muennighof, Niklas}, + journal = {arXiv preprint arXiv:2309.07597}, + title = {C-pack: Packaged resources to advance general chinese embedding}, + year = {2023}, +} +""", prompt="Classify the customer review for online shopping into positive or negative", ) @@ -272,12 +280,14 @@ class Waimai(AbsTaskClassification): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@article{xiao2023c, - title={C-pack: Packaged resources to advance general chinese embedding}, - author={Xiao, Shitao and Liu, Zheng and Zhang, Peitian and Muennighof, Niklas}, - journal={arXiv preprint arXiv:2309.07597}, - year={2023} -}""", + bibtex_citation=r""" +@article{xiao2023c, + author = {Xiao, Shitao and Liu, Zheng and Zhang, Peitian and Muennighof, Niklas}, + journal = {arXiv preprint arXiv:2309.07597}, + title = {C-pack: Packaged resources to advance general chinese embedding}, + year = {2023}, +} +""", prompt="Classify the customer review from a food takeaway platform into positive or negative", ) diff --git a/mteb/tasks/Classification/zho/YueOpenriceReviewClassification.py b/mteb/tasks/Classification/zho/YueOpenriceReviewClassification.py index 2189708719..7c6134a731 100644 --- a/mteb/tasks/Classification/zho/YueOpenriceReviewClassification.py +++ b/mteb/tasks/Classification/zho/YueOpenriceReviewClassification.py @@ -26,14 +26,16 @@ class YueOpenriceReviewClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{xiang2019sentiment, - 
title={Sentiment Augmented Attention Network for Cantonese Restaurant Review Analysis}, - author={Xiang, Rong and Jiao, Ying and Lu, Qin}, - booktitle={Proceedings of the 8th KDD Workshop on Issues of Sentiment Discovery and Opinion Mining (WISDOM)}, - pages={1--9}, - year={2019}, - organization={KDD WISDOM} -}""", + bibtex_citation=r""" +@inproceedings{xiang2019sentiment, + author = {Xiang, Rong and Jiao, Ying and Lu, Qin}, + booktitle = {Proceedings of the 8th KDD Workshop on Issues of Sentiment Discovery and Opinion Mining (WISDOM)}, + organization = {KDD WISDOM}, + pages = {1--9}, + title = {Sentiment Augmented Attention Network for Cantonese Restaurant Review Analysis}, + year = {2019}, +} +""", ) samples_per_label = 32 diff --git a/mteb/tasks/Classification/zul/IsiZuluNewsClassification.py b/mteb/tasks/Classification/zul/IsiZuluNewsClassification.py index 26e3d16553..f8ca8c8e36 100644 --- a/mteb/tasks/Classification/zul/IsiZuluNewsClassification.py +++ b/mteb/tasks/Classification/zul/IsiZuluNewsClassification.py @@ -26,8 +26,17 @@ class IsiZuluNewsClassification(AbsTaskClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{Madodonga_Marivate_Adendorff_2023, title={Izindaba-Tindzaba: Machine learning news categorisation for Long and Short Text for isiZulu and Siswati}, volume={4}, url={https://upjournals.up.ac.za/index.php/dhasa/article/view/4449}, DOI={10.55492/dhasa.v4i01.4449}, author={Madodonga, Andani and Marivate, Vukosi and Adendorff, Matthew}, year={2023}, month={Jan.} } - """, + bibtex_citation=r""" +@article{Madodonga_Marivate_Adendorff_2023, + author = {Madodonga, Andani and Marivate, Vukosi and Adendorff, Matthew}, + doi = {10.55492/dhasa.v4i01.4449}, + month = {Jan.}, + title = {Izindaba-Tindzaba: Machine learning news categorisation for Long and Short Text for isiZulu and Siswati}, + url = {https://upjournals.up.ac.za/index.php/dhasa/article/view/4449}, + volume = {4}, + year = 
{2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Clustering/deu/BlurbsClusteringP2P.py b/mteb/tasks/Clustering/deu/BlurbsClusteringP2P.py index e8407b2429..859b13f5a7 100644 --- a/mteb/tasks/Clustering/deu/BlurbsClusteringP2P.py +++ b/mteb/tasks/Clustering/deu/BlurbsClusteringP2P.py @@ -32,13 +32,15 @@ class BlurbsClusteringP2P(AbsTaskClustering): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{Remus2019GermEval2T, - title={GermEval 2019 Task 1: Hierarchical Classification of Blurbs}, - author={Steffen Remus and Rami Aly and Chris Biemann}, - booktitle={Conference on Natural Language Processing}, - year={2019}, - url={https://api.semanticscholar.org/CorpusID:208334484} -}""", + bibtex_citation=r""" +@inproceedings{Remus2019GermEval2T, + author = {Steffen Remus and Rami Aly and Chris Biemann}, + booktitle = {Conference on Natural Language Processing}, + title = {GermEval 2019 Task 1: Hierarchical Classification of Blurbs}, + url = {https://api.semanticscholar.org/CorpusID:208334484}, + year = {2019}, +} +""", ) @@ -72,13 +74,15 @@ class BlurbsClusteringP2PFast(AbsTaskClusteringFast): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{Remus2019GermEval2T, - title={GermEval 2019 Task 1: Hierarchical Classification of Blurbs}, - author={Steffen Remus and Rami Aly and Chris Biemann}, - booktitle={Conference on Natural Language Processing}, - year={2019}, - url={https://api.semanticscholar.org/CorpusID:208334484} -}""", + bibtex_citation=r""" +@inproceedings{Remus2019GermEval2T, + author = {Steffen Remus and Rami Aly and Chris Biemann}, + booktitle = {Conference on Natural Language Processing}, + title = {GermEval 2019 Task 1: Hierarchical Classification of Blurbs}, + url = {https://api.semanticscholar.org/CorpusID:208334484}, + year = {2019}, +} +""", adapted_from=["BlurbsClusteringP2P"], ) diff --git 
a/mteb/tasks/Clustering/deu/BlurbsClusteringS2S.py b/mteb/tasks/Clustering/deu/BlurbsClusteringS2S.py index 7847ecd768..354efa59ec 100644 --- a/mteb/tasks/Clustering/deu/BlurbsClusteringS2S.py +++ b/mteb/tasks/Clustering/deu/BlurbsClusteringS2S.py @@ -40,13 +40,15 @@ class BlurbsClusteringS2S(AbsTaskClustering): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{Remus2019GermEval2T, - title={GermEval 2019 Task 1: Hierarchical Classification of Blurbs}, - author={Steffen Remus and Rami Aly and Chris Biemann}, - booktitle={Conference on Natural Language Processing}, - year={2019}, - url={https://api.semanticscholar.org/CorpusID:208334484} -}""", + bibtex_citation=r""" +@inproceedings{Remus2019GermEval2T, + author = {Steffen Remus and Rami Aly and Chris Biemann}, + booktitle = {Conference on Natural Language Processing}, + title = {GermEval 2019 Task 1: Hierarchical Classification of Blurbs}, + url = {https://api.semanticscholar.org/CorpusID:208334484}, + year = {2019}, +} +""", ) @@ -81,13 +83,15 @@ class BlurbsClusteringS2SFast(AbsTaskClusteringFast): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{Remus2019GermEval2T, - title={GermEval 2019 Task 1: Hierarchical Classification of Blurbs}, - author={Steffen Remus and Rami Aly and Chris Biemann}, - booktitle={Conference on Natural Language Processing}, - year={2019}, - url={https://api.semanticscholar.org/CorpusID:208334484} -}""", + bibtex_citation=r""" +@inproceedings{Remus2019GermEval2T, + author = {Steffen Remus and Rami Aly and Chris Biemann}, + booktitle = {Conference on Natural Language Processing}, + title = {GermEval 2019 Task 1: Hierarchical Classification of Blurbs}, + url = {https://api.semanticscholar.org/CorpusID:208334484}, + year = {2019}, +} +""", adapted_from=["BlurbsClusteringS2S"], ) diff --git a/mteb/tasks/Clustering/eng/ArxivClusteringP2P.py b/mteb/tasks/Clustering/eng/ArxivClusteringP2P.py 
index 36155e7efc..efdd2a828b 100644 --- a/mteb/tasks/Clustering/eng/ArxivClusteringP2P.py +++ b/mteb/tasks/Clustering/eng/ArxivClusteringP2P.py @@ -29,14 +29,16 @@ class ArxivClusteringP2P(AbsTaskClustering): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{arxiv_org_submitters_2024, - title={arXiv Dataset}, - url={https://www.kaggle.com/dsv/7548853}, - DOI={10.34740/KAGGLE/DSV/7548853}, - publisher={Kaggle}, - author={arXiv.org submitters}, - year={2024} -}""", + bibtex_citation=r""" +@misc{arxiv_org_submitters_2024, + author = {arXiv.org submitters}, + doi = {10.34740/KAGGLE/DSV/7548853}, + publisher = {Kaggle}, + title = {arXiv Dataset}, + url = {https://www.kaggle.com/dsv/7548853}, + year = {2024}, +} +""", prompt="Identify the main and secondary category of Arxiv papers based on the titles and abstracts", ) @@ -67,14 +69,16 @@ class ArxivClusteringP2PFast(AbsTaskClustering): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{arxiv_org_submitters_2024, - title={arXiv Dataset}, - url={https://www.kaggle.com/dsv/7548853}, - DOI={10.34740/KAGGLE/DSV/7548853}, - publisher={Kaggle}, - author={arXiv.org submitters}, - year={2024} -}""", # None found + bibtex_citation=r""" +@misc{arxiv_org_submitters_2024, + author = {arXiv.org submitters}, + doi = {10.34740/KAGGLE/DSV/7548853}, + publisher = {Kaggle}, + title = {arXiv Dataset}, + url = {https://www.kaggle.com/dsv/7548853}, + year = {2024}, +} +""", # None found prompt="Identify the main and secondary category of Arxiv papers based on the titles and abstracts", adapted_from=["ArxivClusteringP2P"], ) diff --git a/mteb/tasks/Clustering/eng/ArxivClusteringS2S.py b/mteb/tasks/Clustering/eng/ArxivClusteringS2S.py index 8b4beb0e26..d92139e5a9 100644 --- a/mteb/tasks/Clustering/eng/ArxivClusteringS2S.py +++ b/mteb/tasks/Clustering/eng/ArxivClusteringS2S.py @@ -28,13 +28,15 @@ class ArxivClusteringS2S(AbsTaskClustering): 
annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{arxiv_org_submitters_2024, - title={arXiv Dataset}, - url={https://www.kaggle.com/dsv/7548853}, - DOI={10.34740/KAGGLE/DSV/7548853}, - publisher={Kaggle}, - author={arXiv.org submitters}, - year={2024} -}""", + bibtex_citation=r""" +@misc{arxiv_org_submitters_2024, + author = {arXiv.org submitters}, + doi = {10.34740/KAGGLE/DSV/7548853}, + publisher = {Kaggle}, + title = {arXiv Dataset}, + url = {https://www.kaggle.com/dsv/7548853}, + year = {2024}, +} +""", prompt="Identify the main and secondary category of Arxiv papers based on the titles", ) diff --git a/mteb/tasks/Clustering/eng/BigPatentClustering.py b/mteb/tasks/Clustering/eng/BigPatentClustering.py index 306119fed8..2148c7bb60 100644 --- a/mteb/tasks/Clustering/eng/BigPatentClustering.py +++ b/mteb/tasks/Clustering/eng/BigPatentClustering.py @@ -36,22 +36,24 @@ class BigPatentClustering(AbsTaskClustering): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@article{DBLP:journals/corr/abs-1906-03741, - author = {Eva Sharma and - Chen Li and - Lu Wang}, - title = {{BIGPATENT:} {A} Large-Scale Dataset for Abstractive and Coherent - Summarization}, - journal = {CoRR}, - volume = {abs/1906.03741}, - year = {2019}, - url = {http://arxiv.org/abs/1906.03741}, + bibtex_citation=r""" +@article{DBLP:journals/corr/abs-1906-03741, + author = {Eva Sharma and +Chen Li and +Lu Wang}, + bibsource = {dblp computer science bibliography, https://dblp.org}, + biburl = {https://dblp.org/rec/journals/corr/abs-1906-03741.bib}, + eprint = {1906.03741}, eprinttype = {arXiv}, - eprint = {1906.03741}, + journal = {CoRR}, timestamp = {Wed, 26 Jun 2019 07:14:58 +0200}, - biburl = {https://dblp.org/rec/journals/corr/abs-1906-03741.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} -}""", + title = {{BIGPATENT:} {A} Large-Scale Dataset for Abstractive and Coherent +Summarization}, + url = 
{http://arxiv.org/abs/1906.03741}, + volume = {abs/1906.03741}, + year = {2019}, +} +""", ) @@ -82,22 +84,24 @@ class BigPatentClusteringFast(AbsTaskClusteringFast): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{DBLP:journals/corr/abs-1906-03741, - author = {Eva Sharma and - Chen Li and - Lu Wang}, - title = {{BIGPATENT:} {A} Large-Scale Dataset for Abstractive and Coherent - Summarization}, - journal = {CoRR}, - volume = {abs/1906.03741}, - year = {2019}, - url = {http://arxiv.org/abs/1906.03741}, + bibtex_citation=r""" +@article{DBLP:journals/corr/abs-1906-03741, + author = {Eva Sharma and +Chen Li and +Lu Wang}, + bibsource = {dblp computer science bibliography, https://dblp.org}, + biburl = {https://dblp.org/rec/journals/corr/abs-1906-03741.bib}, + eprint = {1906.03741}, eprinttype = {arXiv}, - eprint = {1906.03741}, + journal = {CoRR}, timestamp = {Wed, 26 Jun 2019 07:14:58 +0200}, - biburl = {https://dblp.org/rec/journals/corr/abs-1906-03741.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} -}""", + title = {{BIGPATENT:} {A} Large-Scale Dataset for Abstractive and Coherent +Summarization}, + url = {http://arxiv.org/abs/1906.03741}, + volume = {abs/1906.03741}, + year = {2019}, +} +""", adapted_from=["BigPatentClustering"], ) diff --git a/mteb/tasks/Clustering/eng/BuiltBenchClusteringP2P.py b/mteb/tasks/Clustering/eng/BuiltBenchClusteringP2P.py index a7739a11da..8dc95ad597 100644 --- a/mteb/tasks/Clustering/eng/BuiltBenchClusteringP2P.py +++ b/mteb/tasks/Clustering/eng/BuiltBenchClusteringP2P.py @@ -26,11 +26,13 @@ class BuiltBenchClusteringP2P(AbsTaskClustering): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation="""@article{shahinmoghadam2024benchmarking, - title={Benchmarking pre-trained text embedding models in aligning built asset information}, - author={Shahinmoghadam, Mehrzad and Motamedi, Ali}, - journal={arXiv preprint arXiv:2411.12056}, 
- year={2024} -}""", + bibtex_citation=r""" +@article{shahinmoghadam2024benchmarking, + author = {Shahinmoghadam, Mehrzad and Motamedi, Ali}, + journal = {arXiv preprint arXiv:2411.12056}, + title = {Benchmarking pre-trained text embedding models in aligning built asset information}, + year = {2024}, +} +""", prompt="Identify the category of the built asset entities based on the entity description", ) diff --git a/mteb/tasks/Clustering/eng/BuiltBenchClusteringS2S.py b/mteb/tasks/Clustering/eng/BuiltBenchClusteringS2S.py index 58b53a476d..78f0bb471b 100644 --- a/mteb/tasks/Clustering/eng/BuiltBenchClusteringS2S.py +++ b/mteb/tasks/Clustering/eng/BuiltBenchClusteringS2S.py @@ -26,11 +26,13 @@ class BuiltBenchClusteringS2S(AbsTaskClustering): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation="""@article{shahinmoghadam2024benchmarking, - title={Benchmarking pre-trained text embedding models in aligning built asset information}, - author={Shahinmoghadam, Mehrzad and Motamedi, Ali}, - journal={arXiv preprint arXiv:2411.12056}, - year={2024} -}""", + bibtex_citation=r""" +@article{shahinmoghadam2024benchmarking, + author = {Shahinmoghadam, Mehrzad and Motamedi, Ali}, + journal = {arXiv preprint arXiv:2411.12056}, + title = {Benchmarking pre-trained text embedding models in aligning built asset information}, + year = {2024}, +} +""", prompt="Identify the category of the built asset entities based on the names or titles", ) diff --git a/mteb/tasks/Clustering/eng/ClusTrecCovid.py b/mteb/tasks/Clustering/eng/ClusTrecCovid.py index 51fb455cd6..b6c10103a5 100644 --- a/mteb/tasks/Clustering/eng/ClusTrecCovid.py +++ b/mteb/tasks/Clustering/eng/ClusTrecCovid.py @@ -29,19 +29,20 @@ class ClusTrecCovid(AbsTaskClusteringFast, MultilingualTask): annotations_creators="expert-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{katz-etal-2024-knowledge, - title = "Knowledge Navigator: {LLM}-guided Browsing 
Framework for Exploratory Search in Scientific Literature", - author = "Katz, Uri and - Levy, Mosh and - Goldberg, Yoav", - booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024", - month = nov, - year = "2024", - address = "Miami, Florida, USA", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2024.findings-emnlp.516", - pages = "8838--8855", - } - """, + bibtex_citation=r""" +@inproceedings{katz-etal-2024-knowledge, + address = {Miami, Florida, USA}, + author = {Katz, Uri and +Levy, Mosh and +Goldberg, Yoav}, + booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2024}, + month = nov, + pages = {8838--8855}, + publisher = {Association for Computational Linguistics}, + title = {Knowledge Navigator: {LLM}-guided Browsing Framework for Exploratory Search in Scientific Literature}, + url = {https://aclanthology.org/2024.findings-emnlp.516}, + year = {2024}, +} +""", prompt="Identify the main category of the covid-19 papers based on the titles and abstracts", ) diff --git a/mteb/tasks/Clustering/eng/RedditClustering.py b/mteb/tasks/Clustering/eng/RedditClustering.py index a49b2b63c8..e1d111b8e6 100644 --- a/mteb/tasks/Clustering/eng/RedditClustering.py +++ b/mteb/tasks/Clustering/eng/RedditClustering.py @@ -34,19 +34,21 @@ class RedditFastClusteringS2S(AbsTaskClusteringFast): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{geigle:2021:arxiv, - author = {Gregor Geigle and - Nils Reimers and - Andreas R{\"u}ckl{\'e} and - Iryna Gurevych}, - title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, - journal = {arXiv preprint}, - volume = {abs/2104.07081}, - year = {2021}, - url = {http://arxiv.org/abs/2104.07081}, - archivePrefix = {arXiv}, - eprint = {2104.07081} - }""", + bibtex_citation=r""" +@article{geigle:2021:arxiv, + archiveprefix = {arXiv}, + author = {Gregor Geigle and +Nils Reimers and +Andreas 
R{\"u}ckl{\'e} and +Iryna Gurevych}, + eprint = {2104.07081}, + journal = {arXiv preprint}, + title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, + url = {http://arxiv.org/abs/2104.07081}, + volume = {abs/2104.07081}, + year = {2021}, +} +""", prompt="Identify the topic or theme of Reddit posts based on the titles", adapted_from=["RedditClustering"], ) @@ -93,18 +95,20 @@ class RedditClustering(AbsTaskClustering): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{geigle:2021:arxiv, - author = {Gregor Geigle and - Nils Reimers and - Andreas R{\"u}ckl{\'e} and - Iryna Gurevych}, - title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, - journal = {arXiv preprint}, - volume = {abs/2104.07081}, - year = {2021}, - url = {http://arxiv.org/abs/2104.07081}, - archivePrefix = {arXiv}, - eprint = {2104.07081} - }""", + bibtex_citation=r""" +@article{geigle:2021:arxiv, + archiveprefix = {arXiv}, + author = {Gregor Geigle and +Nils Reimers and +Andreas R{\"u}ckl{\'e} and +Iryna Gurevych}, + eprint = {2104.07081}, + journal = {arXiv preprint}, + title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, + url = {http://arxiv.org/abs/2104.07081}, + volume = {abs/2104.07081}, + year = {2021}, +} +""", prompt="Identify the topic or theme of Reddit posts based on the titles", ) diff --git a/mteb/tasks/Clustering/eng/RedditClusteringP2P.py b/mteb/tasks/Clustering/eng/RedditClusteringP2P.py index 243291cdbb..78352a2356 100644 --- a/mteb/tasks/Clustering/eng/RedditClusteringP2P.py +++ b/mteb/tasks/Clustering/eng/RedditClusteringP2P.py @@ -36,19 +36,21 @@ class RedditClusteringP2P(AbsTaskClustering): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{geigle:2021:arxiv, - author = {Gregor Geigle and - Nils Reimers and - Andreas R{\"u}ckl{\'e} and - Iryna Gurevych}, - title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, - journal = {arXiv 
preprint}, - volume = {abs/2104.07081}, - year = {2021}, - url = {http://arxiv.org/abs/2104.07081}, - archivePrefix = {arXiv}, - eprint = {2104.07081} - }""", + bibtex_citation=r""" +@article{geigle:2021:arxiv, + archiveprefix = {arXiv}, + author = {Gregor Geigle and +Nils Reimers and +Andreas R{\"u}ckl{\'e} and +Iryna Gurevych}, + eprint = {2104.07081}, + journal = {arXiv preprint}, + title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, + url = {http://arxiv.org/abs/2104.07081}, + volume = {abs/2104.07081}, + year = {2021}, +} +""", prompt="Identify the topic or theme of Reddit posts based on the titles and posts", ) @@ -75,19 +77,21 @@ class RedditFastClusteringP2P(AbsTaskClusteringFast): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{geigle:2021:arxiv, - author = {Gregor Geigle and - Nils Reimers and - Andreas R{\"u}ckl{\'e} and - Iryna Gurevych}, - title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, - journal = {arXiv preprint}, - volume = {abs/2104.07081}, - year = {2021}, - url = {http://arxiv.org/abs/2104.07081}, - archivePrefix = {arXiv}, - eprint = {2104.07081} - }""", + bibtex_citation=r""" +@article{geigle:2021:arxiv, + archiveprefix = {arXiv}, + author = {Gregor Geigle and +Nils Reimers and +Andreas R{\"u}ckl{\'e} and +Iryna Gurevych}, + eprint = {2104.07081}, + journal = {arXiv preprint}, + title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, + url = {http://arxiv.org/abs/2104.07081}, + volume = {abs/2104.07081}, + year = {2021}, +} +""", prompt="Identify the topic or theme of Reddit posts based on the titles and posts", adapted_from=["RedditClusteringP2P"], ) diff --git a/mteb/tasks/Clustering/eng/StackExchangeClustering.py b/mteb/tasks/Clustering/eng/StackExchangeClustering.py index 3ab53e4c0b..419f04c2c7 100644 --- a/mteb/tasks/Clustering/eng/StackExchangeClustering.py +++ b/mteb/tasks/Clustering/eng/StackExchangeClustering.py @@ -34,19 +34,21 @@ class 
StackExchangeClusteringFast(AbsTaskClusteringFast): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{geigle:2021:arxiv, - author = {Gregor Geigle and - Nils Reimers and - Andreas R{\"u}ckl{\'e} and - Iryna Gurevych}, - title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, - journal = {arXiv preprint}, - volume = {abs/2104.07081}, - year = {2021}, - url = {http://arxiv.org/abs/2104.07081}, - archivePrefix = {arXiv}, - eprint = {2104.07081} - }""", + bibtex_citation=r""" +@article{geigle:2021:arxiv, + archiveprefix = {arXiv}, + author = {Gregor Geigle and +Nils Reimers and +Andreas R{\"u}ckl{\'e} and +Iryna Gurevych}, + eprint = {2104.07081}, + journal = {arXiv preprint}, + title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, + url = {http://arxiv.org/abs/2104.07081}, + volume = {abs/2104.07081}, + year = {2021}, +} +""", prompt="Identify the topic or theme of StackExchange posts based on the titles", adapted_from=["StackExchangeClustering"], ) @@ -95,18 +97,20 @@ class StackExchangeClustering(AbsTaskClustering): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{geigle:2021:arxiv, - author = {Gregor Geigle and - Nils Reimers and - Andreas R{\"u}ckl{\'e} and - Iryna Gurevych}, - title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, - journal = {arXiv preprint}, - volume = {abs/2104.07081}, - year = {2021}, - url = {http://arxiv.org/abs/2104.07081}, - archivePrefix = {arXiv}, - eprint = {2104.07081} - }""", + bibtex_citation=r""" +@article{geigle:2021:arxiv, + archiveprefix = {arXiv}, + author = {Gregor Geigle and +Nils Reimers and +Andreas R{\"u}ckl{\'e} and +Iryna Gurevych}, + eprint = {2104.07081}, + journal = {arXiv preprint}, + title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, + url = {http://arxiv.org/abs/2104.07081}, + volume = {abs/2104.07081}, + year = {2021}, +} +""", prompt="Identify the topic or theme of 
StackExchange posts based on the titles", ) diff --git a/mteb/tasks/Clustering/eng/StackExchangeClusteringP2P.py b/mteb/tasks/Clustering/eng/StackExchangeClusteringP2P.py index 40b3bd82d6..c485e769da 100644 --- a/mteb/tasks/Clustering/eng/StackExchangeClusteringP2P.py +++ b/mteb/tasks/Clustering/eng/StackExchangeClusteringP2P.py @@ -36,19 +36,21 @@ class StackExchangeClusteringP2PFast(AbsTaskClusteringFast): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{geigle:2021:arxiv, - author = {Gregor Geigle and - Nils Reimers and - Andreas R{\"u}ckl{\'e} and - Iryna Gurevych}, - title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, - journal = {arXiv preprint}, - volume = {abs/2104.07081}, - year = {2021}, - url = {http://arxiv.org/abs/2104.07081}, - archivePrefix = {arXiv}, - eprint = {2104.07081} - }""", + bibtex_citation=r""" +@article{geigle:2021:arxiv, + archiveprefix = {arXiv}, + author = {Gregor Geigle and +Nils Reimers and +Andreas R{\"u}ckl{\'e} and +Iryna Gurevych}, + eprint = {2104.07081}, + journal = {arXiv preprint}, + title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, + url = {http://arxiv.org/abs/2104.07081}, + volume = {abs/2104.07081}, + year = {2021}, +} +""", prompt="Identify the topic or theme of StackExchange posts based on the given paragraphs", adapted_from=["StackExchangeClusteringP2P"], ) @@ -99,18 +101,20 @@ class StackExchangeClusteringP2P(AbsTaskClustering): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{geigle:2021:arxiv, - author = {Gregor Geigle and - Nils Reimers and - Andreas R{\"u}ckl{\'e} and - Iryna Gurevych}, - title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, - journal = {arXiv preprint}, - volume = {abs/2104.07081}, - year = {2021}, - url = {http://arxiv.org/abs/2104.07081}, - archivePrefix = {arXiv}, - eprint = {2104.07081} - }""", + bibtex_citation=r""" +@article{geigle:2021:arxiv, + 
archiveprefix = {arXiv}, + author = {Gregor Geigle and +Nils Reimers and +Andreas R{\"u}ckl{\'e} and +Iryna Gurevych}, + eprint = {2104.07081}, + journal = {arXiv preprint}, + title = {TWEAC: Transformer with Extendable QA Agent Classifiers}, + url = {http://arxiv.org/abs/2104.07081}, + volume = {abs/2104.07081}, + year = {2021}, +} +""", prompt="Identify the topic or theme of StackExchange posts based on the given paragraphs", ) diff --git a/mteb/tasks/Clustering/eng/TwentyNewsgroupsClustering.py b/mteb/tasks/Clustering/eng/TwentyNewsgroupsClustering.py index abdca6638c..5c82af900a 100644 --- a/mteb/tasks/Clustering/eng/TwentyNewsgroupsClustering.py +++ b/mteb/tasks/Clustering/eng/TwentyNewsgroupsClustering.py @@ -35,20 +35,21 @@ class TwentyNewsgroupsClustering(AbsTaskClustering): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@incollection{LANG1995331, - title = {NewsWeeder: Learning to Filter Netnews}, - editor = {Armand Prieditis and Stuart Russell}, - booktitle = {Machine Learning Proceedings 1995}, - publisher = {Morgan Kaufmann}, - address = {San Francisco (CA)}, - pages = {331-339}, - year = {1995}, - isbn = {978-1-55860-377-6}, - doi = {https://doi.org/10.1016/B978-1-55860-377-6.50048-7}, - url = {https://www.sciencedirect.com/science/article/pii/B9781558603776500487}, - author = {Ken Lang}, - } - """, + bibtex_citation=r""" +@incollection{LANG1995331, + address = {San Francisco (CA)}, + author = {Ken Lang}, + booktitle = {Machine Learning Proceedings 1995}, + doi = {https://doi.org/10.1016/B978-1-55860-377-6.50048-7}, + editor = {Armand Prieditis and Stuart Russell}, + isbn = {978-1-55860-377-6}, + pages = {331-339}, + publisher = {Morgan Kaufmann}, + title = {NewsWeeder: Learning to Filter Netnews}, + url = {https://www.sciencedirect.com/science/article/pii/B9781558603776500487}, + year = {1995}, +} +""", prompt="Identify the topic or theme of the given news articles", ) @@ -75,20 +76,21 @@ class 
TwentyNewsgroupsClusteringFast(AbsTaskClusteringFast): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@incollection{LANG1995331, - title = {NewsWeeder: Learning to Filter Netnews}, - editor = {Armand Prieditis and Stuart Russell}, - booktitle = {Machine Learning Proceedings 1995}, - publisher = {Morgan Kaufmann}, - address = {San Francisco (CA)}, - pages = {331-339}, - year = {1995}, - isbn = {978-1-55860-377-6}, - doi = {https://doi.org/10.1016/B978-1-55860-377-6.50048-7}, - url = {https://www.sciencedirect.com/science/article/pii/B9781558603776500487}, - author = {Ken Lang}, - } - """, + bibtex_citation=r""" +@incollection{LANG1995331, + address = {San Francisco (CA)}, + author = {Ken Lang}, + booktitle = {Machine Learning Proceedings 1995}, + doi = {https://doi.org/10.1016/B978-1-55860-377-6.50048-7}, + editor = {Armand Prieditis and Stuart Russell}, + isbn = {978-1-55860-377-6}, + pages = {331-339}, + publisher = {Morgan Kaufmann}, + title = {NewsWeeder: Learning to Filter Netnews}, + url = {https://www.sciencedirect.com/science/article/pii/B9781558603776500487}, + year = {1995}, +} +""", prompt="Identify the topic or theme of the given news articles", adapted_from=["TwentyNewsgroupsClustering"], ) diff --git a/mteb/tasks/Clustering/eng/WikiCitiesClustering.py b/mteb/tasks/Clustering/eng/WikiCitiesClustering.py index be897938a8..3f37207640 100644 --- a/mteb/tasks/Clustering/eng/WikiCitiesClustering.py +++ b/mteb/tasks/Clustering/eng/WikiCitiesClustering.py @@ -27,9 +27,11 @@ class WikiCitiesClustering(AbsTaskClustering): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@ONLINE{wikidump, - author = "Wikimedia Foundation", - title = "Wikimedia Downloads", - url = "https://dumps.wikimedia.org" -}""", + bibtex_citation=r""" +@online{wikidump, + author = {Wikimedia Foundation}, + title = {Wikimedia Downloads}, + url = {https://dumps.wikimedia.org}, +} +""", ) diff --git 
a/mteb/tasks/Clustering/eng/WikipediaChemistrySpecialtiesClustering.py b/mteb/tasks/Clustering/eng/WikipediaChemistrySpecialtiesClustering.py index a4e4082a69..05b7aec7bf 100644 --- a/mteb/tasks/Clustering/eng/WikipediaChemistrySpecialtiesClustering.py +++ b/mteb/tasks/Clustering/eng/WikipediaChemistrySpecialtiesClustering.py @@ -26,12 +26,12 @@ class WikipediaChemistrySpecialtiesClustering(AbsTaskClustering): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Clustering/eng/WikipediaChemistryTopicsClustering.py b/mteb/tasks/Clustering/eng/WikipediaChemistryTopicsClustering.py index bfa5e1fcf3..a170d89107 100644 --- a/mteb/tasks/Clustering/eng/WikipediaChemistryTopicsClustering.py +++ b/mteb/tasks/Clustering/eng/WikipediaChemistryTopicsClustering.py @@ -26,12 +26,12 @@ class WikipediaChemistryTopicsClustering(AbsTaskClustering): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - 
author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Clustering/fra/AlloProfClusteringP2P.py b/mteb/tasks/Clustering/fra/AlloProfClusteringP2P.py index b1ef7f09b6..83a22953fb 100644 --- a/mteb/tasks/Clustering/fra/AlloProfClusteringP2P.py +++ b/mteb/tasks/Clustering/fra/AlloProfClusteringP2P.py @@ -38,16 +38,18 @@ class AlloProfClusteringP2P(AbsTaskClustering): annotations_creators="human-annotated", dialect=None, sample_creation="found", - bibtex_citation="""@misc{lef23, - doi = {10.48550/ARXIV.2302.07738}, - url = {https://arxiv.org/abs/2302.07738}, + bibtex_citation=r""" +@misc{lef23, author = {Lefebvre-Brossard, Antoine and Gazaille, Stephane and Desmarais, Michel C.}, + copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International}, + doi = {10.48550/ARXIV.2302.07738}, keywords = {Computation and Language (cs.CL), Information Retrieval (cs.IR), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences}, - title = {Alloprof: a new French question-answer education dataset and its use in an information retrieval case study}, publisher = {arXiv}, + title = {Alloprof: a new French question-answer education dataset and its use in an information retrieval case study}, + url = {https://arxiv.org/abs/2302.07738}, year = {2023}, - copyright = {Creative Commons Attribution Non 
Commercial Share Alike 4.0 International} -}""", +} +""", ) def create_description(self, example): @@ -96,15 +98,16 @@ class AlloProfClusteringP2PFast(AbsTaskClusteringFast): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{lef23, - doi = {10.48550/ARXIV.2302.07738}, - url = {https://arxiv.org/abs/2302.07738}, + bibtex_citation=r""" +@misc{lef23, author = {Lefebvre-Brossard, Antoine and Gazaille, Stephane and Desmarais, Michel C.}, + copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International}, + doi = {10.48550/ARXIV.2302.07738}, keywords = {Computation and Language (cs.CL), Information Retrieval (cs.IR), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences}, - title = {Alloprof: a new French question-answer education dataset and its use in an information retrieval case study}, publisher = {arXiv}, + title = {Alloprof: a new French question-answer education dataset and its use in an information retrieval case study}, + url = {https://arxiv.org/abs/2302.07738}, year = {2023}, - copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International} } """, adapted_from=["AlloProfClusteringP2P"], diff --git a/mteb/tasks/Clustering/fra/AlloProfClusteringS2S.py b/mteb/tasks/Clustering/fra/AlloProfClusteringS2S.py index fcd2e18455..349faaec7f 100644 --- a/mteb/tasks/Clustering/fra/AlloProfClusteringS2S.py +++ b/mteb/tasks/Clustering/fra/AlloProfClusteringS2S.py @@ -38,16 +38,18 @@ class AlloProfClusteringS2S(AbsTaskClustering): annotations_creators="human-annotated", dialect=None, sample_creation="found", - bibtex_citation="""@misc{lef23, - doi = {10.48550/ARXIV.2302.07738}, - url = {https://arxiv.org/abs/2302.07738}, + bibtex_citation=r""" +@misc{lef23, author = {Lefebvre-Brossard, Antoine and Gazaille, Stephane and Desmarais, Michel C.}, + copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 
International}, + doi = {10.48550/ARXIV.2302.07738}, keywords = {Computation and Language (cs.CL), Information Retrieval (cs.IR), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences}, - title = {Alloprof: a new French question-answer education dataset and its use in an information retrieval case study}, publisher = {arXiv}, + title = {Alloprof: a new French question-answer education dataset and its use in an information retrieval case study}, + url = {https://arxiv.org/abs/2302.07738}, year = {2023}, - copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International} -}""", +} +""", ) def dataset_transform(self): @@ -93,15 +95,16 @@ class AlloProfClusteringS2SFast(AbsTaskClusteringFast): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{lef23, - doi = {10.48550/ARXIV.2302.07738}, - url = {https://arxiv.org/abs/2302.07738}, + bibtex_citation=r""" +@misc{lef23, author = {Lefebvre-Brossard, Antoine and Gazaille, Stephane and Desmarais, Michel C.}, + copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International}, + doi = {10.48550/ARXIV.2302.07738}, keywords = {Computation and Language (cs.CL), Information Retrieval (cs.IR), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences}, - title = {Alloprof: a new French question-answer education dataset and its use in an information retrieval case study}, publisher = {arXiv}, + title = {Alloprof: a new French question-answer education dataset and its use in an information retrieval case study}, + url = {https://arxiv.org/abs/2302.07738}, year = {2023}, - copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International} } """, adapted_from=["AlloProfClusteringS2S"], diff --git a/mteb/tasks/Clustering/fra/HALClusteringS2S.py b/mteb/tasks/Clustering/fra/HALClusteringS2S.py index cb4cc319a7..eaeb4924a0 
100644 --- a/mteb/tasks/Clustering/fra/HALClusteringS2S.py +++ b/mteb/tasks/Clustering/fra/HALClusteringS2S.py @@ -40,14 +40,16 @@ class HALClusteringS2S(AbsTaskClustering): annotations_creators="human-annotated", dialect=None, sample_creation="found", - bibtex_citation="""@misc{ciancone2024extending, - title={Extending the Massive Text Embedding Benchmark to French}, - author={Mathieu Ciancone and Imene Kerboua and Marion Schaeffer and Wissam Siblini}, - year={2024}, - eprint={2405.20468}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{ciancone2024extending, + archiveprefix = {arXiv}, + author = {Mathieu Ciancone and Imene Kerboua and Marion Schaeffer and Wissam Siblini}, + eprint = {2405.20468}, + primaryclass = {cs.CL}, + title = {Extending the Massive Text Embedding Benchmark to French}, + year = {2024}, +} +""", ) def dataset_transform(self): @@ -87,14 +89,16 @@ class HALClusteringS2SFast(AbsTaskClusteringFast): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{ciancone2024extending, - title={Extending the Massive Text Embedding Benchmark to French}, - author={Mathieu Ciancone and Imene Kerboua and Marion Schaeffer and Wissam Siblini}, - year={2024}, - eprint={2405.20468}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{ciancone2024extending, + archiveprefix = {arXiv}, + author = {Mathieu Ciancone and Imene Kerboua and Marion Schaeffer and Wissam Siblini}, + eprint = {2405.20468}, + primaryclass = {cs.CL}, + title = {Extending the Massive Text Embedding Benchmark to French}, + year = {2024}, +} +""", adapted_from=["HALClusteringS2S"], ) diff --git a/mteb/tasks/Clustering/jpn/MewsC16JaClustering.py b/mteb/tasks/Clustering/jpn/MewsC16JaClustering.py index 5c8bfe01fa..dfe010cdb6 100644 --- a/mteb/tasks/Clustering/jpn/MewsC16JaClustering.py +++ b/mteb/tasks/Clustering/jpn/MewsC16JaClustering.py @@ -33,25 +33,24 @@ class 
MewsC16JaClustering(AbsTaskClusteringFast): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{ - nishikawa-etal-2022-ease, - title = "{EASE}: Entity-Aware Contrastive Learning of Sentence Embedding", - author = "Nishikawa, Sosuke and - Ri, Ryokan and - Yamada, Ikuya and - Tsuruoka, Yoshimasa and - Echizen, Isao", - booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies", - month = jul, - year = "2022", - address = "Seattle, United States", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2022.naacl-main.284", - pages = "3870--3885", - abstract = "We present EASE, a novel method for learning sentence embeddings via contrastive learning between sentences and their related entities.The advantage of using entity supervision is twofold: (1) entities have been shown to be a strong indicator of text semantics and thus should provide rich training signals for sentence embeddings; (2) entities are defined independently of languages and thus offer useful cross-lingual alignment supervision.We evaluate EASE against other unsupervised models both in monolingual and multilingual settings.We show that EASE exhibits competitive or better performance in English semantic textual similarity (STS) and short text clustering (STC) tasks and it significantly outperforms baseline methods in multilingual settings on a variety of tasks.Our source code, pre-trained models, and newly constructed multi-lingual STC dataset are available at https://github.com/studio-ousia/ease.", - } - """, + bibtex_citation=r""" +@inproceedings{nishikawa-etal-2022-ease, + abstract = {We present EASE, a novel method for learning sentence embeddings via contrastive learning between sentences and their related entities.The advantage of using entity supervision is twofold: (1) entities have been shown to be a 
strong indicator of text semantics and thus should provide rich training signals for sentence embeddings; (2) entities are defined independently of languages and thus offer useful cross-lingual alignment supervision.We evaluate EASE against other unsupervised models both in monolingual and multilingual settings.We show that EASE exhibits competitive or better performance in English semantic textual similarity (STS) and short text clustering (STC) tasks and it significantly outperforms baseline methods in multilingual settings on a variety of tasks.Our source code, pre-trained models, and newly constructed multi-lingual STC dataset are available at https://github.com/studio-ousia/ease.}, + address = {Seattle, United States}, + author = {Nishikawa, Sosuke and +Ri, Ryokan and +Yamada, Ikuya and +Tsuruoka, Yoshimasa and +Echizen, Isao}, + booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies}, + month = jul, + pages = {3870--3885}, + publisher = {Association for Computational Linguistics}, + title = {{EASE}: Entity-Aware Contrastive Learning of Sentence Embedding}, + url = {https://aclanthology.org/2022.naacl-main.284}, + year = {2022}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Clustering/kor/KlueMrcDomainClustering.py b/mteb/tasks/Clustering/kor/KlueMrcDomainClustering.py index fc2b27b884..de070dd087 100644 --- a/mteb/tasks/Clustering/kor/KlueMrcDomainClustering.py +++ b/mteb/tasks/Clustering/kor/KlueMrcDomainClustering.py @@ -28,14 +28,16 @@ class KlueMrcDomainClustering(AbsTaskClustering): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{park2021klue, - title={KLUE: Korean Language Understanding Evaluation}, - author={Sungjoon Park and Jihyung Moon and Sungdong Kim and Won Ik Cho and Jiyoon Han and Jangwon Park and Chisung Song and Junseong Kim and Yongsook Song and Taehwan Oh and Joohong 
Lee and Juhyun Oh and Sungwon Lyu and Younghoon Jeong and Inkwon Lee and Sangwoo Seo and Dongjun Lee and Hyunwoo Kim and Myeonghwa Lee and Seongbo Jang and Seungwon Do and Sunkyoung Kim and Kyungtae Lim and Jongwon Lee and Kyumin Park and Jamin Shin and Seonghyun Kim and Lucy Park and Alice Oh and Jungwoo Ha and Kyunghyun Cho}, - year={2021}, - eprint={2105.09680}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, -}""", + bibtex_citation=r""" +@misc{park2021klue, + archiveprefix = {arXiv}, + author = {Sungjoon Park and Jihyung Moon and Sungdong Kim and Won Ik Cho and Jiyoon Han and Jangwon Park and Chisung Song and Junseong Kim and Yongsook Song and Taehwan Oh and Joohong Lee and Juhyun Oh and Sungwon Lyu and Younghoon Jeong and Inkwon Lee and Sangwoo Seo and Dongjun Lee and Hyunwoo Kim and Myeonghwa Lee and Seongbo Jang and Seungwon Do and Sunkyoung Kim and Kyungtae Lim and Jongwon Lee and Kyumin Park and Jamin Shin and Seonghyun Kim and Lucy Park and Alice Oh and Jungwoo Ha and Kyunghyun Cho}, + eprint = {2105.09680}, + primaryclass = {cs.CL}, + title = {KLUE: Korean Language Understanding Evaluation}, + year = {2021}, +} +""", prompt="Identify the topic or theme of the given texts", ) diff --git a/mteb/tasks/Clustering/kor/KlueYnatMrcCategoryClustering.py b/mteb/tasks/Clustering/kor/KlueYnatMrcCategoryClustering.py index d31dd87add..14358e7206 100644 --- a/mteb/tasks/Clustering/kor/KlueYnatMrcCategoryClustering.py +++ b/mteb/tasks/Clustering/kor/KlueYnatMrcCategoryClustering.py @@ -28,14 +28,16 @@ class KlueYnatMrcCategoryClustering(AbsTaskClustering): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{park2021klue, - title={KLUE: Korean Language Understanding Evaluation}, - author={Sungjoon Park and Jihyung Moon and Sungdong Kim and Won Ik Cho and Jiyoon Han and Jangwon Park and Chisung Song and Junseong Kim and Yongsook Song and Taehwan Oh and Joohong Lee and Juhyun Oh and Sungwon Lyu and Younghoon Jeong and 
Inkwon Lee and Sangwoo Seo and Dongjun Lee and Hyunwoo Kim and Myeonghwa Lee and Seongbo Jang and Seungwon Do and Sunkyoung Kim and Kyungtae Lim and Jongwon Lee and Kyumin Park and Jamin Shin and Seonghyun Kim and Lucy Park and Alice Oh and Jungwoo Ha and Kyunghyun Cho}, - year={2021}, - eprint={2105.09680}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, -}""", + bibtex_citation=r""" +@misc{park2021klue, + archiveprefix = {arXiv}, + author = {Sungjoon Park and Jihyung Moon and Sungdong Kim and Won Ik Cho and Jiyoon Han and Jangwon Park and Chisung Song and Junseong Kim and Yongsook Song and Taehwan Oh and Joohong Lee and Juhyun Oh and Sungwon Lyu and Younghoon Jeong and Inkwon Lee and Sangwoo Seo and Dongjun Lee and Hyunwoo Kim and Myeonghwa Lee and Seongbo Jang and Seungwon Do and Sunkyoung Kim and Kyungtae Lim and Jongwon Lee and Kyumin Park and Jamin Shin and Seonghyun Kim and Lucy Park and Alice Oh and Jungwoo Ha and Kyunghyun Cho}, + eprint = {2105.09680}, + primaryclass = {cs.CL}, + title = {KLUE: Korean Language Understanding Evaluation}, + year = {2021}, +} +""", prompt="Identify the topic or theme of the given texts", ) diff --git a/mteb/tasks/Clustering/multilingual/IndicReviewsClusteringP2P.py b/mteb/tasks/Clustering/multilingual/IndicReviewsClusteringP2P.py index 8f649a745b..3ea45b2d99 100644 --- a/mteb/tasks/Clustering/multilingual/IndicReviewsClusteringP2P.py +++ b/mteb/tasks/Clustering/multilingual/IndicReviewsClusteringP2P.py @@ -49,13 +49,15 @@ class IndicReviewsClusteringP2P(AbsTaskClustering, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="machine-translated and verified", - bibtex_citation="""@article{doddapaneni2022towards, - title = {Towards Leaving No Indic Language Behind: Building Monolingual Corpora, Benchmark and Models for Indic Languages}, - author = {Sumanth Doddapaneni and Rahul Aralikatte and Gowtham Ramesh and Shreyansh Goyal and Mitesh M. 
Khapra and Anoop Kunchukuttan and Pratyush Kumar}, - journal = {Annual Meeting of the Association for Computational Linguistics}, - year = {2022}, - doi = {10.18653/v1/2023.acl-long.693} -}""", + bibtex_citation=r""" +@article{doddapaneni2022towards, + author = {Sumanth Doddapaneni and Rahul Aralikatte and Gowtham Ramesh and Shreyansh Goyal and Mitesh M. Khapra and Anoop Kunchukuttan and Pratyush Kumar}, + doi = {10.18653/v1/2023.acl-long.693}, + journal = {Annual Meeting of the Association for Computational Linguistics}, + title = {Towards Leaving No Indic Language Behind: Building Monolingual Corpora, Benchmark and Models for Indic Languages}, + year = {2022}, +} +""", ) def load_data(self, **kwargs: Any) -> None: diff --git a/mteb/tasks/Clustering/multilingual/MLSUMClusteringP2P.py b/mteb/tasks/Clustering/multilingual/MLSUMClusteringP2P.py index 90d6fb17ba..fc341c00d0 100644 --- a/mteb/tasks/Clustering/multilingual/MLSUMClusteringP2P.py +++ b/mteb/tasks/Clustering/multilingual/MLSUMClusteringP2P.py @@ -44,12 +44,14 @@ class MLSUMClusteringP2P(AbsTaskClustering, MultilingualTask): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{scialom2020mlsum, - title={MLSUM: The Multilingual Summarization Corpus}, - author={Scialom, Thomas and Dray, Paul-Alexis and Lamprier, Sylvain and Piwowarski, Benjamin and Staiano, Jacopo}, - journal={arXiv preprint arXiv:2004.14900}, - year={2020} - }""", + bibtex_citation=r""" +@article{scialom2020mlsum, + author = {Scialom, Thomas and Dray, Paul-Alexis and Lamprier, Sylvain and Piwowarski, Benjamin and Staiano, Jacopo}, + journal = {arXiv preprint arXiv:2004.14900}, + title = {MLSUM: The Multilingual Summarization Corpus}, + year = {2020}, +} +""", ) def load_data(self, **kwargs): @@ -114,12 +116,14 @@ class MLSUMClusteringP2PFast(AbsTaskClusteringFast, MultilingualTask): annotations_creators="derived", dialect=[], sample_creation="found", - 
bibtex_citation="""@article{scialom2020mlsum, - title={MLSUM: The Multilingual Summarization Corpus}, - author={Scialom, Thomas and Dray, Paul-Alexis and Lamprier, Sylvain and Piwowarski, Benjamin and Staiano, Jacopo}, - journal={arXiv preprint arXiv:2004.14900}, - year={2020} - }""", + bibtex_citation=r""" +@article{scialom2020mlsum, + author = {Scialom, Thomas and Dray, Paul-Alexis and Lamprier, Sylvain and Piwowarski, Benjamin and Staiano, Jacopo}, + journal = {arXiv preprint arXiv:2004.14900}, + title = {MLSUM: The Multilingual Summarization Corpus}, + year = {2020}, +} +""", adapted_from=["MLSUMClusteringP2P"], ) diff --git a/mteb/tasks/Clustering/multilingual/MLSUMClusteringS2S.py b/mteb/tasks/Clustering/multilingual/MLSUMClusteringS2S.py index 3cd6aa2d3a..f29200c233 100644 --- a/mteb/tasks/Clustering/multilingual/MLSUMClusteringS2S.py +++ b/mteb/tasks/Clustering/multilingual/MLSUMClusteringS2S.py @@ -44,12 +44,14 @@ class MLSUMClusteringS2S(AbsTaskClustering, MultilingualTask): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{scialom2020mlsum, - title={MLSUM: The Multilingual Summarization Corpus}, - author={Scialom, Thomas and Dray, Paul-Alexis and Lamprier, Sylvain and Piwowarski, Benjamin and Staiano, Jacopo}, - journal={arXiv preprint arXiv:2004.14900}, - year={2020} - }""", + bibtex_citation=r""" +@article{scialom2020mlsum, + author = {Scialom, Thomas and Dray, Paul-Alexis and Lamprier, Sylvain and Piwowarski, Benjamin and Staiano, Jacopo}, + journal = {arXiv preprint arXiv:2004.14900}, + title = {MLSUM: The Multilingual Summarization Corpus}, + year = {2020}, +} +""", ) def load_data(self, **kwargs): @@ -109,12 +111,14 @@ class MLSUMClusteringS2SFast(AbsTaskClusteringFast, MultilingualTask): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{scialom2020mlsum, - title={MLSUM: The Multilingual Summarization Corpus}, - author={Scialom, Thomas and Dray, 
Paul-Alexis and Lamprier, Sylvain and Piwowarski, Benjamin and Staiano, Jacopo}, - journal={arXiv preprint arXiv:2004.14900}, - year={2020} - }""", + bibtex_citation=r""" +@article{scialom2020mlsum, + author = {Scialom, Thomas and Dray, Paul-Alexis and Lamprier, Sylvain and Piwowarski, Benjamin and Staiano, Jacopo}, + journal = {arXiv preprint arXiv:2004.14900}, + title = {MLSUM: The Multilingual Summarization Corpus}, + year = {2020}, +} +""", adapted_from=["MLSUMClusteringS2S"], ) diff --git a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py index 480cceff8f..a8c611f00f 100644 --- a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py +++ b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py @@ -53,13 +53,15 @@ class MasakhaNEWSClusteringP2P(AbsTaskClustering, MultilingualTask): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{adelani2023masakhanews, - title={MasakhaNEWS: News Topic Classification for African languages}, - author={David Ifeoluwa Adelani and Marek Masiak and Israel Abebe Azime and Jesujoba Oluwadara Alabi and Atnafu Lambebo Tonja and Christine Mwase and Odunayo Ogundepo and Bonaventure F. P. Dossou and Akintunde Oladipo and Doreen Nixdorf and Chris Chinenye Emezue and Sana Sabah al-azzawi and Blessing K. Sibanda and Davis David and Lolwethu Ndolela and Jonathan Mukiibi and Tunde Oluwaseyi Ajayi and Tatiana Moteu Ngoli and Brian Odhiambo and Abraham Toluwase Owodunni and Nnaemeka C. 
Obiefuna and Shamsuddeen Hassan Muhammad and Saheed Salahudeen Abdullahi and Mesay Gemeda Yigezu and Tajuddeen Gwadabe and Idris Abdulmumin and Mahlet Taye Bame and Oluwabusayo Olufunke Awoyomi and Iyanuoluwa Shode and Tolulope Anu Adelani and Habiba Abdulganiy Kailani and Abdul-Hakeem Omotayo and Adetola Adeeko and Afolabi Abeeb and Anuoluwapo Aremu and Olanrewaju Samuel and Clemencia Siro and Wangari Kimotho and Onyekachi Raphael Ogbu and Chinedu E. Mbonu and Chiamaka I. Chukwuneke and Samuel Fanijo and Jessica Ojo and Oyinkansola F. Awosan and Tadesse Kebede Guge and Sakayo Toadoum Sari and Pamela Nyatsine and Freedmore Sidume and Oreen Yousuf and Mardiyyah Oduwole and Ussen Kimanuka and Kanda Patrick Tshinu and Thina Diko and Siyanda Nxakama and Abdulmejid Tuni Johar and Sinodos Gebre and Muhidin Mohamed and Shafie Abdi Mohamed and Fuad Mire Hassan and Moges Ahmed Mehamed and Evrard Ngabire and and Pontus Stenetorp}, - journal={ArXiv}, - year={2023}, - volume={} -}""", + bibtex_citation=r""" +@article{adelani2023masakhanews, + author = {David Ifeoluwa Adelani and Marek Masiak and Israel Abebe Azime and Jesujoba Oluwadara Alabi and Atnafu Lambebo Tonja and Christine Mwase and Odunayo Ogundepo and Bonaventure F. P. Dossou and Akintunde Oladipo and Doreen Nixdorf and Chris Chinenye Emezue and Sana Sabah al-azzawi and Blessing K. Sibanda and Davis David and Lolwethu Ndolela and Jonathan Mukiibi and Tunde Oluwaseyi Ajayi and Tatiana Moteu Ngoli and Brian Odhiambo and Abraham Toluwase Owodunni and Nnaemeka C. Obiefuna and Shamsuddeen Hassan Muhammad and Saheed Salahudeen Abdullahi and Mesay Gemeda Yigezu and Tajuddeen Gwadabe and Idris Abdulmumin and Mahlet Taye Bame and Oluwabusayo Olufunke Awoyomi and Iyanuoluwa Shode and Tolulope Anu Adelani and Habiba Abdulganiy Kailani and Abdul-Hakeem Omotayo and Adetola Adeeko and Afolabi Abeeb and Anuoluwapo Aremu and Olanrewaju Samuel and Clemencia Siro and Wangari Kimotho and Onyekachi Raphael Ogbu and Chinedu E. 
Mbonu and Chiamaka I. Chukwuneke and Samuel Fanijo and Jessica Ojo and Oyinkansola F. Awosan and Tadesse Kebede Guge and Sakayo Toadoum Sari and Pamela Nyatsine and Freedmore Sidume and Oreen Yousuf and Mardiyyah Oduwole and Ussen Kimanuka and Kanda Patrick Tshinu and Thina Diko and Siyanda Nxakama and Abdulmejid Tuni Johar and Sinodos Gebre and Muhidin Mohamed and Shafie Abdi Mohamed and Fuad Mire Hassan and Moges Ahmed Mehamed and Evrard Ngabire and and Pontus Stenetorp}, + journal = {ArXiv}, + title = {MasakhaNEWS: News Topic Classification for African languages}, + volume = {}, + year = {2023}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py index 7af80b5cdd..6ccddba538 100644 --- a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py +++ b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py @@ -52,13 +52,15 @@ class MasakhaNEWSClusteringS2S(AbsTaskClustering, MultilingualTask): annotations_creators="human-annotated", dialect=None, sample_creation=None, - bibtex_citation="""@article{adelani2023masakhanews, - title={MasakhaNEWS: News Topic Classification for African languages}, - author={David Ifeoluwa Adelani and Marek Masiak and Israel Abebe Azime and Jesujoba Oluwadara Alabi and Atnafu Lambebo Tonja and Christine Mwase and Odunayo Ogundepo and Bonaventure F. P. Dossou and Akintunde Oladipo and Doreen Nixdorf and Chris Chinenye Emezue and Sana Sabah al-azzawi and Blessing K. Sibanda and Davis David and Lolwethu Ndolela and Jonathan Mukiibi and Tunde Oluwaseyi Ajayi and Tatiana Moteu Ngoli and Brian Odhiambo and Abraham Toluwase Owodunni and Nnaemeka C. 
Obiefuna and Shamsuddeen Hassan Muhammad and Saheed Salahudeen Abdullahi and Mesay Gemeda Yigezu and Tajuddeen Gwadabe and Idris Abdulmumin and Mahlet Taye Bame and Oluwabusayo Olufunke Awoyomi and Iyanuoluwa Shode and Tolulope Anu Adelani and Habiba Abdulganiy Kailani and Abdul-Hakeem Omotayo and Adetola Adeeko and Afolabi Abeeb and Anuoluwapo Aremu and Olanrewaju Samuel and Clemencia Siro and Wangari Kimotho and Onyekachi Raphael Ogbu and Chinedu E. Mbonu and Chiamaka I. Chukwuneke and Samuel Fanijo and Jessica Ojo and Oyinkansola F. Awosan and Tadesse Kebede Guge and Sakayo Toadoum Sari and Pamela Nyatsine and Freedmore Sidume and Oreen Yousuf and Mardiyyah Oduwole and Ussen Kimanuka and Kanda Patrick Tshinu and Thina Diko and Siyanda Nxakama and Abdulmejid Tuni Johar and Sinodos Gebre and Muhidin Mohamed and Shafie Abdi Mohamed and Fuad Mire Hassan and Moges Ahmed Mehamed and Evrard Ngabire and and Pontus Stenetorp}, - journal={ArXiv}, - year={2023}, - volume={} -}""", + bibtex_citation=r""" +@article{adelani2023masakhanews, + author = {David Ifeoluwa Adelani and Marek Masiak and Israel Abebe Azime and Jesujoba Oluwadara Alabi and Atnafu Lambebo Tonja and Christine Mwase and Odunayo Ogundepo and Bonaventure F. P. Dossou and Akintunde Oladipo and Doreen Nixdorf and Chris Chinenye Emezue and Sana Sabah al-azzawi and Blessing K. Sibanda and Davis David and Lolwethu Ndolela and Jonathan Mukiibi and Tunde Oluwaseyi Ajayi and Tatiana Moteu Ngoli and Brian Odhiambo and Abraham Toluwase Owodunni and Nnaemeka C. Obiefuna and Shamsuddeen Hassan Muhammad and Saheed Salahudeen Abdullahi and Mesay Gemeda Yigezu and Tajuddeen Gwadabe and Idris Abdulmumin and Mahlet Taye Bame and Oluwabusayo Olufunke Awoyomi and Iyanuoluwa Shode and Tolulope Anu Adelani and Habiba Abdulganiy Kailani and Abdul-Hakeem Omotayo and Adetola Adeeko and Afolabi Abeeb and Anuoluwapo Aremu and Olanrewaju Samuel and Clemencia Siro and Wangari Kimotho and Onyekachi Raphael Ogbu and Chinedu E. 
Mbonu and Chiamaka I. Chukwuneke and Samuel Fanijo and Jessica Ojo and Oyinkansola F. Awosan and Tadesse Kebede Guge and Sakayo Toadoum Sari and Pamela Nyatsine and Freedmore Sidume and Oreen Yousuf and Mardiyyah Oduwole and Ussen Kimanuka and Kanda Patrick Tshinu and Thina Diko and Siyanda Nxakama and Abdulmejid Tuni Johar and Sinodos Gebre and Muhidin Mohamed and Shafie Abdi Mohamed and Fuad Mire Hassan and Moges Ahmed Mehamed and Evrard Ngabire and Pontus Stenetorp}, + journal = {ArXiv}, + title = {MasakhaNEWS: News Topic Classification for African languages}, + volume = {}, + year = {2023}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/multilingual/SIB200ClusteringS2S.py b/mteb/tasks/Clustering/multilingual/SIB200ClusteringS2S.py index 8569b55cd5..68e66ddc3e 100644 --- a/mteb/tasks/Clustering/multilingual/SIB200ClusteringS2S.py +++ b/mteb/tasks/Clustering/multilingual/SIB200ClusteringS2S.py @@ -237,12 +237,14 @@ class SIB200ClusteringFast(MultilingualTask, AbsTaskClusteringFast): annotations_creators="expert-annotated", # expert annotated for English --> human translations dialect=[], sample_creation="human-translated and localized", - bibtex_citation="""@article{adelani2023sib, - title={SIB-200: A simple, inclusive, and big evaluation dataset for topic classification in 200+ languages and dialects}, - author={Adelani, David Ifeoluwa and Liu, Hannah and Shen, Xiaoyu and Vassilyev, Nikita and Alabi, Jesujoba O and Mao, Yanke and Gao, Haonan and Lee, Annie En-Shiun}, - journal={arXiv preprint arXiv:2309.07445}, - year={2023} - }""", # combined train, validation, and test into test.
+ bibtex_citation=r""" +@article{adelani2023sib, + author = {Adelani, David Ifeoluwa and Liu, Hannah and Shen, Xiaoyu and Vassilyev, Nikita and Alabi, Jesujoba O and Mao, Yanke and Gao, Haonan and Lee, Annie En-Shiun}, + journal = {arXiv preprint arXiv:2309.07445}, + title = {SIB-200: A simple, inclusive, and big evaluation dataset for topic classification in 200+ languages and dialects}, + year = {2023}, +} +""", # combined train, validation, and test into test. ) def dataset_transform(self): diff --git a/mteb/tasks/Clustering/nob/SNLHierarchicalClustering.py b/mteb/tasks/Clustering/nob/SNLHierarchicalClustering.py index 081a99aebd..19a3d879c4 100644 --- a/mteb/tasks/Clustering/nob/SNLHierarchicalClustering.py +++ b/mteb/tasks/Clustering/nob/SNLHierarchicalClustering.py @@ -36,12 +36,14 @@ class SNLHierarchicalClusteringP2P(AbsTaskClusteringFast): dialect=[], task_subtypes=["Thematic clustering"], sample_creation="found", - bibtex_citation="""@mastersthesis{navjord2023beyond, - title={Beyond extractive: advancing abstractive automatic text summarization in Norwegian with transformers}, - author={Navjord, J{\\o}rgen Johnsen and Korsvik, Jon-Mikkel Ryen}, - year={2023}, - school={Norwegian University of Life Sciences, {\\AA}s} -}""", + bibtex_citation=r""" +@mastersthesis{navjord2023beyond, + author = {Navjord, J{\o}rgen Johnsen and Korsvik, Jon-Mikkel Ryen}, + school = {Norwegian University of Life Sciences, {\AA}s}, + title = {Beyond extractive: advancing abstractive automatic text summarization in Norwegian with transformers}, + year = {2023}, +} +""", prompt="Identify categories in a Norwegian lexicon", ) max_depth = 5 @@ -78,12 +80,14 @@ class SNLHierarchicalClusteringS2S(AbsTaskClusteringFast): dialect=[], task_subtypes=["Thematic clustering"], sample_creation="found", - bibtex_citation="""@mastersthesis{navjord2023beyond, - title={Beyond extractive: advancing abstractive automatic text summarization in Norwegian with transformers}, - author={Navjord,
J{\\o}rgen Johnsen and Korsvik, Jon-Mikkel Ryen}, - year={2023}, - school={Norwegian University of Life Sciences, {\\AA}s} -}""", + bibtex_citation=r""" +@mastersthesis{navjord2023beyond, + author = {Navjord, J{\o}rgen Johnsen and Korsvik, Jon-Mikkel Ryen}, + school = {Norwegian University of Life Sciences, {\AA}s}, + title = {Beyond extractive: advancing abstractive automatic text summarization in Norwegian with transformers}, + year = {2023}, +} +""", prompt="Identify categories in a Norwegian lexicon", ) max_depth = 5 diff --git a/mteb/tasks/Clustering/nob/VGHierarchicalClustering.py b/mteb/tasks/Clustering/nob/VGHierarchicalClustering.py index eda3aff310..750d156d68 100644 --- a/mteb/tasks/Clustering/nob/VGHierarchicalClustering.py +++ b/mteb/tasks/Clustering/nob/VGHierarchicalClustering.py @@ -36,12 +36,14 @@ class VGHierarchicalClusteringP2P(AbsTaskClusteringFast): dialect=[], task_subtypes=["Thematic clustering"], sample_creation="found", - bibtex_citation="""@mastersthesis{navjord2023beyond, - title={Beyond extractive: advancing abstractive automatic text summarization in Norwegian with transformers}, - author={Navjord, J{\\o}rgen Johnsen and Korsvik, Jon-Mikkel Ryen}, - year={2023}, - school={Norwegian University of Life Sciences, {\\AA}s} -}""", + bibtex_citation=r""" +@mastersthesis{navjord2023beyond, + author = {Navjord, J{\o}rgen Johnsen and Korsvik, Jon-Mikkel Ryen}, + school = {Norwegian University of Life Sciences, {\AA}s}, + title = {Beyond extractive: advancing abstractive automatic text summarization in Norwegian with transformers}, + year = {2023}, +} +""", prompt="Identify the categories (e.g.
sports) of given articles in Norwegian", ) @@ -81,12 +83,14 @@ class VGHierarchicalClusteringS2S(AbsTaskClusteringFast): dialect=[], task_subtypes=["Thematic clustering"], sample_creation="found", - bibtex_citation="""@mastersthesis{navjord2023beyond, - title={Beyond extractive: advancing abstractive automatic text summarization in Norwegian with transformers}, - author={Navjord, J{\\o}rgen Johnsen and Korsvik, Jon-Mikkel Ryen}, - year={2023}, - school={Norwegian University of Life Sciences, {\\AA}s} -}""", + bibtex_citation=r""" +@mastersthesis{navjord2023beyond, + author = {Navjord, J{\o}rgen Johnsen and Korsvik, Jon-Mikkel Ryen}, + school = {Norwegian University of Life Sciences, {\AA}s}, + title = {Beyond extractive: advancing abstractive automatic text summarization in Norwegian with transformers}, + year = {2023}, +} +""", prompt="Identify the categories (e.g. sports) of given articles in Norwegian", ) diff --git a/mteb/tasks/Clustering/nob/snl_clustering.py b/mteb/tasks/Clustering/nob/snl_clustering.py index 9256fc66c0..ae63ba1983 100644 --- a/mteb/tasks/Clustering/nob/snl_clustering.py +++ b/mteb/tasks/Clustering/nob/snl_clustering.py @@ -45,12 +45,14 @@ class SNLClustering(AbsTaskClustering): dialect=[], task_subtypes=["Thematic clustering"], sample_creation="found", - bibtex_citation="""@mastersthesis{navjord2023beyond, - title={Beyond extractive: advancing abstractive automatic text summarization in Norwegian with transformers}, - author={Navjord, J{\o}rgen Johnsen and Korsvik, Jon-Mikkel Ryen}, - year={2023}, - school={Norwegian University of Life Sciences, {\AA}s} -}""", + bibtex_citation=r""" +@mastersthesis{navjord2023beyond, + author = {Navjord, J{\o}rgen Johnsen and Korsvik, Jon-Mikkel Ryen}, + school = {Norwegian University of Life Sciences, {\AA}s}, + title = {Beyond extractive: advancing abstractive automatic text summarization in Norwegian with transformers}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git
a/mteb/tasks/Clustering/nob/vg_clustering.py b/mteb/tasks/Clustering/nob/vg_clustering.py index f1050e796b..d3e6aecdd2 100644 --- a/mteb/tasks/Clustering/nob/vg_clustering.py +++ b/mteb/tasks/Clustering/nob/vg_clustering.py @@ -45,12 +45,14 @@ class VGClustering(AbsTaskClustering): dialect=[], task_subtypes=["Thematic clustering"], sample_creation="found", - bibtex_citation="""@mastersthesis{navjord2023beyond, - title={Beyond extractive: advancing abstractive automatic text summarization in Norwegian with transformers}, - author={Navjord, J{\o}rgen Johnsen and Korsvik, Jon-Mikkel Ryen}, - year={2023}, - school={Norwegian University of Life Sciences, {\AA}s} -}""", + bibtex_citation=r""" +@mastersthesis{navjord2023beyond, + author = {Navjord, J{\o}rgen Johnsen and Korsvik, Jon-Mikkel Ryen}, + school = {Norwegian University of Life Sciences, {\AA}s}, + title = {Beyond extractive: advancing abstractive automatic text summarization in Norwegian with transformers}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Clustering/pol/PolishClustering.py b/mteb/tasks/Clustering/pol/PolishClustering.py index 86626e366d..8e4358e3c4 100644 --- a/mteb/tasks/Clustering/pol/PolishClustering.py +++ b/mteb/tasks/Clustering/pol/PolishClustering.py @@ -39,36 +39,38 @@ class EightTagsClustering(AbsTaskClustering): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{dadas-etal-2020-evaluation, - title = "Evaluation of Sentence Representations in {P}olish", - author = "Dadas, Slawomir and - Pere{\\l}kiewicz, Micha{\\l} and - Po{\\'s}wiata, Rafa{\\l}", - editor = "Calzolari, Nicoletta and - B{\'e}chet, Fr{\'e}d{\'e}ric and - Blache, Philippe and - Choukri, Khalid and - Cieri, Christopher and - Declerck, Thierry and - Goggi, Sara and - Isahara, Hitoshi and - Maegaard, Bente and - Mariani, Joseph and - Mazo, H{\\'e}l{\\`e}ne and - Moreno, Asuncion and - Odijk, Jan and - Piperidis, Stelios", - booktitle = 
"Proceedings of the Twelfth Language Resources and Evaluation Conference", - month = may, - year = "2020", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://aclanthology.org/2020.lrec-1.207", - pages = "1674--1680", - abstract = "Methods for learning sentence representations have been actively developed in recent years. However, the lack of pre-trained models and datasets annotated at the sentence level has been a problem for low-resource languages such as Polish which led to less interest in applying these methods to language-specific tasks. In this study, we introduce two new Polish datasets for evaluating sentence embeddings and provide a comprehensive evaluation of eight sentence representation methods including Polish and multilingual models. We consider classic word embedding models, recently developed contextual embeddings and multilingual sentence encoders, showing strengths and weaknesses of specific approaches. We also examine different methods of aggregating word vectors into a single sentence vector.", - language = "English", - ISBN = "979-10-95546-34-4", - }""", + bibtex_citation=r""" +@inproceedings{dadas-etal-2020-evaluation, + abstract = {Methods for learning sentence representations have been actively developed in recent years. However, the lack of pre-trained models and datasets annotated at the sentence level has been a problem for low-resource languages such as Polish which led to less interest in applying these methods to language-specific tasks. In this study, we introduce two new Polish datasets for evaluating sentence embeddings and provide a comprehensive evaluation of eight sentence representation methods including Polish and multilingual models. We consider classic word embedding models, recently developed contextual embeddings and multilingual sentence encoders, showing strengths and weaknesses of specific approaches. 
We also examine different methods of aggregating word vectors into a single sentence vector.}, + address = {Marseille, France}, + author = {Dadas, Slawomir and +Pere{\l}kiewicz, Micha{\l} and +Po{\'s}wiata, Rafa{\l}}, + booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference}, + editor = {Calzolari, Nicoletta and +B{\'e}chet, Fr{\'e}d{\'e}ric and +Blache, Philippe and +Choukri, Khalid and +Cieri, Christopher and +Declerck, Thierry and +Goggi, Sara and +Isahara, Hitoshi and +Maegaard, Bente and +Mariani, Joseph and +Mazo, H{\'e}l{\`e}ne and +Moreno, Asuncion and +Odijk, Jan and +Piperidis, Stelios}, + isbn = {979-10-95546-34-4}, + language = {English}, + month = may, + pages = {1674--1680}, + publisher = {European Language Resources Association}, + title = {Evaluation of Sentence Representations in {P}olish}, + url = {https://aclanthology.org/2020.lrec-1.207}, + year = {2020}, +} +""", ) @@ -98,36 +100,38 @@ class EightTagsClusteringFast(AbsTaskClusteringFast): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{dadas-etal-2020-evaluation, - title = "Evaluation of Sentence Representations in {P}olish", - author = "Dadas, Slawomir and - Pere{\\l}kiewicz, Micha{\\l} and - Po{\\'s}wiata, Rafa{\\l}", - editor = "Calzolari, Nicoletta and - B{\\'e}chet, Fr{\\'e}d{\\'e}ric and - Blache, Philippe and - Choukri, Khalid and - Cieri, Christopher and - Declerck, Thierry and - Goggi, Sara and - Isahara, Hitoshi and - Maegaard, Bente and - Mariani, Joseph and - Mazo, H{\\'e}l{\\`e}ne and - Moreno, Asuncion and - Odijk, Jan and - Piperidis, Stelios", - booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference", - month = may, - year = "2020", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://aclanthology.org/2020.lrec-1.207", - pages = "1674--1680", - abstract = "Methods for learning sentence representations
have been actively developed in recent years. However, the lack of pre-trained models and datasets annotated at the sentence level has been a problem for low-resource languages such as Polish which led to less interest in applying these methods to language-specific tasks. In this study, we introduce two new Polish datasets for evaluating sentence embeddings and provide a comprehensive evaluation of eight sentence representation methods including Polish and multilingual models. We consider classic word embedding models, recently developed contextual embeddings and multilingual sentence encoders, showing strengths and weaknesses of specific approaches. We also examine different methods of aggregating word vectors into a single sentence vector.", - language = "English", - ISBN = "979-10-95546-34-4", - }""", + bibtex_citation=r""" +@inproceedings{dadas-etal-2020-evaluation, + abstract = {Methods for learning sentence representations have been actively developed in recent years. However, the lack of pre-trained models and datasets annotated at the sentence level has been a problem for low-resource languages such as Polish which led to less interest in applying these methods to language-specific tasks. In this study, we introduce two new Polish datasets for evaluating sentence embeddings and provide a comprehensive evaluation of eight sentence representation methods including Polish and multilingual models. We consider classic word embedding models, recently developed contextual embeddings and multilingual sentence encoders, showing strengths and weaknesses of specific approaches. 
We also examine different methods of aggregating word vectors into a single sentence vector.}, + address = {Marseille, France}, + author = {Dadas, Slawomir and +Pere{\l}kiewicz, Micha{\l} and +Po{\'s}wiata, Rafa{\l}}, + booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference}, + editor = {Calzolari, Nicoletta and +B{\'e}chet, Fr{\'e}d{\'e}ric and +Blache, Philippe and +Choukri, Khalid and +Cieri, Christopher and +Declerck, Thierry and +Goggi, Sara and +Isahara, Hitoshi and +Maegaard, Bente and +Mariani, Joseph and +Mazo, H{\'e}l{\`e}ne and +Moreno, Asuncion and +Odijk, Jan and +Piperidis, Stelios}, + isbn = {979-10-95546-34-4}, + language = {English}, + month = may, + pages = {1674--1680}, + publisher = {European Language Resources Association}, + title = {Evaluation of Sentence Representations in {P}olish}, + url = {https://aclanthology.org/2020.lrec-1.207}, + year = {2020}, +} +""", adapted_from=["EightTagsClustering"], ) diff --git a/mteb/tasks/Clustering/swe/SwednClustering.py b/mteb/tasks/Clustering/swe/SwednClustering.py index bef817ab6f..6845806aa8 100644 --- a/mteb/tasks/Clustering/swe/SwednClustering.py +++ b/mteb/tasks/Clustering/swe/SwednClustering.py @@ -81,12 +81,14 @@ class SwednClusteringP2P(AbsTaskClusteringFast): dialect=[], task_subtypes=["Thematic clustering"], sample_creation="found", - bibtex_citation="""@inproceedings{monsen2021method, - title={A method for building non-english corpora for abstractive text summarization}, - author={Monsen, Julius and J{\"o}nsson, Arne}, - booktitle={Proceedings of CLARIN Annual Conference}, - year={2021} -}""", + bibtex_citation=r""" +@inproceedings{monsen2021method, + author = {Monsen, Julius and J{\"o}nsson, Arne}, + booktitle = {Proceedings of CLARIN Annual Conference}, + title = {A method for building non-english corpora for abstractive text summarization}, + year = {2021}, +} +""", prompt="Identify news categories in Swedish passages", ) @@ -121,12 +123,14 @@ class
SwednClusteringFastS2S(AbsTaskClusteringFast): dialect=[], task_subtypes=["Thematic clustering"], sample_creation="found", - bibtex_citation="""@inproceedings{monsen2021method, - title={A method for building non-english corpora for abstractive text summarization}, - author={Monsen, Julius and J{\"o}nsson, Arne}, - booktitle={Proceedings of CLARIN Annual Conference}, - year={2021} -}""", + bibtex_citation=r""" +@inproceedings{monsen2021method, + author = {Monsen, Julius and J{\"o}nsson, Arne}, + booktitle = {Proceedings of CLARIN Annual Conference}, + title = {A method for building non-english corpora for abstractive text summarization}, + year = {2021}, +} +""", prompt="Identify news categories in Swedish passages", ) diff --git a/mteb/tasks/Clustering/swe/swedn_clustering.py b/mteb/tasks/Clustering/swe/swedn_clustering.py index ab13883172..597496f35e 100644 --- a/mteb/tasks/Clustering/swe/swedn_clustering.py +++ b/mteb/tasks/Clustering/swe/swedn_clustering.py @@ -48,12 +48,14 @@ class SwednClustering(AbsTaskClustering): dialect=[], task_subtypes=["Thematic clustering"], sample_creation="found", - bibtex_citation="""@inproceedings{monsen2021method, - title={A method for building non-english corpora for abstractive text summarization}, - author={Monsen, Julius and J{\"o}nsson, Arne}, - booktitle={Proceedings of CLARIN Annual Conference}, - year={2021} -}""", + bibtex_citation=r""" +@inproceedings{monsen2021method, + author = {Monsen, Julius and J{\"o}nsson, Arne}, + booktitle = {Proceedings of CLARIN Annual Conference}, + title = {A method for building non-english corpora for abstractive text summarization}, + year = {2021}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Clustering/zho/CMTEBClustering.py b/mteb/tasks/Clustering/zho/CMTEBClustering.py index 856a969ba0..24ea372983 100644 --- a/mteb/tasks/Clustering/zho/CMTEBClustering.py +++ b/mteb/tasks/Clustering/zho/CMTEBClustering.py @@ -39,14 +39,16 @@ class 
CLSClusteringFastS2S(AbsTaskClusteringFast): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{li2022csl, - title={CSL: A Large-scale Chinese Scientific Literature Dataset}, - author={Yudong Li and Yuqing Zhang and Zhe Zhao and Linlin Shen and Weijie Liu and Weiquan Mao and Hui Zhang}, - year={2022}, - eprint={2209.05034}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }""", + bibtex_citation=r""" +@misc{li2022csl, + archiveprefix = {arXiv}, + author = {Yudong Li and Yuqing Zhang and Zhe Zhao and Linlin Shen and Weijie Liu and Weiquan Mao and Hui Zhang}, + eprint = {2209.05034}, + primaryclass = {cs.CL}, + title = {CSL: A Large-scale Chinese Scientific Literature Dataset}, + year = {2022}, +} +""", prompt="Identify the main category of scholar papers based on the titles", adapted_from=["CLSClusteringS2S"], ) @@ -96,14 +98,16 @@ class CLSClusteringFastP2P(AbsTaskClusteringFast): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{li2022csl, - title={CSL: A Large-scale Chinese Scientific Literature Dataset}, - author={Yudong Li and Yuqing Zhang and Zhe Zhao and Linlin Shen and Weijie Liu and Weiquan Mao and Hui Zhang}, - year={2022}, - eprint={2209.05034}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }""", + bibtex_citation=r""" +@misc{li2022csl, + archiveprefix = {arXiv}, + author = {Yudong Li and Yuqing Zhang and Zhe Zhao and Linlin Shen and Weijie Liu and Weiquan Mao and Hui Zhang}, + eprint = {2209.05034}, + primaryclass = {cs.CL}, + title = {CSL: A Large-scale Chinese Scientific Literature Dataset}, + year = {2022}, +} +""", prompt="Identify the main category of scholar papers based on the titles and abstracts", adapted_from=["CLSClusteringP2P"], ) @@ -152,12 +156,12 @@ class CLSClusteringS2S(AbsTaskClustering): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation=""" + bibtex_citation=r""" @article{li2022csl, - title={CSL: A large-scale 
Chinese scientific literature dataset}, - author={Li, Yudong and Zhang, Yuqing and Zhao, Zhe and Shen, Linlin and Liu, Weijie and Mao, Weiquan and Zhang, Hui}, - journal={arXiv preprint arXiv:2209.05034}, - year={2022} + author = {Li, Yudong and Zhang, Yuqing and Zhao, Zhe and Shen, Linlin and Liu, Weijie and Mao, Weiquan and Zhang, Hui}, + journal = {arXiv preprint arXiv:2209.05034}, + title = {CSL: A large-scale Chinese scientific literature dataset}, + year = {2022}, } """, prompt="Identify the main category of scholar papers based on the titles", @@ -188,12 +192,14 @@ class CLSClusteringP2P(AbsTaskClustering): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@article{li2022csl, - title={CSL: A large-scale Chinese scientific literature dataset}, - author={Li, Yudong and Zhang, Yuqing and Zhao, Zhe and Shen, Linlin and Liu, Weijie and Mao, Weiquan and Zhang, Hui}, - journal={arXiv preprint arXiv:2209.05034}, - year={2022} -}""", + bibtex_citation=r""" +@article{li2022csl, + author = {Li, Yudong and Zhang, Yuqing and Zhao, Zhe and Shen, Linlin and Liu, Weijie and Mao, Weiquan and Zhang, Hui}, + journal = {arXiv preprint arXiv:2209.05034}, + title = {CSL: A large-scale Chinese scientific literature dataset}, + year = {2022}, +} +""", prompt="Identify the main category of scholar papers based on the titles and abstracts", ) @@ -223,14 +229,16 @@ class ThuNewsClusteringFastS2S(AbsTaskClusteringFast): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@software{THUCTC, + bibtex_citation=r""" +@software{THUCTC, author = {Sun, M. and Li, J. and Guo, Z. and Yu, Z. and Zheng, Y. and Si, X. 
and Liu, Z.}, - title = {THUCTC: An Efficient Chinese Text Classifier}, - year = {2016}, note = {THU Chinese Text Classification Toolkit}, publisher = {THU Natural Language Processing Lab}, - url = {https://github.com/thunlp/THUCTC} -}""", + title = {THUCTC: An Efficient Chinese Text Classifier}, + url = {https://github.com/thunlp/THUCTC}, + year = {2016}, +} +""", prompt="Identify the topic or theme of the given news articles based on the titles", adapted_from=["ThuNewsClusteringS2S"], ) @@ -280,14 +288,16 @@ class ThuNewsClusteringFastP2P(AbsTaskClusteringFast): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@software{THUCTC, + bibtex_citation=r""" +@software{THUCTC, author = {Sun, M. and Li, J. and Guo, Z. and Yu, Z. and Zheng, Y. and Si, X. and Liu, Z.}, - title = {THUCTC: An Efficient Chinese Text Classifier}, - year = {2016}, note = {THU Chinese Text Classification Toolkit}, publisher = {THU Natural Language Processing Lab}, - url = {https://github.com/thunlp/THUCTC} -}""", + title = {THUCTC: An Efficient Chinese Text Classifier}, + url = {https://github.com/thunlp/THUCTC}, + year = {2016}, +} +""", prompt="Identify the topic or theme of the given news articles based on the titles and contents", adapted_from=["ThuNewsClusteringP2P"], ) @@ -336,19 +346,20 @@ class ThuNewsClusteringS2S(AbsTaskClustering): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{eisner2007proceedings, - title={Proceedings of the 2007 joint conference on empirical methods in natural language processing and computational natural language learning (EMNLP-CoNLL)}, - author={Eisner, Jason}, - booktitle={Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL)}, - year={2007} + author = {Eisner, Jason}, + booktitle = {Proceedings of the 2007 Joint Conference on Empirical Methods in 
Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL)}, + title = {Proceedings of the 2007 joint conference on empirical methods in natural language processing and computational natural language learning (EMNLP-CoNLL)}, + year = {2007}, } + @inproceedings{li2006comparison, - title={A comparison and semi-quantitative analysis of words and character-bigrams as features in chinese text categorization}, - author={Li, Jingyang and Sun, Maosong and Zhang, Xian}, - booktitle={proceedings of the 21st international conference on computational linguistics and 44th annual meeting of the association for computational linguistics}, - pages={545--552}, - year={2006} + author = {Li, Jingyang and Sun, Maosong and Zhang, Xian}, + booktitle = {proceedings of the 21st international conference on computational linguistics and 44th annual meeting of the association for computational linguistics}, + pages = {545--552}, + title = {A comparison and semi-quantitative analysis of words and character-bigrams as features in chinese text categorization}, + year = {2006}, } """, prompt="Identify the topic or theme of the given news articles based on the titles", @@ -379,19 +390,20 @@ class ThuNewsClusteringP2P(AbsTaskClustering): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{eisner2007proceedings, - title={Proceedings of the 2007 joint conference on empirical methods in natural language processing and computational natural language learning (EMNLP-CoNLL)}, - author={Eisner, Jason}, - booktitle={Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL)}, - year={2007} + author = {Eisner, Jason}, + booktitle = {Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL)}, + title = {Proceedings of the 2007 
joint conference on empirical methods in natural language processing and computational natural language learning (EMNLP-CoNLL)}, + year = {2007}, } + @inproceedings{li2006comparison, - title={A comparison and semi-quantitative analysis of words and character-bigrams as features in chinese text categorization}, - author={Li, Jingyang and Sun, Maosong and Zhang, Xian}, - booktitle={proceedings of the 21st international conference on computational linguistics and 44th annual meeting of the association for computational linguistics}, - pages={545--552}, - year={2006} + author = {Li, Jingyang and Sun, Maosong and Zhang, Xian}, + booktitle = {proceedings of the 21st international conference on computational linguistics and 44th annual meeting of the association for computational linguistics}, + pages = {545--552}, + title = {A comparison and semi-quantitative analysis of words and character-bigrams as features in chinese text categorization}, + year = {2006}, } """, prompt="Identify the topic or theme of the given news articles based on the titles and contents", diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py index 7a724d22e7..3ebe9db7a6 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py @@ -27,12 +27,14 @@ class BLINKIT2IMultiChoice(AbsTaskAny2AnyMultiChoice): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{fu2024blink, - title={Blink: Multimodal large language models can see but not perceive}, - author={Fu, Xingyu and Hu, Yushi and Li, Bangzheng and Feng, Yu and Wang, Haoyu and Lin, Xudong and Roth, Dan and Smith, Noah A and Ma, Wei-Chiu and Krishna, Ranjay}, - journal={arXiv preprint arXiv:2404.12390}, - year={2024} - }""", + bibtex_citation=r""" +@article{fu2024blink, + author = {Fu, Xingyu and Hu, Yushi and Li, Bangzheng and Feng, 
Yu and Wang, Haoyu and Lin, Xudong and Roth, Dan and Smith, Noah A and Ma, Wei-Chiu and Krishna, Ranjay}, + journal = {arXiv preprint arXiv:2404.12390}, + title = {Blink: Multimodal large language models can see but not perceive}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 1206}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py index 0f191398ff..225cb0d971 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py @@ -26,12 +26,14 @@ class BLINKIT2TMultiChoice(AbsTaskAny2AnyMultiChoice): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{fu2024blink, - title={Blink: Multimodal large language models can see but not perceive}, - author={Fu, Xingyu and Hu, Yushi and Li, Bangzheng and Feng, Yu and Wang, Haoyu and Lin, Xudong and Roth, Dan and Smith, Noah A and Ma, Wei-Chiu and Krishna, Ranjay}, - journal={arXiv preprint arXiv:2404.12390}, - year={2024} - }""", + bibtex_citation=r""" +@article{fu2024blink, + author = {Fu, Xingyu and Hu, Yushi and Li, Bangzheng and Feng, Yu and Wang, Haoyu and Lin, Xudong and Roth, Dan and Smith, Noah A and Ma, Wei-Chiu and Krishna, Ranjay}, + journal = {arXiv preprint arXiv:2404.12390}, + title = {Blink: Multimodal large language models can see but not perceive}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 813}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/CVBench.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/CVBench.py index 2409424e96..5c36549cdc 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/CVBench.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/CVBench.py @@ -109,12 +109,14 @@ class CVBenchCount(AbsTaskAny2AnyMultiChoice): dialect=[], modalities=["image", "text"], sample_creation="found", - 
bibtex_citation="""@article{tong2024cambrian, - title={Cambrian-1: A fully open, vision-centric exploration of multimodal llms}, - author={Tong, Shengbang and Brown, Ellis and Wu, Penghao and Woo, Sanghyun and Middepogu, Manoj and Akula, Sai Charitha and Yang, Jihan and Yang, Shusheng and Iyer, Adithya and Pan, Xichen and others}, - journal={arXiv preprint arXiv:2406.16860}, - year={2024} -}""", + bibtex_citation=r""" +@article{tong2024cambrian, + author = {Tong, Shengbang and Brown, Ellis and Wu, Penghao and Woo, Sanghyun and Middepogu, Manoj and Akula, Sai Charitha and Yang, Jihan and Yang, Shusheng and Iyer, Adithya and Pan, Xichen and others}, + journal = {arXiv preprint arXiv:2406.16860}, + title = {Cambrian-1: A fully open, vision-centric exploration of multimodal llms}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 419}, "avg_character_length": { @@ -162,12 +164,14 @@ class CVBenchRelation(AbsTaskAny2AnyMultiChoice): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{tong2024cambrian, - title={Cambrian-1: A fully open, vision-centric exploration of multimodal llms}, - author={Tong, Shengbang and Brown, Ellis and Wu, Penghao and Woo, Sanghyun and Middepogu, Manoj and Akula, Sai Charitha and Yang, Jihan and Yang, Shusheng and Iyer, Adithya and Pan, Xichen and others}, - journal={arXiv preprint arXiv:2406.16860}, - year={2024} -}""", + bibtex_citation=r""" +@article{tong2024cambrian, + author = {Tong, Shengbang and Brown, Ellis and Wu, Penghao and Woo, Sanghyun and Middepogu, Manoj and Akula, Sai Charitha and Yang, Jihan and Yang, Shusheng and Iyer, Adithya and Pan, Xichen and others}, + journal = {arXiv preprint arXiv:2406.16860}, + title = {Cambrian-1: A fully open, vision-centric exploration of multimodal llms}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 654}, "avg_character_length": { @@ -215,12 +219,14 @@ class CVBenchDepth(AbsTaskAny2AnyMultiChoice): dialect=[], 
modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{tong2024cambrian, - title={Cambrian-1: A fully open, vision-centric exploration of multimodal llms}, - author={Tong, Shengbang and Brown, Ellis and Wu, Penghao and Woo, Sanghyun and Middepogu, Manoj and Akula, Sai Charitha and Yang, Jihan and Yang, Shusheng and Iyer, Adithya and Pan, Xichen and others}, - journal={arXiv preprint arXiv:2406.16860}, - year={2024} -}""", + bibtex_citation=r""" +@article{tong2024cambrian, + author = {Tong, Shengbang and Brown, Ellis and Wu, Penghao and Woo, Sanghyun and Middepogu, Manoj and Akula, Sai Charitha and Yang, Jihan and Yang, Shusheng and Iyer, Adithya and Pan, Xichen and others}, + journal = {arXiv preprint arXiv:2406.16860}, + title = {Cambrian-1: A fully open, vision-centric exploration of multimodal llms}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 669}, "avg_character_length": { @@ -268,12 +274,14 @@ class CVBenchDistance(AbsTaskAny2AnyMultiChoice): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{tong2024cambrian, - title={Cambrian-1: A fully open, vision-centric exploration of multimodal llms}, - author={Tong, Shengbang and Brown, Ellis and Wu, Penghao and Woo, Sanghyun and Middepogu, Manoj and Akula, Sai Charitha and Yang, Jihan and Yang, Shusheng and Iyer, Adithya and Pan, Xichen and others}, - journal={arXiv preprint arXiv:2406.16860}, - year={2024} -}""", + bibtex_citation=r""" +@article{tong2024cambrian, + author = {Tong, Shengbang and Brown, Ellis and Wu, Penghao and Woo, Sanghyun and Middepogu, Manoj and Akula, Sai Charitha and Yang, Jihan and Yang, Shusheng and Iyer, Adithya and Pan, Xichen and others}, + journal = {arXiv preprint arXiv:2406.16860}, + title = {Cambrian-1: A fully open, vision-centric exploration of multimodal llms}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 656}, "avg_character_length": { diff --git 
a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py index 55535d6344..eec1e9f3d4 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2IRetrieval.py @@ -27,12 +27,14 @@ class BLINKIT2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{fu2024blink, - title={Blink: Multimodal large language models can see but not perceive}, - author={Fu, Xingyu and Hu, Yushi and Li, Bangzheng and Feng, Yu and Wang, Haoyu and Lin, Xudong and Roth, Dan and Smith, Noah A and Ma, Wei-Chiu and Krishna, Ranjay}, - journal={arXiv preprint arXiv:2404.12390}, - year={2024} -}""", + bibtex_citation=r""" +@article{fu2024blink, + author = {Fu, Xingyu and Hu, Yushi and Li, Bangzheng and Feng, Yu and Wang, Haoyu and Lin, Xudong and Roth, Dan and Smith, Noah A and Ma, Wei-Chiu and Krishna, Ranjay}, + journal = {arXiv preprint arXiv:2404.12390}, + title = {Blink: Multimodal large language models can see but not perceive}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 402}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py index 9d480e2928..377e7c80c8 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/BLINKIT2TRetrieval.py @@ -27,12 +27,14 @@ class BLINKIT2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{fu2024blink, - title={Blink: Multimodal large language models can see but not perceive}, - author={Fu, Xingyu and Hu, Yushi and Li, Bangzheng and Feng, Yu and Wang, Haoyu and Lin, Xudong and Roth, Dan and Smith, Noah A and Ma, Wei-Chiu and Krishna, Ranjay}, - journal={arXiv preprint arXiv:2404.12390}, 
- year={2024} -}""", + bibtex_citation=r""" +@article{fu2024blink, + author = {Fu, Xingyu and Hu, Yushi and Li, Bangzheng and Feng, Yu and Wang, Haoyu and Lin, Xudong and Roth, Dan and Smith, Noah A and Ma, Wei-Chiu and Krishna, Ranjay}, + journal = {arXiv preprint arXiv:2404.12390}, + title = {Blink: Multimodal large language models can see but not perceive}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 1073}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py index ed0172ae79..91f6c970e8 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/CIRRIT2IRetrieval.py @@ -27,13 +27,15 @@ class CIRRIT2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@inproceedings{liu2021image, - title={Image retrieval on real-life images with pre-trained vision-and-language models}, - author={Liu, Zheyuan and Rodriguez-Opazo, Cristian and Teney, Damien and Gould, Stephen}, - booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, - pages={2125--2134}, - year={2021} - }""", + bibtex_citation=r""" +@inproceedings{liu2021image, + author = {Liu, Zheyuan and Rodriguez-Opazo, Cristian and Teney, Damien and Gould, Stephen}, + booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages = {2125--2134}, + title = {Image retrieval on real-life images with pre-trained vision-and-language models}, + year = {2021}, +} +""", prompt={ "query": "Retrieve a day-to-day image that aligns with the modification instructions of the provided image." 
}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/CUB200I2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/CUB200I2IRetrieval.py index 72ed0b7c75..1b8a39dc79 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/CUB200I2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/CUB200I2IRetrieval.py @@ -26,13 +26,15 @@ class CUB200I2I(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@article{article, - author = {Welinder, Peter and Branson, Steve and Mita, Takeshi and Wah, Catherine and Schroff, Florian and Belongie, Serge and Perona, Pietro}, - year = {2010}, - month = {09}, - pages = {}, - title = {Caltech-UCSD Birds 200} - }""", + bibtex_citation=r""" +@article{article, + author = {Welinder, Peter and Branson, Steve and Mita, Takeshi and Wah, Catherine and Schroff, Florian and Belongie, Serge and Perona, Pietro}, + month = {09}, + pages = {}, + title = {Caltech-UCSD Birds 200}, + year = {2010}, +} +""", descriptive_stats={ "n_samples": {"default": 5794}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/EDIST2ITRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/EDIST2ITRetrieval.py index ac7b310998..65941e2cf9 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/EDIST2ITRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/EDIST2ITRetrieval.py @@ -26,13 +26,15 @@ class EDIST2ITRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""@inproceedings{liu2023edis, - title={EDIS: Entity-Driven Image Search over Multimodal Web Content}, - author={Liu, Siqi and Feng, Weixi and Fu, Tsu-Jui and Chen, Wenhu and Wang, William}, - booktitle={Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing}, - pages={4877--4894}, - year={2023} -}""", + bibtex_citation=r""" +@inproceedings{liu2023edis, + author = {Liu, Siqi and Feng, Weixi and Fu, Tsu-Jui and Chen, Wenhu and Wang, William}, + 
booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing}, + pages = {4877--4894}, + title = {EDIS: Entity-Driven Image Search over Multimodal Web Content}, + year = {2023}, +} +""", prompt={"query": "Identify the news photo for the given caption."}, descriptive_stats={ "n_samples": {"test": 3241}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/EncyclopediaVQAIT2ITRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/EncyclopediaVQAIT2ITRetrieval.py index 01f2e6a980..7105c4f391 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/EncyclopediaVQAIT2ITRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/EncyclopediaVQAIT2ITRetrieval.py @@ -26,13 +26,15 @@ class EncyclopediaVQAIT2ITRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@inproceedings{mensink2023encyclopedic, - title={Encyclopedic VQA: Visual questions about detailed properties of fine-grained categories}, - author={Mensink, Thomas and Uijlings, Jasper and Castrejon, Lluis and Goel, Arushi and Cadar, Felipe and Zhou, Howard and Sha, Fei and Araujo, Andr{\'e} and Ferrari, Vittorio}, - booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, - pages={3113--3124}, - year={2023} -}""", + bibtex_citation=r""" +@inproceedings{mensink2023encyclopedic, + author = {Mensink, Thomas and Uijlings, Jasper and Castrejon, Lluis and Goel, Arushi and Cadar, Felipe and Zhou, Howard and Sha, Fei and Araujo, Andr{\'e} and Ferrari, Vittorio}, + booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages = {3113--3124}, + title = {Encyclopedic VQA: Visual questions about detailed properties of fine-grained categories}, + year = {2023}, +} +""", prompt={ "query": "Obtain illustrated documents that correspond to the inquiry alongside the provided image." 
}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/FORBI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/FORBI2IRetrieval.py index 5fae69bfd0..ff3180e780 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/FORBI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/FORBI2IRetrieval.py @@ -26,15 +26,17 @@ class FORBI2I(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@misc{wu2023forbflatobjectretrieval, - title={FORB: A Flat Object Retrieval Benchmark for Universal Image Embedding}, - author={Pengxiang Wu and Siman Wang and Kevin Dela Rosa and Derek Hao Hu}, - year={2023}, - eprint={2309.16249}, - archivePrefix={arXiv}, - primaryClass={cs.CV}, - url={https://arxiv.org/abs/2309.16249}, - }""", + bibtex_citation=r""" +@misc{wu2023forbflatobjectretrieval, + archiveprefix = {arXiv}, + author = {Pengxiang Wu and Siman Wang and Kevin Dela Rosa and Derek Hao Hu}, + eprint = {2309.16249}, + primaryclass = {cs.CV}, + title = {FORB: A Flat Object Retrieval Benchmark for Universal Image Embedding}, + url = {https://arxiv.org/abs/2309.16249}, + year = {2023}, +} +""", descriptive_stats={ "n_samples": {"default": 13250}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kI2TRetrieval.py index 5ba43daf1d..c0e89b0810 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kI2TRetrieval.py @@ -26,13 +26,15 @@ class Fashion200kI2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""@inproceedings{han2017automatic, - title={Automatic spatially-aware fashion concept discovery}, - author={Han, Xintong and Wu, Zuxuan and Huang, Phoenix X and Zhang, Xiao and Zhu, Menglong and Li, Yuan and Zhao, Yang and Davis, Larry S}, - booktitle={Proceedings of the IEEE international 
conference on computer vision}, - pages={1463--1471}, - year={2017} -}""", + bibtex_citation=r""" +@inproceedings{han2017automatic, + author = {Han, Xintong and Wu, Zuxuan and Huang, Phoenix X and Zhang, Xiao and Zhu, Menglong and Li, Yuan and Zhao, Yang and Davis, Larry S}, + booktitle = {Proceedings of the IEEE international conference on computer vision}, + pages = {1463--1471}, + title = {Automatic spatially-aware fashion concept discovery}, + year = {2017}, +} +""", prompt={ "query": "Based on the following fashion description, retrieve the best matching image." }, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kT2IRetrieval.py index 1511de7aa4..385ec4a6b8 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Fashion200kT2IRetrieval.py @@ -27,13 +27,15 @@ class Fashion200kT2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""@inproceedings{han2017automatic, - title={Automatic spatially-aware fashion concept discovery}, - author={Han, Xintong and Wu, Zuxuan and Huang, Phoenix X and Zhang, Xiao and Zhu, Menglong and Li, Yuan and Zhao, Yang and Davis, Larry S}, - booktitle={Proceedings of the IEEE international conference on computer vision}, - pages={1463--1471}, - year={2017} -}""", + bibtex_citation=r""" +@inproceedings{han2017automatic, + author = {Han, Xintong and Wu, Zuxuan and Huang, Phoenix X and Zhang, Xiao and Zhu, Menglong and Li, Yuan and Zhao, Yang and Davis, Larry S}, + booktitle = {Proceedings of the IEEE international conference on computer vision}, + pages = {1463--1471}, + title = {Automatic spatially-aware fashion concept discovery}, + year = {2017}, +} +""", prompt={ "query": "Based on the following fashion description, retrieve the best matching image." 
}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/FashionIQIT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/FashionIQIT2IRetrieval.py index 4e1209c23c..a2910c3f16 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/FashionIQIT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/FashionIQIT2IRetrieval.py @@ -27,13 +27,15 @@ class FashionIQIT2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""@inproceedings{wu2021fashion, - title={Fashion iq: A new dataset towards retrieving images by natural language feedback}, - author={Wu, Hui and Gao, Yupeng and Guo, Xiaoxiao and Al-Halah, Ziad and Rennie, Steven and Grauman, Kristen and Feris, Rogerio}, - booktitle={Proceedings of the IEEE/CVF Conference on computer vision and pattern recognition}, - pages={11307--11317}, - year={2021} -}""", + bibtex_citation=r""" +@inproceedings{wu2021fashion, + author = {Wu, Hui and Gao, Yupeng and Guo, Xiaoxiao and Al-Halah, Ziad and Rennie, Steven and Grauman, Kristen and Feris, Rogerio}, + booktitle = {Proceedings of the IEEE/CVF Conference on computer vision and pattern recognition}, + pages = {11307--11317}, + title = {Fashion iq: A new dataset towards retrieving images by natural language feedback}, + year = {2021}, +} +""", prompt={ "query": "Find a fashion image that aligns with the reference image and style note." 
}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py index 43aeea20d4..1354473ba6 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kI2TRetrieval.py @@ -26,15 +26,17 @@ class Flickr30kI2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{Young2014FromID, - title={From image descriptions to visual denotations: New similarity metrics for semantic inference over event descriptions}, - author={Peter Young and Alice Lai and Micah Hodosh and J. Hockenmaier}, - journal={Transactions of the Association for Computational Linguistics}, - year={2014}, - volume={2}, - pages={67-78}, - url={https://api.semanticscholar.org/CorpusID:3104920} -}""", + bibtex_citation=r""" +@article{Young2014FromID, + author = {Peter Young and Alice Lai and Micah Hodosh and J. 
Hockenmaier}, + journal = {Transactions of the Association for Computational Linguistics}, + pages = {67-78}, + title = {From image descriptions to visual denotations: New similarity metrics for semantic inference over event descriptions}, + url = {https://api.semanticscholar.org/CorpusID:3104920}, + volume = {2}, + year = {2014}, +} +""", prompt={"query": "Find an image caption describing the following image."}, descriptive_stats={ "n_samples": {"test": 1000}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py index cb87cfcf86..fdc0597bfb 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/Flickr30kT2IRetrieval.py @@ -26,15 +26,17 @@ class Flickr30kT2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{Young2014FromID, - title={From image descriptions to visual denotations: New similarity metrics for semantic inference over event descriptions}, - author={Peter Young and Alice Lai and Micah Hodosh and J. Hockenmaier}, - journal={Transactions of the Association for Computational Linguistics}, - year={2014}, - volume={2}, - pages={67-78}, - url={https://api.semanticscholar.org/CorpusID:3104920} -}""", + bibtex_citation=r""" +@article{Young2014FromID, + author = {Peter Young and Alice Lai and Micah Hodosh and J. 
Hockenmaier}, + journal = {Transactions of the Association for Computational Linguistics}, + pages = {67-78}, + title = {From image descriptions to visual denotations: New similarity metrics for semantic inference over event descriptions}, + url = {https://api.semanticscholar.org/CorpusID:3104920}, + volume = {2}, + year = {2014}, +} +""", prompt={"query": "Find an image that matches the given caption."}, descriptive_stats={ "n_samples": {"test": 5000}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py index 48736f9e5c..95cd709a36 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2IRetrieval.py @@ -26,13 +26,15 @@ class GLDv2I2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@InProceedings{Weyand_2020_CVPR, -author = {Weyand, Tobias and Araujo, Andre and Cao, Bingyi and Sim, Jack}, -title = {Google Landmarks Dataset v2 - A Large-Scale Benchmark for Instance-Level Recognition and Retrieval}, -booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, -month = {June}, -year = {2020} -}""", + bibtex_citation=r""" +@inproceedings{Weyand_2020_CVPR, + author = {Weyand, Tobias and Araujo, Andre and Cao, Bingyi and Sim, Jack}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + title = {Google Landmarks Dataset v2 - A Large-Scale Benchmark for Instance-Level Recognition and Retrieval}, + year = {2020}, +} +""", descriptive_stats={ "n_samples": {"test": 1129}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py index 4b6f30082d..0a24ff2f14 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py +++ 
b/mteb/tasks/Image/Any2AnyRetrieval/eng/GLDv2I2TRetrieval.py @@ -26,13 +26,15 @@ class GLDv2I2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""@InProceedings{Weyand_2020_CVPR, -author = {Weyand, Tobias and Araujo, Andre and Cao, Bingyi and Sim, Jack}, -title = {Google Landmarks Dataset v2 - A Large-Scale Benchmark for Instance-Level Recognition and Retrieval}, -booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, -month = {June}, -year = {2020} -}""", + bibtex_citation=r""" +@inproceedings{Weyand_2020_CVPR, + author = {Weyand, Tobias and Araujo, Andre and Cao, Bingyi and Sim, Jack}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + title = {Google Landmarks Dataset v2 - A Large-Scale Benchmark for Instance-Level Recognition and Retrieval}, + year = {2020}, +} +""", descriptive_stats={ "n_samples": {"test": 1972}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesI2TRetrieval.py index 9d0cc4ee86..4d9d2bf1a1 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesI2TRetrieval.py @@ -85,14 +85,16 @@ class HatefulMemesI2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{kiela2020hateful, - title={The hateful memes challenge: Detecting hate speech in multimodal memes}, - author={Kiela, Douwe and Firooz, Hamed and Mohan, Aravind and Goswami, Vedanuj and Singh, Amanpreet and Ringshia, Pratik and Testuggine, Davide}, - journal={Advances in neural information processing systems}, - volume={33}, - pages={2611--2624}, - year={2020} -}""", + bibtex_citation=r""" +@article{kiela2020hateful, + author = {Kiela, 
Douwe and Firooz, Hamed and Mohan, Aravind and Goswami, Vedanuj and Singh, Amanpreet and Ringshia, Pratik and Testuggine, Davide}, + journal = {Advances in neural information processing systems}, + pages = {2611--2624}, + title = {The hateful memes challenge: Detecting hate speech in multimodal memes}, + volume = {33}, + year = {2020}, +} +""", descriptive_stats={ "n_samples": None, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesT2IRetrieval.py index 0da8d6775b..4d403bb310 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/HatefulMemesT2IRetrieval.py @@ -85,14 +85,16 @@ class HatefulMemesT2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{kiela2020hateful, - title={The hateful memes challenge: Detecting hate speech in multimodal memes}, - author={Kiela, Douwe and Firooz, Hamed and Mohan, Aravind and Goswami, Vedanuj and Singh, Amanpreet and Ringshia, Pratik and Testuggine, Davide}, - journal={Advances in neural information processing systems}, - volume={33}, - pages={2611--2624}, - year={2020} -}""", + bibtex_citation=r""" +@article{kiela2020hateful, + author = {Kiela, Douwe and Firooz, Hamed and Mohan, Aravind and Goswami, Vedanuj and Singh, Amanpreet and Ringshia, Pratik and Testuggine, Davide}, + journal = {Advances in neural information processing systems}, + pages = {2611--2624}, + title = {The hateful memes challenge: Detecting hate speech in multimodal memes}, + volume = {33}, + year = {2020}, +} +""", descriptive_stats={ "n_samples": None, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py index 0e1b99bffc..8b420db51a 100644 --- 
a/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/ImageCoDeT2IRetrieval.py @@ -26,12 +26,14 @@ class ImageCoDeT2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{krojer2022image, - title={Image retrieval from contextual descriptions}, - author={Krojer, Benno and Adlakha, Vaibhav and Vineet, Vibhav and Goyal, Yash and Ponti, Edoardo and Reddy, Siva}, - journal={arXiv preprint arXiv:2203.15867}, - year={2022} -}""", + bibtex_citation=r""" +@article{krojer2022image, + author = {Krojer, Benno and Adlakha, Vaibhav and Vineet, Vibhav and Goyal, Yash and Ponti, Edoardo and Reddy, Siva}, + journal = {arXiv preprint arXiv:2203.15867}, + title = {Image retrieval from contextual descriptions}, + year = {2022}, +} +""", descriptive_stats={ "n_samples": {"test": 2302}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2ITRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2ITRetrieval.py index f695de1d19..ebbf3936a3 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2ITRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2ITRetrieval.py @@ -27,13 +27,15 @@ class InfoSeekIT2ITRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@inproceedings{chen2023can, - title={Can Pre-trained Vision and Language Models Answer Visual Information-Seeking Questions?}, - author={Chen, Yang and Hu, Hexiang and Luan, Yi and Sun, Haitian and Changpinyo, Soravit and Ritter, Alan and Chang, Ming-Wei}, - booktitle={Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing}, - pages={14948--14968}, - year={2023} -}""", + bibtex_citation=r""" +@inproceedings{chen2023can, + author = {Chen, Yang and Hu, Hexiang and Luan, Yi and Sun, Haitian and Changpinyo, Soravit and Ritter, Alan and Chang, 
Ming-Wei}, + booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing}, + pages = {14948--14968}, + title = {Can Pre-trained Vision and Language Models Answer Visual Information-Seeking Questions?}, + year = {2023}, +} +""", prompt={ "query": "Find an image and subject description from Wikipedia that answers my question about this image." }, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2TRetrieval.py index e5cecd8591..bd11c5584c 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/InfoSeekIT2TRetrieval.py @@ -27,13 +27,15 @@ class InfoSeekIT2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@inproceedings{chen2023can, - title={Can Pre-trained Vision and Language Models Answer Visual Information-Seeking Questions?}, - author={Chen, Yang and Hu, Hexiang and Luan, Yi and Sun, Haitian and Changpinyo, Soravit and Ritter, Alan and Chang, Ming-Wei}, - booktitle={Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing}, - pages={14948--14968}, - year={2023} -}""", + bibtex_citation=r""" +@inproceedings{chen2023can, + author = {Chen, Yang and Hu, Hexiang and Luan, Yi and Sun, Haitian and Changpinyo, Soravit and Ritter, Alan and Chang, Ming-Wei}, + booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing}, + pages = {14948--14968}, + title = {Can Pre-trained Vision and Language Models Answer Visual Information-Seeking Questions?}, + year = {2023}, +} +""", prompt={ "query": "Find a paragraph from Wikipedia that answers my question about this image." 
}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/LLaVAIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/LLaVAIT2TRetrieval.py index 9a0ded2203..9147520531 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/LLaVAIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/LLaVAIT2TRetrieval.py @@ -26,24 +26,26 @@ class LLaVAIT2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@inproceedings{lin-etal-2024-preflmr, - title = "{P}re{FLMR}: Scaling Up Fine-Grained Late-Interaction Multi-modal Retrievers", - author = "Lin, Weizhe and - Mei, Jingbiao and - Chen, Jinghong and - Byrne, Bill", - editor = "Ku, Lun-Wei and - Martins, Andre and - Srikumar, Vivek", - booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)", - month = aug, - year = "2024", - address = "Bangkok, Thailand", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2024.acl-long.289", - doi = "10.18653/v1/2024.acl-long.289", - pages = "5294--5316", -}""", + bibtex_citation=r""" +@inproceedings{lin-etal-2024-preflmr, + address = {Bangkok, Thailand}, + author = {Lin, Weizhe and +Mei, Jingbiao and +Chen, Jinghong and +Byrne, Bill}, + booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + doi = {10.18653/v1/2024.acl-long.289}, + editor = {Ku, Lun-Wei and +Martins, Andre and +Srikumar, Vivek}, + month = aug, + pages = {5294--5316}, + publisher = {Association for Computational Linguistics}, + title = {{P}re{FLMR}: Scaling Up Fine-Grained Late-Interaction Multi-modal Retrievers}, + url = {https://aclanthology.org/2024.acl-long.289}, + year = {2024}, +} +""", prompt={ "query": "Provide a specific decription of the image along with the following question." 
}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py index 8cd8a1b217..30a77c0e7c 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/METI2IRetrieval.py @@ -26,12 +26,14 @@ class METI2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@inproceedings{ypsilantis2021met, - title={The met dataset: Instance-level recognition for artworks}, - author={Ypsilantis, Nikolaos-Antonios and Garcia, Noa and Han, Guangxing and Ibrahimi, Sarah and Van Noord, Nanne and Tolias, Giorgos}, - booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)}, - year={2021} -}""", + bibtex_citation=r""" +@inproceedings{ypsilantis2021met, + author = {Ypsilantis, Nikolaos-Antonios and Garcia, Noa and Han, Guangxing and Ibrahimi, Sarah and Van Noord, Nanne and Tolias, Giorgos}, + booktitle = {Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)}, + title = {The met dataset: Instance-level recognition for artworks}, + year = {2021}, +} +""", descriptive_stats={ # "n_samples": {"default": 397121}, }, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOI2TRetrieval.py index bc4ce63c72..ca15edc3f1 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOI2TRetrieval.py @@ -27,14 +27,16 @@ class MSCOCOI2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@inproceedings{lin2014microsoft, - title={Microsoft coco: Common objects in context}, - author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, 
C Lawrence}, - booktitle={Computer Vision--ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13}, - pages={740--755}, - year={2014}, - organization={Springer} - }""", + bibtex_citation=r""" +@inproceedings{lin2014microsoft, + author = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle = {Computer Vision--ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13}, + organization = {Springer}, + pages = {740--755}, + title = {Microsoft coco: Common objects in context}, + year = {2014}, +} +""", prompt={ "query": "Find an image caption describing the following everyday image." }, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOT2IRetrieval.py index 4885e236c2..534f19d573 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/MSCOCOT2IRetrieval.py @@ -27,14 +27,16 @@ class MSCOCOT2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@inproceedings{lin2014microsoft, - title={Microsoft coco: Common objects in context}, - author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, - booktitle={Computer Vision--ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13}, - pages={740--755}, - year={2014}, - organization={Springer} - }""", + bibtex_citation=r""" +@inproceedings{lin2014microsoft, + author = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle = {Computer Vision--ECCV 2014: 13th European 
Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13}, + organization = {Springer}, + pages = {740--755}, + title = {Microsoft coco: Common objects in context}, + year = {2014}, +} +""", prompt={"query": "Identify the image showcasing the described everyday scene."}, descriptive_stats={ "n_samples": {"test": 24809}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionI2TRetrieval.py index 6102388b41..7f93fb1fe8 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionI2TRetrieval.py @@ -109,13 +109,15 @@ class MemotionI2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@inproceedings{sharma2020semeval, - title={SemEval-2020 Task 8: Memotion Analysis-the Visuo-Lingual Metaphor!}, - author={Sharma, Chhavi and Bhageria, Deepesh and Scott, William and Pykl, Srinivas and Das, Amitava and Chakraborty, Tanmoy and Pulabaigari, Viswanath and Gamb{\"a}ck, Bj{\"o}rn}, - booktitle={Proceedings of the Fourteenth Workshop on Semantic Evaluation}, - pages={759--773}, - year={2020} -}""", + bibtex_citation=r""" +@inproceedings{sharma2020semeval, + author = {Sharma, Chhavi and Bhageria, Deepesh and Scott, William and Pykl, Srinivas and Das, Amitava and Chakraborty, Tanmoy and Pulabaigari, Viswanath and Gamb{\"a}ck, Bj{\"o}rn}, + booktitle = {Proceedings of the Fourteenth Workshop on Semantic Evaluation}, + pages = {759--773}, + title = {SemEval-2020 Task 8: Memotion Analysis-the Visuo-Lingual Metaphor!}, + year = {2020}, +} +""", descriptive_stats={ "n_samples": None, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionT2IRetrieval.py index 1afc30f90c..ebe784a78e 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionT2IRetrieval.py +++ 
b/mteb/tasks/Image/Any2AnyRetrieval/eng/MemotionT2IRetrieval.py @@ -107,13 +107,15 @@ class MemotionT2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@inproceedings{sharma2020semeval, - title={SemEval-2020 Task 8: Memotion Analysis-the Visuo-Lingual Metaphor!}, - author={Sharma, Chhavi and Bhageria, Deepesh and Scott, William and Pykl, Srinivas and Das, Amitava and Chakraborty, Tanmoy and Pulabaigari, Viswanath and Gamb{\"a}ck, Bj{\"o}rn}, - booktitle={Proceedings of the Fourteenth Workshop on Semantic Evaluation}, - pages={759--773}, - year={2020} -}""", + bibtex_citation=r""" +@inproceedings{sharma2020semeval, + author = {Sharma, Chhavi and Bhageria, Deepesh and Scott, William and Pykl, Srinivas and Das, Amitava and Chakraborty, Tanmoy and Pulabaigari, Viswanath and Gamb{\"a}ck, Bj{\"o}rn}, + booktitle = {Proceedings of the Fourteenth Workshop on Semantic Evaluation}, + pages = {759--773}, + title = {SemEval-2020 Task 8: Memotion Analysis-the Visuo-Lingual Metaphor!}, + year = {2020}, +} +""", descriptive_stats={ "n_samples": None, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/NIGHTSI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/NIGHTSI2IRetrieval.py index aa05ac6494..90fcfc2a8d 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/NIGHTSI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/NIGHTSI2IRetrieval.py @@ -26,13 +26,15 @@ class NIGHTSI2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@article{fu2024dreamsim, - title={DreamSim: Learning New Dimensions of Human Visual Similarity using Synthetic Data}, - author={Fu, Stephanie and Tamir, Netanel and Sundaram, Shobhita and Chai, Lucy and Zhang, Richard and Dekel, Tali and Isola, Phillip}, - journal={Advances in Neural Information Processing Systems}, - volume={36}, - year={2024} -}""", + bibtex_citation=r""" 
+@article{fu2024dreamsim, + author = {Fu, Stephanie and Tamir, Netanel and Sundaram, Shobhita and Chai, Lucy and Zhang, Richard and Dekel, Tali and Isola, Phillip}, + journal = {Advances in Neural Information Processing Systems}, + title = {DreamSim: Learning New Dimensions of Human Visual Similarity using Synthetic Data}, + volume = {36}, + year = {2024}, +} +""", prompt={ "query": "Find a day-to-day image that looks similar to the provided image." }, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/OKVQAIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/OKVQAIT2TRetrieval.py index 65b1c3b202..69c0bd8372 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/OKVQAIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/OKVQAIT2TRetrieval.py @@ -26,13 +26,15 @@ class OKVQAIT2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@inproceedings{marino2019ok, - title={Ok-vqa: A visual question answering benchmark requiring external knowledge}, - author={Marino, Kenneth and Rastegari, Mohammad and Farhadi, Ali and Mottaghi, Roozbeh}, - booktitle={Proceedings of the IEEE/cvf conference on computer vision and pattern recognition}, - pages={3195--3204}, - year={2019} -}""", + bibtex_citation=r""" +@inproceedings{marino2019ok, + author = {Marino, Kenneth and Rastegari, Mohammad and Farhadi, Ali and Mottaghi, Roozbeh}, + booktitle = {Proceedings of the IEEE/cvf conference on computer vision and pattern recognition}, + pages = {3195--3204}, + title = {Ok-vqa: A visual question answering benchmark requiring external knowledge}, + year = {2019}, +} +""", prompt={ "query": "Retrieve documents that provide an answer to the question alongside the image." 
}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2ITRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2ITRetrieval.py index c6d1ef6baa..60603a9ef6 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2ITRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2ITRetrieval.py @@ -26,13 +26,15 @@ class OVENIT2ITRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@inproceedings{hu2023open, - title={Open-domain visual entity recognition: Towards recognizing millions of wikipedia entities}, - author={Hu, Hexiang and Luan, Yi and Chen, Yang and Khandelwal, Urvashi and Joshi, Mandar and Lee, Kenton and Toutanova, Kristina and Chang, Ming-Wei}, - booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, - pages={12065--12075}, - year={2023} -}""", + bibtex_citation=r""" +@inproceedings{hu2023open, + author = {Hu, Hexiang and Luan, Yi and Chen, Yang and Khandelwal, Urvashi and Joshi, Mandar and Lee, Kenton and Toutanova, Kristina and Chang, Ming-Wei}, + booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages = {12065--12075}, + title = {Open-domain visual entity recognition: Towards recognizing millions of wikipedia entities}, + year = {2023}, +} +""", prompt={ "query": "Retrieve a Wikipedia image-description pair that provides evidence for the question of this image." 
}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2TRetrieval.py index 94898f4819..1c9ca29f24 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/OVENIT2TRetrieval.py @@ -26,13 +26,15 @@ class OVENIT2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text"], sample_creation="created", - bibtex_citation="""@inproceedings{hu2023open, - title={Open-domain visual entity recognition: Towards recognizing millions of wikipedia entities}, - author={Hu, Hexiang and Luan, Yi and Chen, Yang and Khandelwal, Urvashi and Joshi, Mandar and Lee, Kenton and Toutanova, Kristina and Chang, Ming-Wei}, - booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, - pages={12065--12075}, - year={2023} -}""", + bibtex_citation=r""" +@inproceedings{hu2023open, + author = {Hu, Hexiang and Luan, Yi and Chen, Yang and Khandelwal, Urvashi and Joshi, Mandar and Lee, Kenton and Toutanova, Kristina and Chang, Ming-Wei}, + booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages = {12065--12075}, + title = {Open-domain visual entity recognition: Towards recognizing millions of wikipedia entities}, + year = {2023}, +} +""", prompt={ "query": "Retrieve a Wikipedia paragraph that provides an answer to the given query about the image." 
}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py index 2e06267764..a20fa52c67 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/ROxfordI2IRetrieval.py @@ -31,13 +31,15 @@ class ROxfordEasyI2IRetrieval(MultiChoiceEvaluationMixin, AbsTaskAny2AnyRetrieva dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@inproceedings{radenovic2018revisiting, - title={Revisiting oxford and paris: Large-scale image retrieval benchmarking}, - author={Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, - booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, - pages={5706--5715}, - year={2018} -}""", + bibtex_citation=r""" +@inproceedings{radenovic2018revisiting, + author = {Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, + booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages = {5706--5715}, + title = {Revisiting oxford and paris: Large-scale image retrieval benchmarking}, + year = {2018}, +} +""", descriptive_stats={ "n_samples": {"test": 5063}, "avg_character_length": { @@ -76,14 +78,15 @@ class ROxfordMediumI2IRetrieval(MultiChoiceEvaluationMixin, AbsTaskAny2AnyRetrie dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@inproceedings{radenovic2018revisiting, - title={Revisiting oxford and paris: Large-scale image retrieval benchmarking}, - author={Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, - booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, - pages={5706--5715}, - year={2018} + bibtex_citation=r""" +@inproceedings{radenovic2018revisiting, + author = {Radenovi{\'c}, Filip and 
Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, + booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages = {5706--5715}, + title = {Revisiting oxford and paris: Large-scale image retrieval benchmarking}, + year = {2018}, } - """, +""", descriptive_stats={ "n_samples": {"test": 5063}, "avg_character_length": { @@ -122,14 +125,15 @@ class ROxfordHardI2IRetrieval(MultiChoiceEvaluationMixin, AbsTaskAny2AnyRetrieva dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@inproceedings{radenovic2018revisiting, - title={Revisiting oxford and paris: Large-scale image retrieval benchmarking}, - author={Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, - booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, - pages={5706--5715}, - year={2018} + bibtex_citation=r""" +@inproceedings{radenovic2018revisiting, + author = {Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, + booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages = {5706--5715}, + title = {Revisiting oxford and paris: Large-scale image retrieval benchmarking}, + year = {2018}, } - """, +""", descriptive_stats={ "n_samples": {"test": 5063}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py index 22ca9c60d9..598bc295f6 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/RP2kI2IRetrieval.py @@ -26,12 +26,14 @@ class RP2kI2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@article{peng2020rp2k, - title={RP2K: A large-scale retail product dataset for fine-grained image classification}, - 
author={Peng, Jingtian and Xiao, Chang and Li, Yifan}, - journal={arXiv preprint arXiv:2006.12634}, - year={2020} -}""", + bibtex_citation=r""" +@article{peng2020rp2k, + author = {Peng, Jingtian and Xiao, Chang and Li, Yifan}, + journal = {arXiv preprint arXiv:2006.12634}, + title = {RP2K: A large-scale retail product dataset for fine-grained image classification}, + year = {2020}, +} +""", descriptive_stats={ "n_samples": {"test": 39457}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py index 494a7af939..d435961959 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/RParisI2IRetrieval.py @@ -27,13 +27,15 @@ class RParisEasyI2IRetrieval(MultiChoiceEvaluationMixin, AbsTaskAny2AnyRetrieval dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@inproceedings{radenovic2018revisiting, - title={Revisiting oxford and paris: Large-scale image retrieval benchmarking}, - author={Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, - booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, - pages={5706--5715}, - year={2018} -}""", + bibtex_citation=r""" +@inproceedings{radenovic2018revisiting, + author = {Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, + booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages = {5706--5715}, + title = {Revisiting oxford and paris: Large-scale image retrieval benchmarking}, + year = {2018}, +} +""", descriptive_stats={ "n_samples": {"test": 6392}, "avg_character_length": { @@ -72,14 +74,15 @@ class RParisMediumI2IRetrieval(MultiChoiceEvaluationMixin, AbsTaskAny2AnyRetriev dialect=[], modalities=["image"], sample_creation="created", - 
bibtex_citation="""@inproceedings{radenovic2018revisiting, - title={Revisiting oxford and paris: Large-scale image retrieval benchmarking}, - author={Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, - booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, - pages={5706--5715}, - year={2018} + bibtex_citation=r""" +@inproceedings{radenovic2018revisiting, + author = {Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, + booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages = {5706--5715}, + title = {Revisiting oxford and paris: Large-scale image retrieval benchmarking}, + year = {2018}, } - """, +""", descriptive_stats={ "n_samples": {"test": 6392}, "avg_character_length": { @@ -118,14 +121,15 @@ class RParisHardI2IRetrieval(MultiChoiceEvaluationMixin, AbsTaskAny2AnyRetrieval dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@inproceedings{radenovic2018revisiting, - title={Revisiting oxford and paris: Large-scale image retrieval benchmarking}, - author={Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, - booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, - pages={5706--5715}, - year={2018} + bibtex_citation=r""" +@inproceedings{radenovic2018revisiting, + author = {Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej}, + booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages = {5706--5715}, + title = {Revisiting oxford and paris: Large-scale image retrieval benchmarking}, + year = {2018}, } - """, +""", descriptive_stats={ "n_samples": {"test": 6392}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/ReMuQIT2TRetrieval.py 
b/mteb/tasks/Image/Any2AnyRetrieval/eng/ReMuQIT2TRetrieval.py index d464efc972..205368a480 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/ReMuQIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/ReMuQIT2TRetrieval.py @@ -26,25 +26,27 @@ class ReMuQIT2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@inproceedings{luo-etal-2023-end, - title = "End-to-end Knowledge Retrieval with Multi-modal Queries", - author = "Luo, Man and - Fang, Zhiyuan and - Gokhale, Tejas and - Yang, Yezhou and - Baral, Chitta", - editor = "Rogers, Anna and - Boyd-Graber, Jordan and - Okazaki, Naoaki", - booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)", - month = jul, - year = "2023", - address = "Toronto, Canada", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2023.acl-long.478", - doi = "10.18653/v1/2023.acl-long.478", - pages = "8573--8589", -}""", + bibtex_citation=r""" +@inproceedings{luo-etal-2023-end, + address = {Toronto, Canada}, + author = {Luo, Man and +Fang, Zhiyuan and +Gokhale, Tejas and +Yang, Yezhou and +Baral, Chitta}, + booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + doi = {10.18653/v1/2023.acl-long.478}, + editor = {Rogers, Anna and +Boyd-Graber, Jordan and +Okazaki, Naoaki}, + month = jul, + pages = {8573--8589}, + publisher = {Association for Computational Linguistics}, + title = {End-to-end Knowledge Retrieval with Multi-modal Queries}, + url = {https://aclanthology.org/2023.acl-long.478}, + year = {2023}, +} +""", prompt={ "query": "Retrieve a fact-based paragraph that provides an answer to the given query about the image." 
}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py index d62e152f45..722522ed62 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SOPI2IRetrieval.py @@ -26,13 +26,15 @@ class SOPI2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@inproceedings{oh2016deep, - title={Deep metric learning via lifted structured feature embedding}, - author={Oh Song, Hyun and Xiang, Yu and Jegelka, Stefanie and Savarese, Silvio}, - booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, - pages={4004--4012}, - year={2016} -}""", + bibtex_citation=r""" +@inproceedings{oh2016deep, + author = {Oh Song, Hyun and Xiang, Yu and Jegelka, Stefanie and Savarese, Silvio}, + booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages = {4004--4012}, + title = {Deep metric learning via lifted structured feature embedding}, + year = {2016}, +} +""", descriptive_stats={ "n_samples": {"test": 120053}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRI2TRetrieval.py index a8aac928c4..e2ba7e9742 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRI2TRetrieval.py @@ -86,12 +86,14 @@ class SciMMIRI2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{wu2024scimmir, - title={SciMMIR: Benchmarking Scientific Multi-modal Information Retrieval}, - author={Wu, Siwei and Li, Yizhi and Zhu, Kang and Zhang, Ge and Liang, Yiming and Ma, Kaijing and Xiao, Chenghao and Zhang, Haoran and Yang, Bohao and Chen, Wenhu and others}, - journal={arXiv preprint arXiv:2401.13478}, - year={2024} 
-}""", + bibtex_citation=r""" +@article{wu2024scimmir, + author = {Wu, Siwei and Li, Yizhi and Zhu, Kang and Zhang, Ge and Liang, Yiming and Ma, Kaijing and Xiao, Chenghao and Zhang, Haoran and Yang, Bohao and Chen, Wenhu and others}, + journal = {arXiv preprint arXiv:2401.13478}, + title = {SciMMIR: Benchmarking Scientific Multi-modal Information Retrieval}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": None, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRT2IRetrieval.py index 41fa6aebc1..420db0d882 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/SciMMIRT2IRetrieval.py @@ -86,12 +86,14 @@ class SciMMIRT2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{wu2024scimmir, - title={SciMMIR: Benchmarking Scientific Multi-modal Information Retrieval}, - author={Wu, Siwei and Li, Yizhi and Zhu, Kang and Zhang, Ge and Liang, Yiming and Ma, Kaijing and Xiao, Chenghao and Zhang, Haoran and Yang, Bohao and Chen, Wenhu and others}, - journal={arXiv preprint arXiv:2401.13478}, - year={2024} -}""", + bibtex_citation=r""" +@article{wu2024scimmir, + author = {Wu, Siwei and Li, Yizhi and Zhu, Kang and Zhang, Ge and Liang, Yiming and Ma, Kaijing and Xiao, Chenghao and Zhang, Haoran and Yang, Bohao and Chen, Wenhu and others}, + journal = {arXiv preprint arXiv:2401.13478}, + title = {SciMMIR: Benchmarking Scientific Multi-modal Information Retrieval}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": None, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py index cb70bdb4e8..5624b109e2 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py +++ 
b/mteb/tasks/Image/Any2AnyRetrieval/eng/SketchyI2IRetrieval.py @@ -26,12 +26,14 @@ class SketchyI2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@inproceedings{ypsilantis2021met, - title={The met dataset: Instance-level recognition for artworks}, - author={Ypsilantis, Nikolaos-Antonios and Garcia, Noa and Han, Guangxing and Ibrahimi, Sarah and Van Noord, Nanne and Tolias, Giorgos}, - booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)}, - year={2021} -}""", + bibtex_citation=r""" +@inproceedings{ypsilantis2021met, + author = {Ypsilantis, Nikolaos-Antonios and Garcia, Noa and Han, Guangxing and Ibrahimi, Sarah and Van Noord, Nanne and Tolias, Giorgos}, + booktitle = {Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)}, + title = {The met dataset: Instance-level recognition for artworks}, + year = {2021}, +} +""", descriptive_stats={ "n_samples": {"test": 452886}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/StanfordCarsI2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/StanfordCarsI2IRetrieval.py index 6649054fb8..e3c7fc0a3d 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/StanfordCarsI2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/StanfordCarsI2IRetrieval.py @@ -26,12 +26,14 @@ class StanfordCarsI2I(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@inproceedings{Krause2013CollectingAL, - title={Collecting a Large-scale Dataset of Fine-grained Cars}, - author={Jonathan Krause and Jia Deng and Michael Stark and Li Fei-Fei}, - year={2013}, - url={https://api.semanticscholar.org/CorpusID:16632981} - }""", + bibtex_citation=r""" +@inproceedings{Krause2013CollectingAL, + author = {Jonathan Krause and Jia Deng and Michael Stark and Li Fei-Fei}, + title = {Collecting a 
Large-scale Dataset of Fine-grained Cars}, + url = {https://api.semanticscholar.org/CorpusID:16632981}, + year = {2013}, +} +""", descriptive_stats={ "n_samples": {"default": 8041}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/TUBerlinT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/TUBerlinT2IRetrieval.py index b85cd1f94b..2d904f2a9a 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/TUBerlinT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/TUBerlinT2IRetrieval.py @@ -27,16 +27,18 @@ class TUBerlinT2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{eitz2012humans, - title={How do humans sketch objects?}, - author={Eitz, Mathias and Hays, James and Alexa, Marc}, - journal={ACM Transactions on graphics (TOG)}, - volume={31}, - number={4}, - pages={1--10}, - year={2012}, - publisher={Acm New York, NY, USA} -}""", + bibtex_citation=r""" +@article{eitz2012humans, + author = {Eitz, Mathias and Hays, James and Alexa, Marc}, + journal = {ACM Transactions on graphics (TOG)}, + number = {4}, + pages = {1--10}, + publisher = {Acm New York, NY, USA}, + title = {How do humans sketch objects?}, + volume = {31}, + year = {2012}, +} +""", descriptive_stats={ "n_samples": {"test": 250}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py index 7bb5695e02..04c94f7365 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VQA2IT2TRetrieval.py @@ -27,13 +27,15 @@ class VQA2IT2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@InProceedings{Goyal_2017_CVPR, -author = {Goyal, Yash and Khot, Tejas and Summers-Stay, Douglas and Batra, Dhruv and Parikh, Devi}, -title = {Making the v in VQA Matter: Elevating the Role of 
Image Understanding in Visual Question Answering}, -booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, -month = {July}, -year = {2017} -}""", + bibtex_citation=r""" +@inproceedings{Goyal_2017_CVPR, + author = {Goyal, Yash and Khot, Tejas and Summers-Stay, Douglas and Batra, Dhruv and Parikh, Devi}, + booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {July}, + title = {Making the v in VQA Matter: Elevating the Role of Image Understanding in Visual Question Answering}, + year = {2017}, +} +""", descriptive_stats={ "n_samples": {"test": 4319}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VidoreBenchRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VidoreBenchRetrieval.py index f279d9b277..0031778a98 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/VidoreBenchRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VidoreBenchRetrieval.py @@ -92,12 +92,14 @@ class VidoreArxivQARetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{faysse2024colpali, - title={ColPali: Efficient Document Retrieval with Vision Language Models}, - author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, - journal={arXiv preprint arXiv:2407.01449}, - year={2024} -}""", + bibtex_citation=r""" +@article{faysse2024colpali, + author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, + journal = {arXiv preprint arXiv:2407.01449}, + title = {ColPali: Efficient Document Retrieval with Vision Language Models}, + year = {2024}, +} +""", prompt={"query": "Find a screenshot that relevant to the user's question."}, descriptive_stats={ "n_samples": None, @@ -146,12 +148,14 @@ class VidoreDocVQARetrieval(AbsTaskAny2AnyRetrieval): dialect=[], 
modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{faysse2024colpali, - title={ColPali: Efficient Document Retrieval with Vision Language Models}, - author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, - journal={arXiv preprint arXiv:2407.01449}, - year={2024} -}""", + bibtex_citation=r""" +@article{faysse2024colpali, + author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, + journal = {arXiv preprint arXiv:2407.01449}, + title = {ColPali: Efficient Document Retrieval with Vision Language Models}, + year = {2024}, +} +""", prompt={"query": "Find a screenshot that relevant to the user's question."}, descriptive_stats={ "n_samples": None, @@ -200,12 +204,14 @@ class VidoreInfoVQARetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{faysse2024colpali, - title={ColPali: Efficient Document Retrieval with Vision Language Models}, - author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, - journal={arXiv preprint arXiv:2407.01449}, - year={2024} -}""", + bibtex_citation=r""" +@article{faysse2024colpali, + author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, + journal = {arXiv preprint arXiv:2407.01449}, + title = {ColPali: Efficient Document Retrieval with Vision Language Models}, + year = {2024}, +} +""", prompt={"query": "Find a screenshot that relevant to the user's question."}, descriptive_stats={ "n_samples": None, @@ -254,12 +260,14 @@ class VidoreTabfquadRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{faysse2024colpali, - title={ColPali: Efficient Document Retrieval with Vision Language Models}, - 
author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, - journal={arXiv preprint arXiv:2407.01449}, - year={2024} -}""", + bibtex_citation=r""" +@article{faysse2024colpali, + author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, + journal = {arXiv preprint arXiv:2407.01449}, + title = {ColPali: Efficient Document Retrieval with Vision Language Models}, + year = {2024}, +} +""", prompt={"query": "Find a screenshot that relevant to the user's question."}, descriptive_stats={ "n_samples": None, @@ -308,12 +316,14 @@ class VidoreTatdqaRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{faysse2024colpali, - title={ColPali: Efficient Document Retrieval with Vision Language Models}, - author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, - journal={arXiv preprint arXiv:2407.01449}, - year={2024} -}""", + bibtex_citation=r""" +@article{faysse2024colpali, + author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, + journal = {arXiv preprint arXiv:2407.01449}, + title = {ColPali: Efficient Document Retrieval with Vision Language Models}, + year = {2024}, +} +""", prompt={"query": "Find a screenshot that relevant to the user's question."}, descriptive_stats={ "n_samples": None, @@ -362,12 +372,14 @@ class VidoreShiftProjectRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{faysse2024colpali, - title={ColPali: Efficient Document Retrieval with Vision Language Models}, - author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, - journal={arXiv preprint arXiv:2407.01449}, - year={2024} -}""", 
+ bibtex_citation=r""" +@article{faysse2024colpali, + author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, + journal = {arXiv preprint arXiv:2407.01449}, + title = {ColPali: Efficient Document Retrieval with Vision Language Models}, + year = {2024}, +} +""", prompt={"query": "Find a screenshot that relevant to the user's question."}, descriptive_stats={ "n_samples": None, @@ -416,12 +428,14 @@ class VidoreSyntheticDocQAAIRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{faysse2024colpali, - title={ColPali: Efficient Document Retrieval with Vision Language Models}, - author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, - journal={arXiv preprint arXiv:2407.01449}, - year={2024} -}""", + bibtex_citation=r""" +@article{faysse2024colpali, + author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, + journal = {arXiv preprint arXiv:2407.01449}, + title = {ColPali: Efficient Document Retrieval with Vision Language Models}, + year = {2024}, +} +""", prompt={"query": "Find a screenshot that relevant to the user's question."}, descriptive_stats={ "n_samples": None, @@ -470,12 +484,14 @@ class VidoreSyntheticDocQAEnergyRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{faysse2024colpali, - title={ColPali: Efficient Document Retrieval with Vision Language Models}, - author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, - journal={arXiv preprint arXiv:2407.01449}, - year={2024} -}""", + bibtex_citation=r""" +@article{faysse2024colpali, + author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, 
Pierre}, + journal = {arXiv preprint arXiv:2407.01449}, + title = {ColPali: Efficient Document Retrieval with Vision Language Models}, + year = {2024}, +} +""", prompt={"query": "Find a screenshot that relevant to the user's question."}, descriptive_stats={ "n_samples": None, @@ -524,12 +540,14 @@ class VidoreSyntheticDocQAGovernmentReportsRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{faysse2024colpali, - title={ColPali: Efficient Document Retrieval with Vision Language Models}, - author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, - journal={arXiv preprint arXiv:2407.01449}, - year={2024} -}""", + bibtex_citation=r""" +@article{faysse2024colpali, + author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, + journal = {arXiv preprint arXiv:2407.01449}, + title = {ColPali: Efficient Document Retrieval with Vision Language Models}, + year = {2024}, +} +""", prompt={"query": "Find a screenshot that relevant to the user's question."}, descriptive_stats={ "n_samples": None, @@ -578,12 +596,14 @@ class VidoreSyntheticDocQAHealthcareIndustryRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{faysse2024colpali, - title={ColPali: Efficient Document Retrieval with Vision Language Models}, - author={Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, - journal={arXiv preprint arXiv:2407.01449}, - year={2024} -}""", + bibtex_citation=r""" +@article{faysse2024colpali, + author = {Faysse, Manuel and Sibille, Hugues and Wu, Tony and Viaud, Gautier and Hudelot, C{\'e}line and Colombo, Pierre}, + journal = {arXiv preprint arXiv:2407.01449}, + title = {ColPali: Efficient Document Retrieval with Vision Language Models}, + year 
= {2024}, +} +""", prompt={"query": "Find a screenshot that relevant to the user's question."}, descriptive_stats={ "n_samples": None, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsI2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsI2TRetrieval.py index 2f79bfe9eb..5ed6d8635c 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsI2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsI2TRetrieval.py @@ -26,13 +26,15 @@ class VisualNewsI2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@inproceedings{liu2021visual, - title={Visual News: Benchmark and Challenges in News Image Captioning}, - author={Liu, Fuxiao and Wang, Yinghan and Wang, Tianlu and Ordonez, Vicente}, - booktitle={Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing}, - pages={6761--6771}, - year={2021} -}""", + bibtex_citation=r""" +@inproceedings{liu2021visual, + author = {Liu, Fuxiao and Wang, Yinghan and Wang, Tianlu and Ordonez, Vicente}, + booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing}, + pages = {6761--6771}, + title = {Visual News: Benchmark and Challenges in News Image Captioning}, + year = {2021}, +} +""", prompt={"query": "Find a caption for the news in the given photo."}, descriptive_stats={ "n_samples": {"test": 20000}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsT2IRetrieval.py index 1c5fa7fdbe..de8ddc4df5 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VisualNewsT2IRetrieval.py @@ -26,13 +26,15 @@ class VisualNewsT2IRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@inproceedings{liu2021visual, - title={Visual News: Benchmark and Challenges in 
News Image Captioning}, - author={Liu, Fuxiao and Wang, Yinghan and Wang, Tianlu and Ordonez, Vicente}, - booktitle={Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing}, - pages={6761--6771}, - year={2021} -}""", + bibtex_citation=r""" +@inproceedings{liu2021visual, + author = {Liu, Fuxiao and Wang, Yinghan and Wang, Tianlu and Ordonez, Vicente}, + booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing}, + pages = {6761--6771}, + title = {Visual News: Benchmark and Challenges in News Image Captioning}, + year = {2021}, +} +""", prompt={ "query": "Identify the news-related image in line with the described event." }, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py index 768332f3ad..a6970d157d 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/VizWizIT2TRetrieval.py @@ -27,13 +27,15 @@ class VizWizIT2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@inproceedings{gurari2018vizwiz, - title={Vizwiz grand challenge: Answering visual questions from blind people}, - author={Gurari, Danna and Li, Qing and Stangl, Abigale J and Guo, Anhong and Lin, Chi and Grauman, Kristen and Luo, Jiebo and Bigham, Jeffrey P}, - booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, - pages={3608--3617}, - year={2018} -}""", + bibtex_citation=r""" +@inproceedings{gurari2018vizwiz, + author = {Gurari, Danna and Li, Qing and Stangl, Abigale J and Guo, Anhong and Lin, Chi and Grauman, Kristen and Luo, Jiebo and Bigham, Jeffrey P}, + booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages = {3608--3617}, + title = {Vizwiz grand challenge: Answering visual questions from blind people}, + year = {2018}, +} 
+""", descriptive_stats={ "n_samples": {"test": 214354}, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2ITRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2ITRetrieval.py index e3235c4912..307fe74259 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2ITRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2ITRetrieval.py @@ -26,13 +26,15 @@ class WebQAT2ITRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@inproceedings{chang2022webqa, - title={Webqa: Multihop and multimodal qa}, - author={Chang, Yingshan and Narang, Mridu and Suzuki, Hisami and Cao, Guihong and Gao, Jianfeng and Bisk, Yonatan}, - booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, - pages={16495--16504}, - year={2022} - }""", + bibtex_citation=r""" +@inproceedings{chang2022webqa, + author = {Chang, Yingshan and Narang, Mridu and Suzuki, Hisami and Cao, Guihong and Gao, Jianfeng and Bisk, Yonatan}, + booktitle = {Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, + pages = {16495--16504}, + title = {Webqa: Multihop and multimodal qa}, + year = {2022}, +} +""", prompt={"query": "Find a Wikipedia image that answers this question."}, descriptive_stats={ "n_samples": {"test": 2511}, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2TRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2TRetrieval.py index 4583e61221..cfc760cba0 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2TRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/eng/WebQAT2TRetrieval.py @@ -26,13 +26,15 @@ class WebQAT2TRetrieval(AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text"], sample_creation="created", - bibtex_citation="""@inproceedings{chang2022webqa, - title={Webqa: Multihop and multimodal qa}, - author={Chang, Yingshan and Narang, Mridu and Suzuki, Hisami and Cao, Guihong and Gao, 
Jianfeng and Bisk, Yonatan}, - booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, - pages={16495--16504}, - year={2022} - }""", + bibtex_citation=r""" +@inproceedings{chang2022webqa, + author = {Chang, Yingshan and Narang, Mridu and Suzuki, Hisami and Cao, Guihong and Gao, Jianfeng and Bisk, Yonatan}, + booktitle = {Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, + pages = {16495--16504}, + title = {Webqa: Multihop and multimodal qa}, + year = {2022}, +} +""", prompt={ "query": "Retrieve passages from Wikipedia that provide answers to the following question." }, diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/VdrMultilingualRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/VdrMultilingualRetrieval.py index 8ebc7c30b3..0355144949 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/VdrMultilingualRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/VdrMultilingualRetrieval.py @@ -123,12 +123,14 @@ class VDRMultilingualRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval): annotations_creators="LM-generated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{llamaindex2024vdrmultilingual, - title={Visual Document Retrieval Goes Multilingual}, - author={LlamaIndex}, - year={2025}, - howpublished={https://huggingface.co/datasets/llamaindex/vdr-multilingual-test}, -}""", + bibtex_citation=r""" +@misc{llamaindex2024vdrmultilingual, + author = {LlamaIndex}, + howpublished = {https://huggingface.co/datasets/llamaindex/vdr-multilingual-test}, + title = {Visual Document Retrieval Goes Multilingual}, + year = {2025}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/WITT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/WITT2IRetrieval.py index 884729d8ab..b67c2c0262 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/WITT2IRetrieval.py +++ 
b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/WITT2IRetrieval.py @@ -114,14 +114,16 @@ class WITT2IRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@inproceedings{bugliarello2022iglue, - title={IGLUE: A benchmark for transfer learning across modalities, tasks, and languages}, - author={Bugliarello, Emanuele and Liu, Fangyu and Pfeiffer, Jonas and Reddy, Siva and Elliott, Desmond and Ponti, Edoardo Maria and Vuli{\'c}, Ivan}, - booktitle={International Conference on Machine Learning}, - pages={2370--2392}, - year={2022}, - organization={PMLR} -}""", + bibtex_citation=r""" +@inproceedings{bugliarello2022iglue, + author = {Bugliarello, Emanuele and Liu, Fangyu and Pfeiffer, Jonas and Reddy, Siva and Elliott, Desmond and Ponti, Edoardo Maria and Vuli{\'c}, Ivan}, + booktitle = {International Conference on Machine Learning}, + organization = {PMLR}, + pages = {2370--2392}, + title = {IGLUE: A benchmark for transfer learning across modalities, tasks, and languages}, + year = {2022}, +} +""", descriptive_stats={ "n_samples": None, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XFlickr30kCoT2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XFlickr30kCoT2IRetrieval.py index 4370c2752c..0e08df2b9d 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XFlickr30kCoT2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XFlickr30kCoT2IRetrieval.py @@ -97,14 +97,16 @@ class XFlickr30kCoT2IRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@inproceedings{bugliarello2022iglue, - title={IGLUE: A benchmark for transfer learning across modalities, tasks, and languages}, - author={Bugliarello, Emanuele and Liu, Fangyu and Pfeiffer, Jonas and Reddy, Siva and Elliott, Desmond and Ponti, Edoardo Maria and Vuli{\'c}, Ivan}, - 
booktitle={International Conference on Machine Learning}, - pages={2370--2392}, - year={2022}, - organization={PMLR} -}""", + bibtex_citation=r""" +@inproceedings{bugliarello2022iglue, + author = {Bugliarello, Emanuele and Liu, Fangyu and Pfeiffer, Jonas and Reddy, Siva and Elliott, Desmond and Ponti, Edoardo Maria and Vuli{\'c}, Ivan}, + booktitle = {International Conference on Machine Learning}, + organization = {PMLR}, + pages = {2370--2392}, + title = {IGLUE: A benchmark for transfer learning across modalities, tasks, and languages}, + year = {2022}, +} +""", descriptive_stats={ "n_samples": None, "avg_character_length": { diff --git a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XM3600T2IRetrieval.py b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XM3600T2IRetrieval.py index 880c7aade8..17136c8e3a 100644 --- a/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XM3600T2IRetrieval.py +++ b/mteb/tasks/Image/Any2AnyRetrieval/multilingual/XM3600T2IRetrieval.py @@ -142,13 +142,15 @@ class XM3600T2IRetrieval(MultilingualTask, AbsTaskAny2AnyRetrieval): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@inproceedings{thapliyal2022crossmodal, - title={Crossmodal-3600: A Massively Multilingual Multimodal Evaluation Dataset}, - author={Thapliyal, Ashish V and Tuset, Jordi Pont and Chen, Xi and Soricut, Radu}, - booktitle={Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing}, - pages={715--729}, - year={2022} -}""", + bibtex_citation=r""" +@inproceedings{thapliyal2022crossmodal, + author = {Thapliyal, Ashish V and Tuset, Jordi Pont and Chen, Xi and Soricut, Radu}, + booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing}, + pages = {715--729}, + title = {Crossmodal-3600: A Massively Multilingual Multimodal Evaluation Dataset}, + year = {2022}, +} +""", descriptive_stats={ "n_samples": None, "avg_character_length": { diff --git 
a/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py b/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py index ab459d5846..ed1d732bed 100644 --- a/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/BirdsnapClassification.py @@ -29,13 +29,15 @@ class BirdsnapClassification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@InProceedings{Berg_2014_CVPR, - author = {Berg, Thomas and Liu, Jiongxin and Woo Lee, Seung and Alexander, Michelle L. and Jacobs, David W. and Belhumeur, Peter N.}, - title = {Birdsnap: Large-scale Fine-grained Visual Categorization of Birds}, - booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, - month = {June}, - year = {2014} - }""", + bibtex_citation=r""" +@inproceedings{Berg_2014_CVPR, + author = {Berg, Thomas and Liu, Jiongxin and Woo Lee, Seung and Alexander, Michelle L. and Jacobs, David W. 
and Belhumeur, Peter N.}, + booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + title = {Birdsnap: Large-scale Fine-grained Visual Categorization of Birds}, + year = {2014}, +} +""", descriptive_stats={ "n_samples": {"test": 1851}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ImageClassification/eng/CIFAR.py b/mteb/tasks/Image/ImageClassification/eng/CIFAR.py index 31b9099fd5..7c02ae79ce 100644 --- a/mteb/tasks/Image/ImageClassification/eng/CIFAR.py +++ b/mteb/tasks/Image/ImageClassification/eng/CIFAR.py @@ -29,12 +29,14 @@ class CIFAR10Classification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation=""" @TECHREPORT{Krizhevsky09learningmultiple, - author = {Alex Krizhevsky}, - title = {Learning multiple layers of features from tiny images}, - institution = {}, - year = {2009} - }""", + bibtex_citation=r""" +@techreport{Krizhevsky09learningmultiple, + author = {Alex Krizhevsky}, + institution = {}, + title = {Learning multiple layers of features from tiny images}, + year = {2009}, +} +""", descriptive_stats={ "n_samples": {"test": 10000}, "avg_character_length": {"test": 431.4}, @@ -68,13 +70,14 @@ class CIFAR100Classification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation=""" @TECHREPORT{Krizhevsky09learningmultiple, - author = {Alex Krizhevsky}, - title = {Learning multiple layers of features from tiny images}, - institution = {}, - year = {2009} - } - """, + bibtex_citation=r""" +@techreport{Krizhevsky09learningmultiple, + author = {Alex Krizhevsky}, + institution = {}, + title = {Learning multiple layers of features from tiny images}, + year = {2009}, +} +""", descriptive_stats={ "n_samples": {"test": 10000}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py 
b/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py index ca12ece177..112df6e793 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/Caltech101Classification.py @@ -31,17 +31,19 @@ class Caltech101Classification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@INPROCEEDINGS{1384978, - author={Li Fei-Fei and Fergus, R. and Perona, P.}, - booktitle={2004 Conference on Computer Vision and Pattern Recognition Workshop}, - title={Learning Generative Visual Models from Few Training Examples: An Incremental Bayesian Approach Tested on 101 Object Categories}, - year={2004}, - volume={}, - number={}, - pages={178-178}, - keywords={Bayesian methods;Testing;Humans;Maximum likelihood estimation;Assembly;Shape;Machine vision;Image recognition;Parameter estimation;Image databases}, - doi={10.1109/CVPR.2004.383} - }""", + bibtex_citation=r""" +@inproceedings{1384978, + author = {Li Fei-Fei and Fergus, R. 
and Perona, P.}, + booktitle = {2004 Conference on Computer Vision and Pattern Recognition Workshop}, + doi = {10.1109/CVPR.2004.383}, + keywords = {Bayesian methods;Testing;Humans;Maximum likelihood estimation;Assembly;Shape;Machine vision;Image recognition;Parameter estimation;Image databases}, + number = {}, + pages = {178-178}, + title = {Learning Generative Visual Models from Few Training Examples: An Incremental Bayesian Approach Tested on 101 Object Categories}, + volume = {}, + year = {2004}, +} +""", descriptive_stats={ "n_samples": {"test": 6084}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py b/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py index b73f895595..1ea60abd63 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/Country211Classification.py @@ -29,12 +29,14 @@ class Country211Classification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@article{radford2021learning, - title={Learning Transferable Visual Models From Natural Language Supervision}, - author={Radford, Alec and Kim, Jong Wook and Hallacy, Chris and Ramesh, Aditya and Goh, Gabriel and Agarwal, Sandhini and Sastry, Girish and Askell, Amanda and Mishkin, Pamela and Clark, Jack and others}, - journal={arXiv preprint arXiv:2103.00020}, - year={2021} - }""", + bibtex_citation=r""" +@article{radford2021learning, + author = {Radford, Alec and Kim, Jong Wook and Hallacy, Chris and Ramesh, Aditya and Goh, Gabriel and Agarwal, Sandhini and Sastry, Girish and Askell, Amanda and Mishkin, Pamela and Clark, Jack and others}, + journal = {arXiv preprint arXiv:2103.00020}, + title = {Learning Transferable Visual Models From Natural Language Supervision}, + year = {2021}, +} +""", descriptive_stats={ "n_samples": {"test": 21100}, "avg_character_length": {"test": 0}, 
diff --git a/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py b/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py index 9ef154f728..ec6dfc5708 100644 --- a/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/DTDClassification.py @@ -29,12 +29,14 @@ class DTDClassification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@InProceedings{cimpoi14describing, - Author = {M. Cimpoi and S. Maji and I. Kokkinos and S. Mohamed and and A. Vedaldi}, - Title = {Describing Textures in the Wild}, - Booktitle = {Proceedings of the {IEEE} Conf. on Computer Vision and Pattern Recognition ({CVPR})}, - Year = {2014} - }""", + bibtex_citation=r""" +@inproceedings{cimpoi14describing, + author = {M. Cimpoi and S. Maji and I. Kokkinos and S. Mohamed and and A. Vedaldi}, + booktitle = {Proceedings of the {IEEE} Conf. on Computer Vision and Pattern Recognition ({CVPR})}, + title = {Describing Textures in the Wild}, + year = {2014}, +} +""", descriptive_stats={ "n_samples": {"test": 1880}, "avg_character_length": {"test": 456}, diff --git a/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py b/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py index 7db9f482dd..ed3d092c98 100644 --- a/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/EuroSATClassification.py @@ -29,17 +29,19 @@ class EuroSATClassification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@ARTICLE{8736785, - author={Helber, Patrick and Bischke, Benjamin and Dengel, Andreas and Borth, Damian}, - journal={IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing}, - title={EuroSAT: A Novel Dataset and Deep Learning Benchmark for Land Use and Land Cover Classification}, - year={2019}, - volume={12}, - number={7}, 
- pages={2217-2226}, - keywords={Satellites;Earth;Remote sensing;Machine learning;Spatial resolution;Feature extraction;Benchmark testing;Dataset;deep convolutional neural network;deep learning;earth observation;land cover classification;land use classification;machine learning;remote sensing;satellite image classification;satellite images}, - doi={10.1109/JSTARS.2019.2918242}} - """, + bibtex_citation=r""" +@article{8736785, + author = {Helber, Patrick and Bischke, Benjamin and Dengel, Andreas and Borth, Damian}, + doi = {10.1109/JSTARS.2019.2918242}, + journal = {IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing}, + keywords = {Satellites;Earth;Remote sensing;Machine learning;Spatial resolution;Feature extraction;Benchmark testing;Dataset;deep convolutional neural network;deep learning;earth observation;land cover classification;land use classification;machine learning;remote sensing;satellite image classification;satellite images}, + number = {7}, + pages = {2217-2226}, + title = {EuroSAT: A Novel Dataset and Deep Learning Benchmark for Land Use and Land Cover Classification}, + volume = {12}, + year = {2019}, +} +""", descriptive_stats={ "n_samples": {"test": 5400}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py b/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py index 1aead2e3eb..2f32ae8629 100644 --- a/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/FER2013Classification.py @@ -29,15 +29,17 @@ class FER2013Classification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@misc{goodfellow2015explainingharnessingadversarialexamples, - title={Explaining and Harnessing Adversarial Examples}, - author={Ian J. 
Goodfellow and Jonathon Shlens and Christian Szegedy}, - year={2015}, - eprint={1412.6572}, - archivePrefix={arXiv}, - primaryClass={stat.ML}, - url={https://arxiv.org/abs/1412.6572}, - }""", + bibtex_citation=r""" +@misc{goodfellow2015explainingharnessingadversarialexamples, + archiveprefix = {arXiv}, + author = {Ian J. Goodfellow and Jonathon Shlens and Christian Szegedy}, + eprint = {1412.6572}, + primaryclass = {stat.ML}, + title = {Explaining and Harnessing Adversarial Examples}, + url = {https://arxiv.org/abs/1412.6572}, + year = {2015}, +} +""", descriptive_stats={ "n_samples": {"test": 7178}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py b/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py index b45da94178..96d525a95e 100644 --- a/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/FGVCAircraftClassification.py @@ -30,15 +30,17 @@ class FGVCAircraftClassification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@misc{maji2013finegrainedvisualclassificationaircraft, - title={Fine-Grained Visual Classification of Aircraft}, - author={Subhransu Maji and Esa Rahtu and Juho Kannala and Matthew Blaschko and Andrea Vedaldi}, - year={2013}, - eprint={1306.5151}, - archivePrefix={arXiv}, - primaryClass={cs.CV}, - url={https://arxiv.org/abs/1306.5151}, - }""", + bibtex_citation=r""" +@misc{maji2013finegrainedvisualclassificationaircraft, + archiveprefix = {arXiv}, + author = {Subhransu Maji and Esa Rahtu and Juho Kannala and Matthew Blaschko and Andrea Vedaldi}, + eprint = {1306.5151}, + primaryclass = {cs.CV}, + title = {Fine-Grained Visual Classification of Aircraft}, + url = {https://arxiv.org/abs/1306.5151}, + year = {2013}, +} +""", descriptive_stats={ "n_samples": {"test": 3333}, "avg_character_length": {"test": 431.4}, diff --git 
a/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py b/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py index f72dc84c8c..a53b064ec6 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/Food101Classification.py @@ -29,12 +29,14 @@ class Food101Classification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation=""" @inproceedings{bossard14, - title = {Food-101 -- Mining Discriminative Components with Random Forests}, - author = {Bossard, Lukas and Guillaumin, Matthieu and Van Gool, Luc}, - booktitle = {European Conference on Computer Vision}, - year = {2014} - }""", + bibtex_citation=r""" +@inproceedings{bossard14, + author = {Bossard, Lukas and Guillaumin, Matthieu and Van Gool, Luc}, + booktitle = {European Conference on Computer Vision}, + title = {Food-101 -- Mining Discriminative Components with Random Forests}, + year = {2014}, +} +""", descriptive_stats={ "n_samples": {"validation": 25300}, "avg_character_length": {"validation": 431.4}, diff --git a/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py b/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py index 8da34a65e9..528f50f7b1 100644 --- a/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/GTSRBClassification.py @@ -29,17 +29,19 @@ class GTSRBClassification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@INPROCEEDINGS{6033395, - author={Stallkamp, Johannes and Schlipsing, Marc and Salmen, Jan and Igel, Christian}, - booktitle={The 2011 International Joint Conference on Neural Networks}, - title={The German Traffic Sign Recognition Benchmark: A multi-class classification competition}, - year={2011}, - volume={}, - number={}, - pages={1453-1460}, - keywords={Humans;Training;Image color analysis;Benchmark 
testing;Lead;Histograms;Image resolution}, - doi={10.1109/IJCNN.2011.6033395} - }""", + bibtex_citation=r""" +@inproceedings{6033395, + author = {Stallkamp, Johannes and Schlipsing, Marc and Salmen, Jan and Igel, Christian}, + booktitle = {The 2011 International Joint Conference on Neural Networks}, + doi = {10.1109/IJCNN.2011.6033395}, + keywords = {Humans;Training;Image color analysis;Benchmark testing;Lead;Histograms;Image resolution}, + number = {}, + pages = {1453-1460}, + title = {The German Traffic Sign Recognition Benchmark: A multi-class classification competition}, + volume = {}, + year = {2011}, +} +""", descriptive_stats={ "n_samples": {"test": 12630}, "avg_character_length": {"test": 0}, diff --git a/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py b/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py index bed879d282..0a9155cc8f 100644 --- a/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py +++ b/mteb/tasks/Image/ImageClassification/eng/Imagenet1k.py @@ -29,14 +29,16 @@ class Imagenet1kClassification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@article{deng2009imagenet, - title={ImageNet: A large-scale hierarchical image database}, - author={Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li}, - journal={2009 IEEE Conference on Computer Vision and Pattern Recognition}, - pages={248--255}, - year={2009}, - organization={Ieee} - }""", + bibtex_citation=r""" +@article{deng2009imagenet, + author = {Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li}, + journal = {2009 IEEE Conference on Computer Vision and Pattern Recognition}, + organization = {Ieee}, + pages = {248--255}, + title = {ImageNet: A large-scale hierarchical image database}, + year = {2009}, +} +""", descriptive_stats={ "n_samples": {"test": 37200}, "avg_character_length": {"test": 0}, diff --git 
a/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py b/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py index 24fb508e38..3a86d8b999 100644 --- a/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/MNISTClassification.py @@ -29,13 +29,15 @@ class MNISTClassification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@article{lecun2010mnist, - title={MNIST handwritten digit database}, - author={LeCun, Yann and Cortes, Corinna and Burges, CJ}, - journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist}, - volume={2}, - year={2010} - }""", + bibtex_citation=r""" +@article{lecun2010mnist, + author = {LeCun, Yann and Cortes, Corinna and Burges, CJ}, + journal = {ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist}, + title = {MNIST handwritten digit database}, + volume = {2}, + year = {2010}, +} +""", descriptive_stats={ "n_samples": {"test": 10000}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py b/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py index 9411f88baf..d9453b8308 100644 --- a/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/OxfordFlowersClassification.py @@ -29,16 +29,19 @@ class OxfordFlowersClassification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="found", - bibtex_citation="""@INPROCEEDINGS{4756141, - author={Nilsback, Maria-Elena and Zisserman, Andrew}, - booktitle={2008 Sixth Indian Conference on Computer Vision, Graphics & Image Processing}, - title={Automated Flower Classification over a Large Number of Classes}, - year={2008}, - volume={}, - number={}, - pages={722-729}, - keywords={Shape;Kernel;Distributed computing;Support vector machines;Support vector machine 
classification;object classification;segmentation}, - doi={10.1109/ICVGIP.2008.47}}""", + bibtex_citation=r""" +@inproceedings{4756141, + author = {Nilsback, Maria-Elena and Zisserman, Andrew}, + booktitle = {2008 Sixth Indian Conference on Computer Vision, Graphics & Image Processing}, + doi = {10.1109/ICVGIP.2008.47}, + keywords = {Shape;Kernel;Distributed computing;Support vector machines;Support vector machine classification;object classification;segmentation}, + number = {}, + pages = {722-729}, + title = {Automated Flower Classification over a Large Number of Classes}, + volume = {}, + year = {2008}, +} +""", descriptive_stats={ "n_samples": {"test": 400000}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py b/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py index ea83ecb522..5a287ba5a7 100644 --- a/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/OxfordPetsClassification.py @@ -29,17 +29,19 @@ class OxfordPetsClassification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@INPROCEEDINGS{6248092, - author={Parkhi, Omkar M and Vedaldi, Andrea and Zisserman, Andrew and Jawahar, C. V.}, - booktitle={2012 IEEE Conference on Computer Vision and Pattern Recognition}, - title={Cats and dogs}, - year={2012}, - volume={}, - number={}, - pages={3498-3505}, - keywords={Positron emission tomography;Image segmentation;Cats;Dogs;Layout;Deformable models;Head}, - doi={10.1109/CVPR.2012.6248092} - }""", + bibtex_citation=r""" +@inproceedings{6248092, + author = {Parkhi, Omkar M and Vedaldi, Andrea and Zisserman, Andrew and Jawahar, C. 
V.}, + booktitle = {2012 IEEE Conference on Computer Vision and Pattern Recognition}, + doi = {10.1109/CVPR.2012.6248092}, + keywords = {Positron emission tomography;Image segmentation;Cats;Dogs;Layout;Deformable models;Head}, + number = {}, + pages = {3498-3505}, + title = {Cats and dogs}, + volume = {}, + year = {2012}, +} +""", descriptive_stats={ "n_samples": {"test": 3669}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py b/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py index 27508c8c17..0baab06893 100644 --- a/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/PatchCamelyonClassification.py @@ -29,25 +29,26 @@ class PatchCamelyonClassification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@InProceedings{10.1007/978-3-030-00934-2_24, -author="Veeling, Bastiaan S. + bibtex_citation=r""" +@inproceedings{10.1007/978-3-030-00934-2_24, + abstract = {We propose a new model for digital pathology segmentation, based on the observation that histopathology images are inherently symmetric under rotation and reflection. Utilizing recent findings on rotation equivariant CNNs, the proposed model leverages these symmetries in a principled manner. We present a visual analysis showing improved stability on predictions, and demonstrate that exploiting rotation equivariance significantly improves tumor detection performance on a challenging lymph node metastases dataset. We further present a novel derived dataset to enable principled comparison of machine learning models, in combination with an initial benchmark. Through this dataset, the task of histopathology diagnosis becomes accessible as a challenging benchmark for fundamental machine learning research.}, + address = {Cham}, + author = {Veeling, Bastiaan S. 
and Linmans, Jasper and Winkens, Jim and Cohen, Taco -and Welling, Max", -editor="Frangi, Alejandro F. +and Welling, Max}, + booktitle = {Medical Image Computing and Computer Assisted Intervention -- MICCAI 2018}, + editor = {Frangi, Alejandro F. and Schnabel, Julia A. and Davatzikos, Christos and Alberola-L{\'o}pez, Carlos -and Fichtinger, Gabor", -title="Rotation Equivariant CNNs for Digital Pathology", -booktitle="Medical Image Computing and Computer Assisted Intervention -- MICCAI 2018", -year="2018", -publisher="Springer International Publishing", -address="Cham", -pages="210--218", -abstract="We propose a new model for digital pathology segmentation, based on the observation that histopathology images are inherently symmetric under rotation and reflection. Utilizing recent findings on rotation equivariant CNNs, the proposed model leverages these symmetries in a principled manner. We present a visual analysis showing improved stability on predictions, and demonstrate that exploiting rotation equivariance significantly improves tumor detection performance on a challenging lymph node metastases dataset. We further present a novel derived dataset to enable principled comparison of machine learning models, in combination with an initial benchmark. 
Through this dataset, the task of histopathology diagnosis becomes accessible as a challenging benchmark for fundamental machine learning research.", -isbn="978-3-030-00934-2" +and Fichtinger, Gabor}, + isbn = {978-3-030-00934-2}, + pages = {210--218}, + publisher = {Springer International Publishing}, + title = {Rotation Equivariant CNNs for Digital Pathology}, + year = {2018}, } """, descriptive_stats={ diff --git a/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py b/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py index 6f403587fd..0c1bfe3e14 100644 --- a/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/RESISC45Classification.py @@ -29,17 +29,19 @@ class RESISC45Classification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@ARTICLE{7891544, - author={Cheng, Gong and Han, Junwei and Lu, Xiaoqiang}, - journal={Proceedings of the IEEE}, - title={Remote Sensing Image Scene Classification: Benchmark and State of the Art}, - year={2017}, - volume={105}, - number={10}, - pages={1865-1883}, - keywords={Remote sensing;Benchmark testing;Spatial resolution;Social network services;Satellites;Image analysis;Machine learning;Unsupervised learning;Classification;Benchmark data set;deep learning;handcrafted features;remote sensing image;scene classification;unsupervised feature learning}, - doi={10.1109/JPROC.2017.2675998} - }""", + bibtex_citation=r""" +@article{7891544, + author = {Cheng, Gong and Han, Junwei and Lu, Xiaoqiang}, + doi = {10.1109/JPROC.2017.2675998}, + journal = {Proceedings of the IEEE}, + keywords = {Remote sensing;Benchmark testing;Spatial resolution;Social network services;Satellites;Image analysis;Machine learning;Unsupervised learning;Classification;Benchmark data set;deep learning;handcrafted features;remote sensing image;scene classification;unsupervised feature learning}, + number = {10}, + 
pages = {1865-1883}, + title = {Remote Sensing Image Scene Classification: Benchmark and State of the Art}, + volume = {105}, + year = {2017}, +} +""", descriptive_stats={ "n_samples": {"test": 6300}, "avg_character_length": {"test": 256}, diff --git a/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py b/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py index bc5b90f790..8ba861f1e8 100644 --- a/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/STL10Classification.py @@ -29,21 +29,23 @@ class STL10Classification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@InProceedings{pmlr-v15-coates11a, - title = {An Analysis of Single-Layer Networks in Unsupervised Feature Learning}, - author = {Coates, Adam and Ng, Andrew and Lee, Honglak}, - booktitle = {Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics}, - pages = {215--223}, - year = {2011}, - editor = {Gordon, Geoffrey and Dunson, David and Dudík, Miroslav}, - volume = {15}, - series = {Proceedings of Machine Learning Research}, - address = {Fort Lauderdale, FL, USA}, - month = {11--13 Apr}, - publisher = {PMLR}, - pdf = {http://proceedings.mlr.press/v15/coates11a/coates11a.pdf}, - url = {https://proceedings.mlr.press/v15/coates11a.html}, - }""", + bibtex_citation=r""" +@inproceedings{pmlr-v15-coates11a, + address = {Fort Lauderdale, FL, USA}, + author = {Coates, Adam and Ng, Andrew and Lee, Honglak}, + booktitle = {Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics}, + editor = {Gordon, Geoffrey and Dunson, David and Dudík, Miroslav}, + month = {11--13 Apr}, + pages = {215--223}, + pdf = {http://proceedings.mlr.press/v15/coates11a/coates11a.pdf}, + publisher = {PMLR}, + series = {Proceedings of Machine Learning Research}, + title = {An Analysis of Single-Layer Networks in 
Unsupervised Feature Learning}, + url = {https://proceedings.mlr.press/v15/coates11a.html}, + volume = {15}, + year = {2011}, +} +""", descriptive_stats={ "n_samples": {"test": 8000}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py b/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py index 6e7c194f60..e6383a46e6 100644 --- a/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/SUN397Classification.py @@ -29,16 +29,18 @@ class SUN397Classification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@INPROCEEDINGS{5539970, - author={Xiao, Jianxiong and Hays, James and Ehinger, Krista A. and Oliva, Aude and Torralba, Antonio}, - booktitle={2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition}, - title={SUN database: Large-scale scene recognition from abbey to zoo}, - year={2010}, - volume={}, - number={}, - pages={3485-3492}, - doi={10.1109/CVPR.2010.5539970} - }""", + bibtex_citation=r""" +@inproceedings{5539970, + author = {Xiao, Jianxiong and Hays, James and Ehinger, Krista A. 
and Oliva, Aude and Torralba, Antonio}, + booktitle = {2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition}, + doi = {10.1109/CVPR.2010.5539970}, + number = {}, + pages = {3485-3492}, + title = {SUN database: Large-scale scene recognition from abbey to zoo}, + volume = {}, + year = {2010}, +} +""", descriptive_stats={ "n_samples": {"test": 21750}, "avg_character_length": {"test": 256}, diff --git a/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py b/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py index 625185a00a..34f1fe2cb2 100644 --- a/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py +++ b/mteb/tasks/Image/ImageClassification/eng/StanfordCarsClassification.py @@ -29,12 +29,14 @@ class StanfordCarsClassification(AbsTaskImageClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@inproceedings{Krause2013CollectingAL, - title={Collecting a Large-scale Dataset of Fine-grained Cars}, - author={Jonathan Krause and Jia Deng and Michael Stark and Li Fei-Fei}, - year={2013}, - url={https://api.semanticscholar.org/CorpusID:16632981} - }""", + bibtex_citation=r""" +@inproceedings{Krause2013CollectingAL, + author = {Jonathan Krause and Jia Deng and Michael Stark and Li Fei-Fei}, + title = {Collecting a Large-scale Dataset of Fine-grained Cars}, + url = {https://api.semanticscholar.org/CorpusID:16632981}, + year = {2013}, +} +""", descriptive_stats={ "n_samples": {"test": 8041}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py b/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py index 8d9f706598..8ffa24823b 100644 --- a/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py +++ b/mteb/tasks/Image/ImageClassification/eng/UCF101Classification.py @@ -33,15 +33,17 @@ class UCF101Classification(AbsTaskImageClassification): dialect=[], 
modalities=["image"], sample_creation="created", - bibtex_citation="""@misc{soomro2012ucf101dataset101human, - title={UCF101: A Dataset of 101 Human Actions Classes From Videos in The Wild}, - author={Khurram Soomro and Amir Roshan Zamir and Mubarak Shah}, - year={2012}, - eprint={1212.0402}, - archivePrefix={arXiv}, - primaryClass={cs.CV}, - url={https://arxiv.org/abs/1212.0402}, - }""", + bibtex_citation=r""" +@misc{soomro2012ucf101dataset101human, + archiveprefix = {arXiv}, + author = {Khurram Soomro and Amir Roshan Zamir and Mubarak Shah}, + eprint = {1212.0402}, + primaryclass = {cs.CV}, + title = {UCF101: A Dataset of 101 Human Actions Classes From Videos in The Wild}, + url = {https://arxiv.org/abs/1212.0402}, + year = {2012}, +} +""", descriptive_stats={ "n_samples": {"test": 697222}, "avg_character_length": {"test": 0}, diff --git a/mteb/tasks/Image/ImageClustering/eng/CIFAR.py b/mteb/tasks/Image/ImageClustering/eng/CIFAR.py index 316fe06665..f64b728f14 100644 --- a/mteb/tasks/Image/ImageClustering/eng/CIFAR.py +++ b/mteb/tasks/Image/ImageClustering/eng/CIFAR.py @@ -29,12 +29,14 @@ class CIFAR10Clustering(AbsTaskImageClustering): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation=""" @TECHREPORT{Krizhevsky09learningmultiple, - author = {Alex Krizhevsky}, - title = {Learning multiple layers of features from tiny images}, - institution = {}, - year = {2009} - }""", + bibtex_citation=r""" +@techreport{Krizhevsky09learningmultiple, + author = {Alex Krizhevsky}, + institution = {}, + title = {Learning multiple layers of features from tiny images}, + year = {2009}, +} +""", descriptive_stats={ "n_samples": {"test": 10000}, "avg_character_length": {"test": 431.4}, @@ -69,12 +71,14 @@ class CIFAR100Clustering(AbsTaskImageClustering): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation=""" @TECHREPORT{Krizhevsky09learningmultiple, - author = {Alex Krizhevsky}, - title = {Learning multiple layers of features 
from tiny images}, - institution = {}, - year = {2009} - }""", + bibtex_citation=r""" +@techreport{Krizhevsky09learningmultiple, + author = {Alex Krizhevsky}, + institution = {}, + title = {Learning multiple layers of features from tiny images}, + year = {2009}, +} +""", descriptive_stats={ "n_samples": {"test": 10000}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ImageClustering/eng/ImageNet.py b/mteb/tasks/Image/ImageClustering/eng/ImageNet.py index 4360712983..2de235666f 100644 --- a/mteb/tasks/Image/ImageClustering/eng/ImageNet.py +++ b/mteb/tasks/Image/ImageClustering/eng/ImageNet.py @@ -26,17 +26,19 @@ class ImageNetDog15Clustering(AbsTaskImageClustering): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation=""" @INPROCEEDINGS{5206848, - author={Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Kai Li and Li Fei-Fei}, - booktitle={2009 IEEE Conference on Computer Vision and Pattern Recognition}, - title={ImageNet: A large-scale hierarchical image database}, - year={2009}, - volume={}, - number={}, - pages={248-255}, - keywords={Large-scale systems;Image databases;Explosions;Internet;Robustness;Information retrieval;Image retrieval;Multimedia databases;Ontologies;Spine}, - doi={10.1109/CVPR.2009.5206848} - }""", + bibtex_citation=r""" +@inproceedings{5206848, + author = {Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Kai Li and Li Fei-Fei}, + booktitle = {2009 IEEE Conference on Computer Vision and Pattern Recognition}, + doi = {10.1109/CVPR.2009.5206848}, + keywords = {Large-scale systems;Image databases;Explosions;Internet;Robustness;Information retrieval;Image retrieval;Multimedia databases;Ontologies;Spine}, + number = {}, + pages = {248-255}, + title = {ImageNet: A large-scale hierarchical image database}, + volume = {}, + year = {2009}, +} +""", descriptive_stats={ "n_samples": {"test": 1076, "train": 1500}, # "avg_character_length": {"test": 431.4}, @@ -66,17 +68,19 @@ 
class ImageNet10Clustering(AbsTaskImageClustering): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation=""" @INPROCEEDINGS{5206848, - author={Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Kai Li and Li Fei-Fei}, - booktitle={2009 IEEE Conference on Computer Vision and Pattern Recognition}, - title={ImageNet: A large-scale hierarchical image database}, - year={2009}, - volume={}, - number={}, - pages={248-255}, - keywords={Large-scale systems;Image databases;Explosions;Internet;Robustness;Information retrieval;Image retrieval;Multimedia databases;Ontologies;Spine}, - doi={10.1109/CVPR.2009.5206848}} - """, + bibtex_citation=r""" +@inproceedings{5206848, + author = {Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Kai Li and Li Fei-Fei}, + booktitle = {2009 IEEE Conference on Computer Vision and Pattern Recognition}, + doi = {10.1109/CVPR.2009.5206848}, + keywords = {Large-scale systems;Image databases;Explosions;Internet;Robustness;Information retrieval;Image retrieval;Multimedia databases;Ontologies;Spine}, + number = {}, + pages = {248-255}, + title = {ImageNet: A large-scale hierarchical image database}, + volume = {}, + year = {2009}, +} +""", descriptive_stats={ "n_samples": {"test": 13000}, # "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ImageMultilabelClassification/eng/PascalVOC2007.py b/mteb/tasks/Image/ImageMultilabelClassification/eng/PascalVOC2007.py index d9bb6cfaa2..d7907983c2 100644 --- a/mteb/tasks/Image/ImageMultilabelClassification/eng/PascalVOC2007.py +++ b/mteb/tasks/Image/ImageMultilabelClassification/eng/PascalVOC2007.py @@ -35,16 +35,18 @@ class VOC2007Classification(AbsTaskImageMultilabelClassification): dialect=[], modalities=["image"], sample_creation="created", - bibtex_citation="""@Article{Everingham10, - author = "Everingham, M. and Van~Gool, L. and Williams, C. K. I. and Winn, J. 
and Zisserman, A.", - title = "The Pascal Visual Object Classes (VOC) Challenge", - journal = "International Journal of Computer Vision", - volume = "88", - year = "2010", - number = "2", - month = jun, - pages = "303--338", - }""", + bibtex_citation=r""" +@article{Everingham10, + author = {Everingham, M. and Van~Gool, L. and Williams, C. K. I. and Winn, J. and Zisserman, A.}, + journal = {International Journal of Computer Vision}, + month = jun, + number = {2}, + pages = {303--338}, + title = {The Pascal Visual Object Classes (VOC) Challenge}, + volume = {88}, + year = {2010}, +} +""", descriptive_stats={ "n_samples": {"test": 4952}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ImageTextPairClassification/AROCocoOrder.py b/mteb/tasks/Image/ImageTextPairClassification/AROCocoOrder.py index 8a227494ae..f538c5b716 100644 --- a/mteb/tasks/Image/ImageTextPairClassification/AROCocoOrder.py +++ b/mteb/tasks/Image/ImageTextPairClassification/AROCocoOrder.py @@ -41,12 +41,14 @@ class AROCocoOrder(AbsTaskImageTextPairClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""@inproceedings{yuksekgonul2023and, - title={When and why vision-language models behave like bags-of-words, and what to do about it?}, - author={Yuksekgonul, Mert and Bianchi, Federico and Kalluri, Pratyusha and Jurafsky, Dan and Zou, James}, - booktitle={The Eleventh International Conference on Learning Representations}, - year={2023} -}""", + bibtex_citation=r""" +@inproceedings{yuksekgonul2023and, + author = {Yuksekgonul, Mert and Bianchi, Federico and Kalluri, Pratyusha and Jurafsky, Dan and Zou, James}, + booktitle = {The Eleventh International Conference on Learning Representations}, + title = {When and why vision-language models behave like bags-of-words, and what to do about it?}, + year = {2023}, +} +""", descriptive_stats={ "n_samples": {"test": 25010}, "avg_character_length": {"test": 1}, diff --git 
a/mteb/tasks/Image/ImageTextPairClassification/AROFlickrOrder.py b/mteb/tasks/Image/ImageTextPairClassification/AROFlickrOrder.py index 78fc0b8c79..b3a44cfd3a 100644 --- a/mteb/tasks/Image/ImageTextPairClassification/AROFlickrOrder.py +++ b/mteb/tasks/Image/ImageTextPairClassification/AROFlickrOrder.py @@ -41,12 +41,14 @@ class AROFlickrOrder(AbsTaskImageTextPairClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""@inproceedings{yuksekgonul2023and, - title={When and why vision-language models behave like bags-of-words, and what to do about it?}, - author={Yuksekgonul, Mert and Bianchi, Federico and Kalluri, Pratyusha and Jurafsky, Dan and Zou, James}, - booktitle={The Eleventh International Conference on Learning Representations}, - year={2023} -}""", + bibtex_citation=r""" +@inproceedings{yuksekgonul2023and, + author = {Yuksekgonul, Mert and Bianchi, Federico and Kalluri, Pratyusha and Jurafsky, Dan and Zou, James}, + booktitle = {The Eleventh International Conference on Learning Representations}, + title = {When and why vision-language models behave like bags-of-words, and what to do about it?}, + year = {2023}, +} +""", descriptive_stats={ "n_samples": {"test": 5000}, "avg_character_length": {"test": 1}, diff --git a/mteb/tasks/Image/ImageTextPairClassification/AROVisualAttribution.py b/mteb/tasks/Image/ImageTextPairClassification/AROVisualAttribution.py index b43ac87a00..d051e59470 100644 --- a/mteb/tasks/Image/ImageTextPairClassification/AROVisualAttribution.py +++ b/mteb/tasks/Image/ImageTextPairClassification/AROVisualAttribution.py @@ -34,12 +34,14 @@ class AROVisualAttribution(AbsTaskImageTextPairClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""@inproceedings{yuksekgonul2023and, - title={When and why vision-language models behave like bags-of-words, and what to do about it?}, - author={Yuksekgonul, Mert and Bianchi, Federico and Kalluri, 
Pratyusha and Jurafsky, Dan and Zou, James}, - booktitle={The Eleventh International Conference on Learning Representations}, - year={2023} -}""", + bibtex_citation=r""" +@inproceedings{yuksekgonul2023and, + author = {Yuksekgonul, Mert and Bianchi, Federico and Kalluri, Pratyusha and Jurafsky, Dan and Zou, James}, + booktitle = {The Eleventh International Conference on Learning Representations}, + title = {When and why vision-language models behave like bags-of-words, and what to do about it?}, + year = {2023}, +} +""", descriptive_stats={ "n_samples": {"test": 28748}, "avg_character_length": {"test": 1}, diff --git a/mteb/tasks/Image/ImageTextPairClassification/AROVisualRelation.py b/mteb/tasks/Image/ImageTextPairClassification/AROVisualRelation.py index 1d74de646c..170e0f07cb 100644 --- a/mteb/tasks/Image/ImageTextPairClassification/AROVisualRelation.py +++ b/mteb/tasks/Image/ImageTextPairClassification/AROVisualRelation.py @@ -34,12 +34,14 @@ class AROVisualRelation(AbsTaskImageTextPairClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""@inproceedings{yuksekgonul2023and, - title={When and why vision-language models behave like bags-of-words, and what to do about it?}, - author={Yuksekgonul, Mert and Bianchi, Federico and Kalluri, Pratyusha and Jurafsky, Dan and Zou, James}, - booktitle={The Eleventh International Conference on Learning Representations}, - year={2023} -}""", + bibtex_citation=r""" +@inproceedings{yuksekgonul2023and, + author = {Yuksekgonul, Mert and Bianchi, Federico and Kalluri, Pratyusha and Jurafsky, Dan and Zou, James}, + booktitle = {The Eleventh International Conference on Learning Representations}, + title = {When and why vision-language models behave like bags-of-words, and what to do about it?}, + year = {2023}, +} +""", descriptive_stats={ "n_samples": {"test": 23937}, "avg_character_length": {"test": 1}, diff --git a/mteb/tasks/Image/ImageTextPairClassification/ImageCoDe.py 
b/mteb/tasks/Image/ImageTextPairClassification/ImageCoDe.py index e2455c2cd8..5361a0dce6 100644 --- a/mteb/tasks/Image/ImageTextPairClassification/ImageCoDe.py +++ b/mteb/tasks/Image/ImageTextPairClassification/ImageCoDe.py @@ -45,12 +45,14 @@ class ImageCoDe(AbsTaskImageTextPairClassification): dialect=[], modalities=["text", "image"], sample_creation="found", - bibtex_citation="""@article{krojer2022image, - title={Image retrieval from contextual descriptions}, - author={Krojer, Benno and Adlakha, Vaibhav and Vineet, Vibhav and Goyal, Yash and Ponti, Edoardo and Reddy, Siva}, - journal={arXiv preprint arXiv:2203.15867}, - year={2022} -}""", + bibtex_citation=r""" +@article{krojer2022image, + author = {Krojer, Benno and Adlakha, Vaibhav and Vineet, Vibhav and Goyal, Yash and Ponti, Edoardo and Reddy, Siva}, + journal = {arXiv preprint arXiv:2203.15867}, + title = {Image retrieval from contextual descriptions}, + year = {2022}, +} +""", descriptive_stats={ "n_samples": {"test": 25322}, "avg_character_length": { diff --git a/mteb/tasks/Image/ImageTextPairClassification/SugarCrepe.py b/mteb/tasks/Image/ImageTextPairClassification/SugarCrepe.py index 94114f100e..0225865cb2 100644 --- a/mteb/tasks/Image/ImageTextPairClassification/SugarCrepe.py +++ b/mteb/tasks/Image/ImageTextPairClassification/SugarCrepe.py @@ -36,13 +36,15 @@ class SugarCrepe(AbsTaskImageTextPairClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""@article{hsieh2024sugarcrepe, - title={Sugarcrepe: Fixing hackable benchmarks for vision-language compositionality}, - author={Hsieh, Cheng-Yu and Zhang, Jieyu and Ma, Zixian and Kembhavi, Aniruddha and Krishna, Ranjay}, - journal={Advances in neural information processing systems}, - volume={36}, - year={2024} -}""", + bibtex_citation=r""" +@article{hsieh2024sugarcrepe, + author = {Hsieh, Cheng-Yu and Zhang, Jieyu and Ma, Zixian and Kembhavi, Aniruddha and Krishna, Ranjay}, + journal = {Advances in 
neural information processing systems}, + title = {Sugarcrepe: Fixing hackable benchmarks for vision-language compositionality}, + volume = {36}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 7511}, "avg_character_length": {"test": 1}, diff --git a/mteb/tasks/Image/ImageTextPairClassification/Winoground.py b/mteb/tasks/Image/ImageTextPairClassification/Winoground.py index ff0c435fb7..f226a55466 100644 --- a/mteb/tasks/Image/ImageTextPairClassification/Winoground.py +++ b/mteb/tasks/Image/ImageTextPairClassification/Winoground.py @@ -34,15 +34,17 @@ class Winoground(AbsTaskImageTextPairClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""@misc{thrush2022winogroundprobingvisionlanguage, - title={Winoground: Probing Vision and Language Models for Visio-Linguistic Compositionality}, - author={Tristan Thrush and Ryan Jiang and Max Bartolo and Amanpreet Singh and Adina Williams and Douwe Kiela and Candace Ross}, - year={2022}, - eprint={2204.03162}, - archivePrefix={arXiv}, - primaryClass={cs.CV}, - url={https://arxiv.org/abs/2204.03162}, - }""", + bibtex_citation=r""" +@misc{thrush2022winogroundprobingvisionlanguage, + archiveprefix = {arXiv}, + author = {Tristan Thrush and Ryan Jiang and Max Bartolo and Amanpreet Singh and Adina Williams and Douwe Kiela and Candace Ross}, + eprint = {2204.03162}, + primaryclass = {cs.CV}, + title = {Winoground: Probing Vision and Language Models for Visio-Linguistic Compositionality}, + url = {https://arxiv.org/abs/2204.03162}, + year = {2022}, +} +""", descriptive_stats={ "n_samples": {"test": 400}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/VisualSTS/eng/STS12VisualSTS.py b/mteb/tasks/Image/VisualSTS/eng/STS12VisualSTS.py index 3a99e9fc47..1bc81485c0 100644 --- a/mteb/tasks/Image/VisualSTS/eng/STS12VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/eng/STS12VisualSTS.py @@ -26,12 +26,14 @@ class STS12VisualSTS(AbsTaskVisualSTS): 
annotations_creators="human-annotated", dialect=[], sample_creation="rendered", - bibtex_citation="""@article{xiao2024pixel, - title={Pixel Sentence Representation Learning}, - author={Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2402.08183}, - year={2024} -}""", + bibtex_citation=r""" +@article{xiao2024pixel, + author = {Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2402.08183}, + title = {Pixel Sentence Representation Learning}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 5342}, "avg_character_length": {"dev": 1.0, "test": 1.0}, diff --git a/mteb/tasks/Image/VisualSTS/eng/STS13VisualSTS.py b/mteb/tasks/Image/VisualSTS/eng/STS13VisualSTS.py index b66678c851..fde965fb93 100644 --- a/mteb/tasks/Image/VisualSTS/eng/STS13VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/eng/STS13VisualSTS.py @@ -26,12 +26,14 @@ class STS13VisualSTS(AbsTaskVisualSTS): annotations_creators="human-annotated", dialect=[], sample_creation="rendered", - bibtex_citation="""@article{xiao2024pixel, - title={Pixel Sentence Representation Learning}, - author={Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2402.08183}, - year={2024} -}""", + bibtex_citation=r""" +@article{xiao2024pixel, + author = {Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2402.08183}, + title = {Pixel Sentence Representation Learning}, + year = {2024}, +} +""", 
descriptive_stats={ "n_samples": {"test": 1500}, "avg_character_length": {"dev": 1.0, "test": 1.0}, diff --git a/mteb/tasks/Image/VisualSTS/eng/STS14VisualSTS.py b/mteb/tasks/Image/VisualSTS/eng/STS14VisualSTS.py index 0820ed7823..39c9d725e2 100644 --- a/mteb/tasks/Image/VisualSTS/eng/STS14VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/eng/STS14VisualSTS.py @@ -27,12 +27,14 @@ class STS14VisualSTS(AbsTaskVisualSTS): annotations_creators="derived", dialect=[], sample_creation="rendered", - bibtex_citation="""@article{xiao2024pixel, - title={Pixel Sentence Representation Learning}, - author={Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2402.08183}, - year={2024} -}""", + bibtex_citation=r""" +@article{xiao2024pixel, + author = {Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2402.08183}, + title = {Pixel Sentence Representation Learning}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 3750}, "avg_character_length": {"dev": 1.0, "test": 1.0}, diff --git a/mteb/tasks/Image/VisualSTS/eng/STS15VisualSTS.py b/mteb/tasks/Image/VisualSTS/eng/STS15VisualSTS.py index 8a9b8c682b..3ca654b2e4 100644 --- a/mteb/tasks/Image/VisualSTS/eng/STS15VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/eng/STS15VisualSTS.py @@ -26,12 +26,14 @@ class STS15VisualSTS(AbsTaskVisualSTS): annotations_creators="human-annotated", dialect=[], sample_creation="rendered", - bibtex_citation="""@article{xiao2024pixel, - title={Pixel Sentence Representation Learning}, - author={Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, - journal={arXiv preprint 
arXiv:2402.08183}, - year={2024} -}""", + bibtex_citation=r""" +@article{xiao2024pixel, + author = {Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2402.08183}, + title = {Pixel Sentence Representation Learning}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 3000}, "avg_character_length": {"dev": 1.0, "test": 1.0}, diff --git a/mteb/tasks/Image/VisualSTS/eng/STS16VisualSTS.py b/mteb/tasks/Image/VisualSTS/eng/STS16VisualSTS.py index ea82fa5a8b..40b52e8c1f 100644 --- a/mteb/tasks/Image/VisualSTS/eng/STS16VisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/eng/STS16VisualSTS.py @@ -26,12 +26,14 @@ class STS16VisualSTS(AbsTaskVisualSTS): annotations_creators="human-annotated", dialect=[], sample_creation="rendered", - bibtex_citation="""@article{xiao2024pixel, - title={Pixel Sentence Representation Learning}, - author={Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2402.08183}, - year={2024} -}""", + bibtex_citation=r""" +@article{xiao2024pixel, + author = {Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2402.08183}, + title = {Pixel Sentence Representation Learning}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 1186}, "avg_character_length": {"dev": 1.0, "test": 1.0}, diff --git a/mteb/tasks/Image/VisualSTS/multilingual/STS17MultilingualVisualSTS.py b/mteb/tasks/Image/VisualSTS/multilingual/STS17MultilingualVisualSTS.py index 2bf15406f3..06471ba36b 100644 --- a/mteb/tasks/Image/VisualSTS/multilingual/STS17MultilingualVisualSTS.py +++ 
b/mteb/tasks/Image/VisualSTS/multilingual/STS17MultilingualVisualSTS.py @@ -46,12 +46,14 @@ class STS17MultilingualVisualSTS(AbsTaskVisualSTS, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="rendered", - bibtex_citation="""@article{xiao2024pixel, - title={Pixel Sentence Representation Learning}, - author={Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2402.08183}, - year={2024} -}""", + bibtex_citation=r""" +@article{xiao2024pixel, + author = {Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2402.08183}, + title = {Pixel Sentence Representation Learning}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 10692}, "avg_character_length": {"dev": 1.0, "test": 1.0}, diff --git a/mteb/tasks/Image/VisualSTS/multilingual/STSBenchmarkMultilingualVisualSTS.py b/mteb/tasks/Image/VisualSTS/multilingual/STSBenchmarkMultilingualVisualSTS.py index 9cab4e2f45..b32cf20711 100644 --- a/mteb/tasks/Image/VisualSTS/multilingual/STSBenchmarkMultilingualVisualSTS.py +++ b/mteb/tasks/Image/VisualSTS/multilingual/STSBenchmarkMultilingualVisualSTS.py @@ -47,12 +47,14 @@ class STSBenchmarkMultilingualVisualSTS(AbsTaskVisualSTS, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="rendered", - bibtex_citation="""@article{xiao2024pixel, - title={Pixel Sentence Representation Learning}, - author={Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2402.08183}, - year={2024} -}""", + bibtex_citation=r""" +@article{xiao2024pixel, + author = 
{Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2402.08183}, + title = {Pixel Sentence Representation Learning}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"dev": 15000, "test": 13790}, "avg_character_length": {"dev": 1.0, "test": 1.0}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/Birdsnap.py b/mteb/tasks/Image/ZeroShotClassification/eng/Birdsnap.py index dfa51d7e0e..280c2f2ee5 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/Birdsnap.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/Birdsnap.py @@ -31,13 +31,15 @@ class BirdsnapZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@InProceedings{Berg_2014_CVPR, - author = {Berg, Thomas and Liu, Jiongxin and Woo Lee, Seung and Alexander, Michelle L. and Jacobs, David W. and Belhumeur, Peter N.}, - title = {Birdsnap: Large-scale Fine-grained Visual Categorization of Birds}, - booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, - month = {June}, - year = {2014} - }""", + bibtex_citation=r""" +@inproceedings{Berg_2014_CVPR, + author = {Berg, Thomas and Liu, Jiongxin and Woo Lee, Seung and Alexander, Michelle L. and Jacobs, David W. 
and Belhumeur, Peter N.}, + booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + title = {Birdsnap: Large-scale Fine-grained Visual Categorization of Birds}, + year = {2014}, +} +""", descriptive_stats={ "n_samples": {"test": 1851}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/CIFAR.py b/mteb/tasks/Image/ZeroShotClassification/eng/CIFAR.py index eaaed65f3f..56ce00d786 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/CIFAR.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/CIFAR.py @@ -31,12 +31,14 @@ class CIFAR10ZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation=""" @TECHREPORT{Krizhevsky09learningmultiple, - author = {Alex Krizhevsky}, - title = {Learning multiple layers of features from tiny images}, - institution = {}, - year = {2009} - }""", + bibtex_citation=r""" +@techreport{Krizhevsky09learningmultiple, + author = {Alex Krizhevsky}, + institution = {}, + title = {Learning multiple layers of features from tiny images}, + year = {2009}, +} +""", descriptive_stats={ "n_samples": {"test": 10000}, "avg_character_length": {"test": 431.4}, @@ -76,13 +78,14 @@ class CIFAR100ZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation=""" @TECHREPORT{Krizhevsky09learningmultiple, - author = {Alex Krizhevsky}, - title = {Learning multiple layers of features from tiny images}, - institution = {}, - year = {2009} - } - """, + bibtex_citation=r""" +@techreport{Krizhevsky09learningmultiple, + author = {Alex Krizhevsky}, + institution = {}, + title = {Learning multiple layers of features from tiny images}, + year = {2009}, +} +""", descriptive_stats={ "n_samples": {"test": 10000}, "avg_character_length": {"test": 431.4}, diff --git 
a/mteb/tasks/Image/ZeroShotClassification/eng/CLEVR.py b/mteb/tasks/Image/ZeroShotClassification/eng/CLEVR.py index 4dc8768443..a44c0141ca 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/CLEVR.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/CLEVR.py @@ -28,14 +28,15 @@ class CLEVR(AbsTaskZeroShotClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""\ -@InProceedings{Johnson_2017_CVPR, -author = {Johnson, Justin and Hariharan, Bharath and van der Maaten, Laurens and Fei-Fei, Li and Lawrence Zitnick, C. and Girshick, Ross}, -title = {CLEVR: A Diagnostic Dataset for Compositional Language and Elementary Visual Reasoning}, -booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, -month = {July}, -year = {2017} -}""", + bibtex_citation=r""" +@inproceedings{Johnson_2017_CVPR, + author = {Johnson, Justin and Hariharan, Bharath and van der Maaten, Laurens and Fei-Fei, Li and Lawrence Zitnick, C. and Girshick, Ross}, + booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {July}, + title = {CLEVR: A Diagnostic Dataset for Compositional Language and Elementary Visual Reasoning}, + year = {2017}, +} +""", descriptive_stats={ "n_samples": {"test": 15000}, "avg_character_length": {"test": 0}, @@ -80,14 +81,15 @@ class CLEVRCount(AbsTaskZeroShotClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""\ -@InProceedings{Johnson_2017_CVPR, -author = {Johnson, Justin and Hariharan, Bharath and van der Maaten, Laurens and Fei-Fei, Li and Lawrence Zitnick, C. 
and Girshick, Ross}, -title = {CLEVR: A Diagnostic Dataset for Compositional Language and Elementary Visual Reasoning}, -booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, -month = {July}, -year = {2017} -}""", + bibtex_citation=r""" +@inproceedings{Johnson_2017_CVPR, + author = {Johnson, Justin and Hariharan, Bharath and van der Maaten, Laurens and Fei-Fei, Li and Lawrence Zitnick, C. and Girshick, Ross}, + booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {July}, + title = {CLEVR: A Diagnostic Dataset for Compositional Language and Elementary Visual Reasoning}, + year = {2017}, +} +""", descriptive_stats={ "n_samples": {"test": 15000}, "avg_character_length": {"test": 0}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/Caltech101.py b/mteb/tasks/Image/ZeroShotClassification/eng/Caltech101.py index 92738c5047..6e897d0cdf 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/Caltech101.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/Caltech101.py @@ -33,16 +33,19 @@ class Caltech101ZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""@INPROCEEDINGS{1384978, - author={Li Fei-Fei and Fergus, R. and Perona, P.}, - booktitle={2004 Conference on Computer Vision and Pattern Recognition Workshop}, - title={Learning Generative Visual Models from Few Training Examples: An Incremental Bayesian Approach Tested on 101 Object Categories}, - year={2004}, - volume={}, - number={}, - pages={178-178}, - keywords={Bayesian methods;Testing;Humans;Maximum likelihood estimation;Assembly;Shape;Machine vision;Image recognition;Parameter estimation;Image databases}, - doi={10.1109/CVPR.2004.383}}""", + bibtex_citation=r""" +@inproceedings{1384978, + author = {Li Fei-Fei and Fergus, R. 
and Perona, P.}, + booktitle = {2004 Conference on Computer Vision and Pattern Recognition Workshop}, + doi = {10.1109/CVPR.2004.383}, + keywords = {Bayesian methods;Testing;Humans;Maximum likelihood estimation;Assembly;Shape;Machine vision;Image recognition;Parameter estimation;Image databases}, + number = {}, + pages = {178-178}, + title = {Learning Generative Visual Models from Few Training Examples: An Incremental Bayesian Approach Tested on 101 Object Categories}, + volume = {}, + year = {2004}, +} +""", descriptive_stats={ "n_samples": {"test": 6084}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/Country211.py b/mteb/tasks/Image/ZeroShotClassification/eng/Country211.py index 3009ef7b59..67bddc3c4c 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/Country211.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/Country211.py @@ -33,12 +33,14 @@ class Country211ZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@article{radford2021learning, - title={Learning Transferable Visual Models From Natural Language Supervision}, - author={Radford, Alec and Kim, Jong Wook and Hallacy, Chris and Ramesh, Aditya and Goh, Gabriel and Agarwal, Sandhini and Sastry, Girish and Askell, Amanda and Mishkin, Pamela and Clark, Jack and others}, - journal={arXiv preprint arXiv:2103.00020}, - year={2021} - }""", + bibtex_citation=r""" +@article{radford2021learning, + author = {Radford, Alec and Kim, Jong Wook and Hallacy, Chris and Ramesh, Aditya and Goh, Gabriel and Agarwal, Sandhini and Sastry, Girish and Askell, Amanda and Mishkin, Pamela and Clark, Jack and others}, + journal = {arXiv preprint arXiv:2103.00020}, + title = {Learning Transferable Visual Models From Natural Language Supervision}, + year = {2021}, +} +""", descriptive_stats={ "n_samples": {"test": 21100}, "avg_character_length": {"test": 0}, diff --git 
a/mteb/tasks/Image/ZeroShotClassification/eng/DTD.py b/mteb/tasks/Image/ZeroShotClassification/eng/DTD.py index 4316d2a934..5be1a00507 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/DTD.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/DTD.py @@ -31,11 +31,14 @@ class DTDZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@InProceedings{cimpoi14describing, - Author = {M. Cimpoi and S. Maji and I. Kokkinos and S. Mohamed and and A. Vedaldi}, - Title = {Describing Textures in the Wild}, - Booktitle = {Proceedings of the {IEEE} Conf. on Computer Vision and Pattern Recognition ({CVPR})}, - Year = {2014}}""", + bibtex_citation=r""" +@inproceedings{cimpoi14describing, + author = {M. Cimpoi and S. Maji and I. Kokkinos and S. Mohamed and A. Vedaldi}, + booktitle = {Proceedings of the {IEEE} Conf. on Computer Vision and Pattern Recognition ({CVPR})}, + title = {Describing Textures in the Wild}, + year = {2014}, +} +""", descriptive_stats={ "n_samples": {"test": 1880}, "avg_character_length": {"test": 456}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/EuroSAT.py b/mteb/tasks/Image/ZeroShotClassification/eng/EuroSAT.py index 5333815901..028ecde4ff 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/EuroSAT.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/EuroSAT.py @@ -31,16 +31,19 @@ class EuroSATZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@ARTICLE{8736785, - author={Helber, Patrick and Bischke, Benjamin and Dengel, Andreas and Borth, Damian}, - journal={IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing}, - title={EuroSAT: A Novel Dataset and Deep Learning Benchmark for Land Use and Land Cover Classification}, - year={2019}, - volume={12}, - number={7}, - pages={2217-2226}, - keywords={Satellites;Earth;Remote
sensing;Machine learning;Spatial resolution;Feature extraction;Benchmark testing;Dataset;deep convolutional neural network;deep learning;earth observation;land cover classification;land use classification;machine learning;remote sensing;satellite image classification;satellite images}, - doi={10.1109/JSTARS.2019.2918242}}""", + bibtex_citation=r""" +@article{8736785, + author = {Helber, Patrick and Bischke, Benjamin and Dengel, Andreas and Borth, Damian}, + doi = {10.1109/JSTARS.2019.2918242}, + journal = {IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing}, + keywords = {Satellites;Earth;Remote sensing;Machine learning;Spatial resolution;Feature extraction;Benchmark testing;Dataset;deep convolutional neural network;deep learning;earth observation;land cover classification;land use classification;machine learning;remote sensing;satellite image classification;satellite images}, + number = {7}, + pages = {2217-2226}, + title = {EuroSAT: A Novel Dataset and Deep Learning Benchmark for Land Use and Land Cover Classification}, + volume = {12}, + year = {2019}, +} +""", descriptive_stats={ "n_samples": {"test": 5400}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/FER2013.py b/mteb/tasks/Image/ZeroShotClassification/eng/FER2013.py index e1d9d0965f..0dc00a62b7 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/FER2013.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/FER2013.py @@ -31,15 +31,17 @@ class FER2013ZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@misc{goodfellow2015explainingharnessingadversarialexamples, - title={Explaining and Harnessing Adversarial Examples}, - author={Ian J. 
Goodfellow and Jonathon Shlens and Christian Szegedy}, - year={2015}, - eprint={1412.6572}, - archivePrefix={arXiv}, - primaryClass={stat.ML}, - url={https://arxiv.org/abs/1412.6572}, - }""", + bibtex_citation=r""" +@misc{goodfellow2015explainingharnessingadversarialexamples, + archiveprefix = {arXiv}, + author = {Ian J. Goodfellow and Jonathon Shlens and Christian Szegedy}, + eprint = {1412.6572}, + primaryclass = {stat.ML}, + title = {Explaining and Harnessing Adversarial Examples}, + url = {https://arxiv.org/abs/1412.6572}, + year = {2015}, +} +""", descriptive_stats={ "n_samples": {"test": 7178}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/FGVCAircraft.py b/mteb/tasks/Image/ZeroShotClassification/eng/FGVCAircraft.py index 183ecceb51..7e8ea8257c 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/FGVCAircraft.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/FGVCAircraft.py @@ -32,15 +32,17 @@ class FGVCAircraftZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""@misc{maji2013finegrainedvisualclassificationaircraft, - title={Fine-Grained Visual Classification of Aircraft}, - author={Subhransu Maji and Esa Rahtu and Juho Kannala and Matthew Blaschko and Andrea Vedaldi}, - year={2013}, - eprint={1306.5151}, - archivePrefix={arXiv}, - primaryClass={cs.CV}, - url={https://arxiv.org/abs/1306.5151}, - }""", + bibtex_citation=r""" +@misc{maji2013finegrainedvisualclassificationaircraft, + archiveprefix = {arXiv}, + author = {Subhransu Maji and Esa Rahtu and Juho Kannala and Matthew Blaschko and Andrea Vedaldi}, + eprint = {1306.5151}, + primaryclass = {cs.CV}, + title = {Fine-Grained Visual Classification of Aircraft}, + url = {https://arxiv.org/abs/1306.5151}, + year = {2013}, +} +""", descriptive_stats={ "n_samples": {"test": 3333}, "avg_character_length": {"test": 431.4}, diff --git 
a/mteb/tasks/Image/ZeroShotClassification/eng/Food101.py b/mteb/tasks/Image/ZeroShotClassification/eng/Food101.py index a8e269c7e0..e53022f53c 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/Food101.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/Food101.py @@ -31,12 +31,14 @@ class Food101ZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation=""" @inproceedings{bossard14, - title = {Food-101 -- Mining Discriminative Components with Random Forests}, - author = {Bossard, Lukas and Guillaumin, Matthieu and Van Gool, Luc}, - booktitle = {European Conference on Computer Vision}, - year = {2014} - }""", + bibtex_citation=r""" +@inproceedings{bossard14, + author = {Bossard, Lukas and Guillaumin, Matthieu and Van Gool, Luc}, + booktitle = {European Conference on Computer Vision}, + title = {Food-101 -- Mining Discriminative Components with Random Forests}, + year = {2014}, +} +""", descriptive_stats={ "n_samples": {"validation": 25300}, "avg_character_length": {"validation": 431.4}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/GTSRB.py b/mteb/tasks/Image/ZeroShotClassification/eng/GTSRB.py index f3b9d751f0..d68d8928b4 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/GTSRB.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/GTSRB.py @@ -33,16 +33,19 @@ class GTSRBZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@INPROCEEDINGS{6033395, - author={Stallkamp, Johannes and Schlipsing, Marc and Salmen, Jan and Igel, Christian}, - booktitle={The 2011 International Joint Conference on Neural Networks}, - title={The German Traffic Sign Recognition Benchmark: A multi-class classification competition}, - year={2011}, - volume={}, - number={}, - pages={1453-1460}, - keywords={Humans;Training;Image color analysis;Benchmark testing;Lead;Histograms;Image resolution}, - 
doi={10.1109/IJCNN.2011.6033395}}""", + bibtex_citation=r""" +@inproceedings{6033395, + author = {Stallkamp, Johannes and Schlipsing, Marc and Salmen, Jan and Igel, Christian}, + booktitle = {The 2011 International Joint Conference on Neural Networks}, + doi = {10.1109/IJCNN.2011.6033395}, + keywords = {Humans;Training;Image color analysis;Benchmark testing;Lead;Histograms;Image resolution}, + number = {}, + pages = {1453-1460}, + title = {The German Traffic Sign Recognition Benchmark: A multi-class classification competition}, + volume = {}, + year = {2011}, +} +""", descriptive_stats={ "n_samples": {"test": 12630}, "avg_character_length": {"test": 0}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/Imagenet1k.py b/mteb/tasks/Image/ZeroShotClassification/eng/Imagenet1k.py index 4da9dca851..179fb953ce 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/Imagenet1k.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/Imagenet1k.py @@ -33,14 +33,16 @@ class Imagenet1kZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@article{deng2009imagenet, - title={ImageNet: A large-scale hierarchical image database}, - author={Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li}, - journal={2009 IEEE Conference on Computer Vision and Pattern Recognition}, - pages={248--255}, - year={2009}, - organization={Ieee} - }""", + bibtex_citation=r""" +@article{deng2009imagenet, + author = {Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li}, + journal = {2009 IEEE Conference on Computer Vision and Pattern Recognition}, + organization = {Ieee}, + pages = {248--255}, + title = {ImageNet: A large-scale hierarchical image database}, + year = {2009}, +} +""", descriptive_stats={ "n_samples": {"test": 37200}, "avg_character_length": {"test": 0}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/MNIST.py 
b/mteb/tasks/Image/ZeroShotClassification/eng/MNIST.py index 798ba5457d..e8b14abbe8 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/MNIST.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/MNIST.py @@ -31,13 +31,15 @@ class MNISTZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@article{lecun2010mnist, - title={MNIST handwritten digit database}, - author={LeCun, Yann and Cortes, Corinna and Burges, CJ}, - journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist}, - volume={2}, - year={2010} - }""", + bibtex_citation=r""" +@article{lecun2010mnist, + author = {LeCun, Yann and Cortes, Corinna and Burges, CJ}, + journal = {ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist}, + title = {MNIST handwritten digit database}, + volume = {2}, + year = {2010}, +} +""", descriptive_stats={ "n_samples": {"test": 10000}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/OxfordPets.py b/mteb/tasks/Image/ZeroShotClassification/eng/OxfordPets.py index 653052eee7..1eef6bbd49 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/OxfordPets.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/OxfordPets.py @@ -31,15 +31,17 @@ class OxfordPetsZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""@misc{maji2013finegrainedvisualclassificationaircraft, - title={Fine-Grained Visual Classification of Aircraft}, - author={Subhransu Maji and Esa Rahtu and Juho Kannala and Matthew Blaschko and Andrea Vedaldi}, - year={2013}, - eprint={1306.5151}, - archivePrefix={arXiv}, - primaryClass={cs.CV}, - url={https://arxiv.org/abs/1306.5151}, - }""", + bibtex_citation=r""" +@misc{maji2013finegrainedvisualclassificationaircraft, + archiveprefix = {arXiv}, + author = {Subhransu Maji and Esa Rahtu and Juho Kannala and Matthew Blaschko 
and Andrea Vedaldi}, + eprint = {1306.5151}, + primaryclass = {cs.CV}, + title = {Fine-Grained Visual Classification of Aircraft}, + url = {https://arxiv.org/abs/1306.5151}, + year = {2013}, +} +""", descriptive_stats={ "n_samples": {"test": 3669}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/PatchCamelyon.py b/mteb/tasks/Image/ZeroShotClassification/eng/PatchCamelyon.py index d6ec16b846..50fe78866e 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/PatchCamelyon.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/PatchCamelyon.py @@ -33,26 +33,28 @@ class PatchCamelyonZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@InProceedings{10.1007/978-3-030-00934-2_24, -author="Veeling, Bastiaan S. + bibtex_citation=r""" +@inproceedings{10.1007/978-3-030-00934-2_24, + abstract = {We propose a new model for digital pathology segmentation, based on the observation that histopathology images are inherently symmetric under rotation and reflection. Utilizing recent findings on rotation equivariant CNNs, the proposed model leverages these symmetries in a principled manner. We present a visual analysis showing improved stability on predictions, and demonstrate that exploiting rotation equivariance significantly improves tumor detection performance on a challenging lymph node metastases dataset. We further present a novel derived dataset to enable principled comparison of machine learning models, in combination with an initial benchmark. Through this dataset, the task of histopathology diagnosis becomes accessible as a challenging benchmark for fundamental machine learning research.}, + address = {Cham}, + author = {Veeling, Bastiaan S. and Linmans, Jasper and Winkens, Jim and Cohen, Taco -and Welling, Max", -editor="Frangi, Alejandro F. 
+and Welling, Max}, + booktitle = {Medical Image Computing and Computer Assisted Intervention -- MICCAI 2018}, + editor = {Frangi, Alejandro F. and Schnabel, Julia A. and Davatzikos, Christos and Alberola-L{\'o}pez, Carlos -and Fichtinger, Gabor", -title="Rotation Equivariant CNNs for Digital Pathology", -booktitle="Medical Image Computing and Computer Assisted Intervention -- MICCAI 2018", -year="2018", -publisher="Springer International Publishing", -address="Cham", -pages="210--218", -abstract="We propose a new model for digital pathology segmentation, based on the observation that histopathology images are inherently symmetric under rotation and reflection. Utilizing recent findings on rotation equivariant CNNs, the proposed model leverages these symmetries in a principled manner. We present a visual analysis showing improved stability on predictions, and demonstrate that exploiting rotation equivariance significantly improves tumor detection performance on a challenging lymph node metastases dataset. We further present a novel derived dataset to enable principled comparison of machine learning models, in combination with an initial benchmark. 
Through this dataset, the task of histopathology diagnosis becomes accessible as a challenging benchmark for fundamental machine learning research.", -isbn="978-3-030-00934-2" -}""", +and Fichtinger, Gabor}, + isbn = {978-3-030-00934-2}, + pages = {210--218}, + publisher = {Springer International Publishing}, + title = {Rotation Equivariant CNNs for Digital Pathology}, + year = {2018}, +} +""", descriptive_stats={ "n_samples": {"test": 32768}, "avg_character_length": {"test": 0}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/RESISC45.py b/mteb/tasks/Image/ZeroShotClassification/eng/RESISC45.py index 4ba9d0b21a..1b16853cd0 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/RESISC45.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/RESISC45.py @@ -31,16 +31,19 @@ class RESISC45ZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@ARTICLE{7891544, - author={Cheng, Gong and Han, Junwei and Lu, Xiaoqiang}, - journal={Proceedings of the IEEE}, - title={Remote Sensing Image Scene Classification: Benchmark and State of the Art}, - year={2017}, - volume={105}, - number={10}, - pages={1865-1883}, - keywords={Remote sensing;Benchmark testing;Spatial resolution;Social network services;Satellites;Image analysis;Machine learning;Unsupervised learning;Classification;Benchmark data set;deep learning;handcrafted features;remote sensing image;scene classification;unsupervised feature learning}, - doi={10.1109/JPROC.2017.2675998}}""", + bibtex_citation=r""" +@article{7891544, + author = {Cheng, Gong and Han, Junwei and Lu, Xiaoqiang}, + doi = {10.1109/JPROC.2017.2675998}, + journal = {Proceedings of the IEEE}, + keywords = {Remote sensing;Benchmark testing;Spatial resolution;Social network services;Satellites;Image analysis;Machine learning;Unsupervised learning;Classification;Benchmark data set;deep learning;handcrafted features;remote sensing image;scene 
classification;unsupervised feature learning}, + number = {10}, + pages = {1865-1883}, + title = {Remote Sensing Image Scene Classification: Benchmark and State of the Art}, + volume = {105}, + year = {2017}, +} +""", descriptive_stats={ "n_samples": {"test": 6300}, "avg_character_length": {"test": 256}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/STL10.py b/mteb/tasks/Image/ZeroShotClassification/eng/STL10.py index 4aafeb043d..7bceacd8ed 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/STL10.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/STL10.py @@ -31,21 +31,23 @@ class STL10ZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@InProceedings{pmlr-v15-coates11a, - title = {An Analysis of Single-Layer Networks in Unsupervised Feature Learning}, - author = {Coates, Adam and Ng, Andrew and Lee, Honglak}, - booktitle = {Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics}, - pages = {215--223}, - year = {2011}, - editor = {Gordon, Geoffrey and Dunson, David and Dudík, Miroslav}, - volume = {15}, - series = {Proceedings of Machine Learning Research}, - address = {Fort Lauderdale, FL, USA}, - month = {11--13 Apr}, - publisher = {PMLR}, - pdf = {http://proceedings.mlr.press/v15/coates11a/coates11a.pdf}, - url = {https://proceedings.mlr.press/v15/coates11a.html}, - }""", + bibtex_citation=r""" +@inproceedings{pmlr-v15-coates11a, + address = {Fort Lauderdale, FL, USA}, + author = {Coates, Adam and Ng, Andrew and Lee, Honglak}, + booktitle = {Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics}, + editor = {Gordon, Geoffrey and Dunson, David and Dudík, Miroslav}, + month = {11--13 Apr}, + pages = {215--223}, + pdf = {http://proceedings.mlr.press/v15/coates11a/coates11a.pdf}, + publisher = {PMLR}, + series = {Proceedings of Machine Learning Research}, + title = {An 
Analysis of Single-Layer Networks in Unsupervised Feature Learning}, + url = {https://proceedings.mlr.press/v15/coates11a.html}, + volume = {15}, + year = {2011}, +} +""", descriptive_stats={ "n_samples": {"test": 8000}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/SUN397.py b/mteb/tasks/Image/ZeroShotClassification/eng/SUN397.py index 10d7a42794..b307621503 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/SUN397.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/SUN397.py @@ -31,15 +31,18 @@ class SUN397ZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@INPROCEEDINGS{5539970, - author={Xiao, Jianxiong and Hays, James and Ehinger, Krista A. and Oliva, Aude and Torralba, Antonio}, - booktitle={2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition}, - title={SUN database: Large-scale scene recognition from abbey to zoo}, - year={2010}, - volume={}, - number={}, - pages={3485-3492}, - doi={10.1109/CVPR.2010.5539970}}""", + bibtex_citation=r""" +@inproceedings{5539970, + author = {Xiao, Jianxiong and Hays, James and Ehinger, Krista A. 
and Oliva, Aude and Torralba, Antonio}, + booktitle = {2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition}, + doi = {10.1109/CVPR.2010.5539970}, + number = {}, + pages = {3485-3492}, + title = {SUN database: Large-scale scene recognition from abbey to zoo}, + volume = {}, + year = {2010}, +} +""", descriptive_stats={ "n_samples": {"test": 21750}, "avg_character_length": {"test": 256}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/SciMMIR.py b/mteb/tasks/Image/ZeroShotClassification/eng/SciMMIR.py index 19b34bb174..51fc044236 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/SciMMIR.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/SciMMIR.py @@ -28,16 +28,17 @@ class SciMMIR(AbsTaskZeroShotClassification): dialect=[], modalities=["text", "image"], sample_creation="created", - bibtex_citation="""\ + bibtex_citation=r""" @misc{wu2024scimmirbenchmarkingscientificmultimodal, - title={SciMMIR: Benchmarking Scientific Multi-modal Information Retrieval}, - author={Siwei Wu and Yizhi Li and Kang Zhu and Ge Zhang and Yiming Liang and Kaijing Ma and Chenghao Xiao and Haoran Zhang and Bohao Yang and Wenhu Chen and Wenhao Huang and Noura Al Moubayed and Jie Fu and Chenghua Lin}, - year={2024}, - eprint={2401.13478}, - archivePrefix={arXiv}, - primaryClass={cs.IR}, - url={https://arxiv.org/abs/2401.13478}, -}""", + archiveprefix = {arXiv}, + author = {Siwei Wu and Yizhi Li and Kang Zhu and Ge Zhang and Yiming Liang and Kaijing Ma and Chenghao Xiao and Haoran Zhang and Bohao Yang and Wenhu Chen and Wenhao Huang and Noura Al Moubayed and Jie Fu and Chenghua Lin}, + eprint = {2401.13478}, + primaryclass = {cs.IR}, + title = {SciMMIR: Benchmarking Scientific Multi-modal Information Retrieval}, + url = {https://arxiv.org/abs/2401.13478}, + year = {2024}, +} +""", descriptive_stats={ "n_samples": {"test": 16263}, "avg_character_length": {"test": 0}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/StanfordCars.py 
b/mteb/tasks/Image/ZeroShotClassification/eng/StanfordCars.py index 66a380330e..134de6d80a 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/StanfordCars.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/StanfordCars.py @@ -31,12 +31,14 @@ class StanfordCarsZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@inproceedings{Krause2013CollectingAL, - title={Collecting a Large-scale Dataset of Fine-grained Cars}, - author={Jonathan Krause and Jia Deng and Michael Stark and Li Fei-Fei}, - year={2013}, - url={https://api.semanticscholar.org/CorpusID:16632981} - }""", + bibtex_citation=r""" +@inproceedings{Krause2013CollectingAL, + author = {Jonathan Krause and Jia Deng and Michael Stark and Li Fei-Fei}, + title = {Collecting a Large-scale Dataset of Fine-grained Cars}, + url = {https://api.semanticscholar.org/CorpusID:16632981}, + year = {2013}, +} +""", descriptive_stats={ "n_samples": {"test": 8041}, "avg_character_length": {"test": 431.4}, diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/UCF101.py b/mteb/tasks/Image/ZeroShotClassification/eng/UCF101.py index f4baa77313..dd874d9356 100644 --- a/mteb/tasks/Image/ZeroShotClassification/eng/UCF101.py +++ b/mteb/tasks/Image/ZeroShotClassification/eng/UCF101.py @@ -35,15 +35,17 @@ class UCF101ZeroShotClassification(AbsTaskZeroShotClassification): dialect=[], modalities=["image", "text"], sample_creation="created", - bibtex_citation="""@misc{soomro2012ucf101dataset101human, - title={UCF101: A Dataset of 101 Human Actions Classes From Videos in The Wild}, - author={Khurram Soomro and Amir Roshan Zamir and Mubarak Shah}, - year={2012}, - eprint={1212.0402}, - archivePrefix={arXiv}, - primaryClass={cs.CV}, - url={https://arxiv.org/abs/1212.0402}, -}""", + bibtex_citation=r""" +@misc{soomro2012ucf101dataset101human, + archiveprefix = {arXiv}, + author = {Khurram Soomro and Amir Roshan Zamir and Mubarak Shah}, + eprint 
= {1212.0402}, + primaryclass = {cs.CV}, + title = {UCF101: A Dataset of 101 Human Actions Classes From Videos in The Wild}, + url = {https://arxiv.org/abs/1212.0402}, + year = {2012}, +} +""", descriptive_stats={ "n_samples": {"test": 697222}, "avg_character_length": {"test": 0}, diff --git a/mteb/tasks/InstructionRetrieval/eng/Core17InstructionRetrieval.py b/mteb/tasks/InstructionRetrieval/eng/Core17InstructionRetrieval.py index dc9614a28b..6717d5f80b 100644 --- a/mteb/tasks/InstructionRetrieval/eng/Core17InstructionRetrieval.py +++ b/mteb/tasks/InstructionRetrieval/eng/Core17InstructionRetrieval.py @@ -27,12 +27,14 @@ class Core17InstructionRetrieval(AbsTaskInstructionRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{weller2024followir, - title={FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions}, - author={Orion Weller and Benjamin Chang and Sean MacAvaney and Kyle Lo and Arman Cohan and Benjamin Van Durme and Dawn Lawrie and Luca Soldaini}, - year={2024}, - eprint={2403.15246}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{weller2024followir, + archiveprefix = {arXiv}, + author = {Orion Weller and Benjamin Chang and Sean MacAvaney and Kyle Lo and Arman Cohan and Benjamin Van Durme and Dawn Lawrie and Luca Soldaini}, + eprint = {2403.15246}, + primaryclass = {cs.IR}, + title = {FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/InstructionRetrieval/eng/News21InstructionRetrieval.py b/mteb/tasks/InstructionRetrieval/eng/News21InstructionRetrieval.py index e20833128d..3c4c628589 100644 --- a/mteb/tasks/InstructionRetrieval/eng/News21InstructionRetrieval.py +++ b/mteb/tasks/InstructionRetrieval/eng/News21InstructionRetrieval.py @@ -27,12 +27,14 @@ class News21InstructionRetrieval(AbsTaskInstructionRetrieval): annotations_creators="derived", 
dialect=[], sample_creation="found", - bibtex_citation="""@misc{weller2024followir, - title={FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions}, - author={Orion Weller and Benjamin Chang and Sean MacAvaney and Kyle Lo and Arman Cohan and Benjamin Van Durme and Dawn Lawrie and Luca Soldaini}, - year={2024}, - eprint={2403.15246}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{weller2024followir, + archiveprefix = {arXiv}, + author = {Orion Weller and Benjamin Chang and Sean MacAvaney and Kyle Lo and Arman Cohan and Benjamin Van Durme and Dawn Lawrie and Luca Soldaini}, + eprint = {2403.15246}, + primaryclass = {cs.IR}, + title = {FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/InstructionRetrieval/eng/Robust04InstructionRetrieval.py b/mteb/tasks/InstructionRetrieval/eng/Robust04InstructionRetrieval.py index 6056624309..f740d37ed9 100644 --- a/mteb/tasks/InstructionRetrieval/eng/Robust04InstructionRetrieval.py +++ b/mteb/tasks/InstructionRetrieval/eng/Robust04InstructionRetrieval.py @@ -27,12 +27,14 @@ class Robust04InstructionRetrieval(AbsTaskInstructionRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{weller2024followir, - title={FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions}, - author={Orion Weller and Benjamin Chang and Sean MacAvaney and Kyle Lo and Arman Cohan and Benjamin Van Durme and Dawn Lawrie and Luca Soldaini}, - year={2024}, - eprint={2403.15246}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{weller2024followir, + archiveprefix = {arXiv}, + author = {Orion Weller and Benjamin Chang and Sean MacAvaney and Kyle Lo and Arman Cohan and Benjamin Van Durme and Dawn Lawrie and Luca Soldaini}, + eprint = {2403.15246}, + primaryclass = {cs.IR}, + title = {FollowIR: 
Evaluating and Teaching Information Retrieval Models to Follow Instructions}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/InstructionRetrieval/multilingual/mFollowIR.py b/mteb/tasks/InstructionRetrieval/multilingual/mFollowIR.py index 9452beb8de..b738c5067b 100644 --- a/mteb/tasks/InstructionRetrieval/multilingual/mFollowIR.py +++ b/mteb/tasks/InstructionRetrieval/multilingual/mFollowIR.py @@ -194,12 +194,14 @@ class mFollowIRCrossLingual(MultilingualTask, AbsTaskInstructionRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{weller2024mfollowir, - title={{mFollowIR: a Multilingual Benchmark for Instruction Following in Retrieval}}, - author={Weller, Orion and Chang, Benjamin and Yang, Eugene and Yarmohammadi, Mahsa and Barham, Sam and MacAvaney, Sean and Cohan, Arman and Soldaini, Luca and Van Durme, Benjamin and Lawrie, Dawn}, - journal={arXiv preprint TODO}, - year={2024} -}""", + bibtex_citation=r""" +@article{weller2024mfollowir, + author = {Weller, Orion and Chang, Benjamin and Yang, Eugene and Yarmohammadi, Mahsa and Barham, Sam and MacAvaney, Sean and Cohan, Arman and Soldaini, Luca and Van Durme, Benjamin and Lawrie, Dawn}, + journal = {arXiv preprint TODO}, + title = {{mFollowIR: a Multilingual Benchmark for Instruction Following in Retrieval}}, + year = {2024}, +} +""", ) def load_data(self, **kwargs): @@ -247,12 +249,14 @@ class mFollowIR(MultilingualTask, AbsTaskInstructionRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{weller2024mfollowir, - title={{mFollowIR: a Multilingual Benchmark for Instruction Following in Retrieval}}, - author={Weller, Orion and Chang, Benjamin and Yang, Eugene and Yarmohammadi, Mahsa and Barham, Sam and MacAvaney, Sean and Cohan, Arman and Soldaini, Luca and Van Durme, Benjamin and Lawrie, Dawn}, - journal={arXiv preprint TODO}, - year={2024} -}""", + bibtex_citation=r""" 
+@article{weller2024mfollowir, + author = {Weller, Orion and Chang, Benjamin and Yang, Eugene and Yarmohammadi, Mahsa and Barham, Sam and MacAvaney, Sean and Cohan, Arman and Soldaini, Luca and Van Durme, Benjamin and Lawrie, Dawn}, + journal = {arXiv preprint TODO}, + title = {{mFollowIR: a Multilingual Benchmark for Instruction Following in Retrieval}}, + year = {2024}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/MultiLabelClassification/kor/KorHateSpeechMLClassification.py b/mteb/tasks/MultiLabelClassification/kor/KorHateSpeechMLClassification.py index 42b5a40a45..8104875705 100644 --- a/mteb/tasks/MultiLabelClassification/kor/KorHateSpeechMLClassification.py +++ b/mteb/tasks/MultiLabelClassification/kor/KorHateSpeechMLClassification.py @@ -37,24 +37,25 @@ class KorHateSpeechMLClassification(AbsTaskMultilabelClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{lee-etal-2022-k, - title = "K-{MH}a{S}: A Multi-label Hate Speech Detection Dataset in {K}orean Online News Comment", - author = "Lee, Jean and - Lim, Taejun and - Lee, Heejun and - Jo, Bogeun and - Kim, Yangsok and - Yoon, Heegeun and - Han, Soyeon Caren", - booktitle = "Proceedings of the 29th International Conference on Computational Linguistics", - month = oct, - year = "2022", - address = "Gyeongju, Republic of Korea", - publisher = "International Committee on Computational Linguistics", - url = "https://aclanthology.org/2022.coling-1.311", - pages = "3530--3538", - }""", + bibtex_citation=r""" +@inproceedings{lee-etal-2022-k, + address = {Gyeongju, Republic of Korea}, + author = {Lee, Jean and +Lim, Taejun and +Lee, Heejun and +Jo, Bogeun and +Kim, Yangsok and +Yoon, Heegeun and +Han, Soyeon Caren}, + booktitle = {Proceedings of the 29th International Conference on Computational Linguistics}, + month = oct, + pages = {3530--3538}, + publisher = {International Committee on Computational Linguistics}, 
+ title = {K-{MH}a{S}: A Multi-label Hate Speech Detection Dataset in {K}orean Online News Comment}, + url = {https://aclanthology.org/2022.coling-1.311}, + year = {2022}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/MultiLabelClassification/mlt/MalteseNewsClassification.py b/mteb/tasks/MultiLabelClassification/mlt/MalteseNewsClassification.py index 528d18f396..196396bc83 100644 --- a/mteb/tasks/MultiLabelClassification/mlt/MalteseNewsClassification.py +++ b/mteb/tasks/MultiLabelClassification/mlt/MalteseNewsClassification.py @@ -33,16 +33,18 @@ class MalteseNewsClassification(AbsTaskMultilabelClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{maltese-news-datasets, - title = "Topic Classification and Headline Generation for {M}altese using a Public News Corpus", - author = "Chaudhary, Amit Kumar and - Micallef, Kurt and - Borg, Claudia", - booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation", - month = may, - year = "2024", - publisher = "Association for Computational Linguistics", - }""", + bibtex_citation=r""" +@inproceedings{maltese-news-datasets, + author = {Chaudhary, Amit Kumar and +Micallef, Kurt and +Borg, Claudia}, + booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation}, + month = may, + publisher = {Association for Computational Linguistics}, + title = {Topic Classification and Headline Generation for {M}altese using a Public News Corpus}, + year = {2024}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/MultiLabelClassification/multilingual/MultiEURLEXMultilabelClassification.py b/mteb/tasks/MultiLabelClassification/multilingual/MultiEURLEXMultilabelClassification.py index d4dadd1d63..9b7787d185 100644 --- 
a/mteb/tasks/MultiLabelClassification/multilingual/MultiEURLEXMultilabelClassification.py +++ b/mteb/tasks/MultiLabelClassification/multilingual/MultiEURLEXMultilabelClassification.py @@ -55,19 +55,19 @@ class MultiEURLEXMultilabelClassification( annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" -@InProceedings{chalkidis-etal-2021-multieurlex, + bibtex_citation=r""" +@inproceedings{chalkidis-etal-2021-multieurlex, author = {Chalkidis, Ilias - and Fergadiotis, Manos - and Androutsopoulos, Ion}, - title = {MultiEURLEX -- A multi-lingual and multi-label legal document - classification dataset for zero-shot cross-lingual transfer}, +and Fergadiotis, Manos +and Androutsopoulos, Ion}, booktitle = {Proceedings of the 2021 Conference on Empirical Methods - in Natural Language Processing}, - year = {2021}, - publisher = {Association for Computational Linguistics}, +in Natural Language Processing}, location = {Punta Cana, Dominican Republic}, - url = {https://arxiv.org/abs/2109.00904} + publisher = {Association for Computational Linguistics}, + title = {MultiEURLEX -- A multi-lingual and multi-label legal document +classification dataset for zero-shot cross-lingual transfer}, + url = {https://arxiv.org/abs/2109.00904}, + year = {2021}, } - """, +""", ) diff --git a/mteb/tasks/MultiLabelClassification/por/BrazilianToxicTweetsClassification.py b/mteb/tasks/MultiLabelClassification/por/BrazilianToxicTweetsClassification.py index f56fa78d06..d2430d927b 100644 --- a/mteb/tasks/MultiLabelClassification/por/BrazilianToxicTweetsClassification.py +++ b/mteb/tasks/MultiLabelClassification/por/BrazilianToxicTweetsClassification.py @@ -33,21 +33,23 @@ class BrazilianToxicTweetsClassification(AbsTaskMultilabelClassification): annotations_creators="expert-annotated", dialect=["brazilian"], sample_creation="found", - bibtex_citation="""@article{DBLP:journals/corr/abs-2010-04543, - author = {Joao Augusto Leite and - Diego F. 
Silva and - Kalina Bontcheva and - Carolina Scarton}, - title = {Toxic Language Detection in Social Media for Brazilian Portuguese: - New Dataset and Multilingual Analysis}, - journal = {CoRR}, - volume = {abs/2010.04543}, - year = {2020}, - url = {https://arxiv.org/abs/2010.04543}, - eprinttype = {arXiv}, - eprint = {2010.04543}, - timestamp = {Tue, 15 Dec 2020 16:10:16 +0100}, - }""", + bibtex_citation=r""" +@article{DBLP:journals/corr/abs-2010-04543, + author = {Joao Augusto Leite and +Diego F. Silva and +Kalina Bontcheva and +Carolina Scarton}, + eprint = {2010.04543}, + eprinttype = {arXiv}, + journal = {CoRR}, + timestamp = {Tue, 15 Dec 2020 16:10:16 +0100}, + title = {Toxic Language Detection in Social Media for Brazilian Portuguese: +New Dataset and Multilingual Analysis}, + url = {https://arxiv.org/abs/2010.04543}, + volume = {abs/2010.04543}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/MultiLabelClassification/rus/CEDRClassification.py b/mteb/tasks/MultiLabelClassification/rus/CEDRClassification.py index 87795138d4..400dc0b9ae 100644 --- a/mteb/tasks/MultiLabelClassification/rus/CEDRClassification.py +++ b/mteb/tasks/MultiLabelClassification/rus/CEDRClassification.py @@ -28,15 +28,16 @@ class CEDRClassification(AbsTaskMultilabelClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{sboev2021data, - title={Data-Driven Model for Emotion Detection in Russian Texts}, - author={Sboev, Alexander and Naumov, Aleksandr and Rybka, Roman}, - journal={Procedia Computer Science}, - volume={190}, - pages={637--642}, - year={2021}, - publisher={Elsevier} - } - """, + bibtex_citation=r""" +@article{sboev2021data, + author = {Sboev, Alexander and Naumov, Aleksandr and Rybka, Roman}, + journal = {Procedia Computer Science}, + pages = {637--642}, + publisher = {Elsevier}, + title = {Data-Driven Model for Emotion Detection in Russian Texts}, + volume = {190}, + year = 
{2021}, +} +""", prompt="Given a comment as query, find expressed emotions (joy, sadness, surprise, fear, and anger)", ) diff --git a/mteb/tasks/MultiLabelClassification/rus/SensitiveTopicsClassification.py b/mteb/tasks/MultiLabelClassification/rus/SensitiveTopicsClassification.py index fc199313d6..d2bb2fea9f 100644 --- a/mteb/tasks/MultiLabelClassification/rus/SensitiveTopicsClassification.py +++ b/mteb/tasks/MultiLabelClassification/rus/SensitiveTopicsClassification.py @@ -28,33 +28,35 @@ class SensitiveTopicsClassification(AbsTaskMultilabelClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{babakov-etal-2021-detecting, - title = "Detecting Inappropriate Messages on Sensitive Topics that Could Harm a Company{'}s Reputation", - author = "Babakov, Nikolay and - Logacheva, Varvara and - Kozlova, Olga and - Semenov, Nikita and - Panchenko, Alexander", - editor = "Babych, Bogdan and - Kanishcheva, Olga and - Nakov, Preslav and - Piskorski, Jakub and - Pivovarova, Lidia and - Starko, Vasyl and - Steinberger, Josef and - Yangarber, Roman and - Marci{\'n}czuk, Micha{\l} and - Pollak, Senja and - P{\v{r}}ib{\'a}{\v{n}}, Pavel and - Robnik-{\v{S}}ikonja, Marko", - booktitle = "Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing", - month = apr, - year = "2021", - address = "Kiyv, Ukraine", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.bsnlp-1.4", - pages = "26--36", - abstract = "Not all topics are equally {``}flammable{''} in terms of toxicity: a calm discussion of turtles or fishing less often fuels inappropriate toxic dialogues than a discussion of politics or sexual minorities. We define a set of sensitive topics that can yield inappropriate and toxic messages and describe the methodology of collecting and labelling a dataset for appropriateness. 
While toxicity in user-generated data is well-studied, we aim at defining a more fine-grained notion of inappropriateness. The core of inappropriateness is that it can harm the reputation of a speaker. This is different from toxicity in two respects: (i) inappropriateness is topic-related, and (ii) inappropriate message is not toxic but still unacceptable. We collect and release two datasets for Russian: a topic-labelled dataset and an appropriateness-labelled dataset. We also release pre-trained classification models trained on this data.", - }""", + bibtex_citation=r""" +@inproceedings{babakov-etal-2021-detecting, + abstract = {Not all topics are equally {``}flammable{''} in terms of toxicity: a calm discussion of turtles or fishing less often fuels inappropriate toxic dialogues than a discussion of politics or sexual minorities. We define a set of sensitive topics that can yield inappropriate and toxic messages and describe the methodology of collecting and labelling a dataset for appropriateness. While toxicity in user-generated data is well-studied, we aim at defining a more fine-grained notion of inappropriateness. The core of inappropriateness is that it can harm the reputation of a speaker. This is different from toxicity in two respects: (i) inappropriateness is topic-related, and (ii) inappropriate message is not toxic but still unacceptable. We collect and release two datasets for Russian: a topic-labelled dataset and an appropriateness-labelled dataset. 
We also release pre-trained classification models trained on this data.}, + address = {Kiyv, Ukraine}, + author = {Babakov, Nikolay and +Logacheva, Varvara and +Kozlova, Olga and +Semenov, Nikita and +Panchenko, Alexander}, + booktitle = {Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing}, + editor = {Babych, Bogdan and +Kanishcheva, Olga and +Nakov, Preslav and +Piskorski, Jakub and +Pivovarova, Lidia and +Starko, Vasyl and +Steinberger, Josef and +Yangarber, Roman and +Marci{\'n}czuk, Micha{\l} and +Pollak, Senja and +P{\v{r}}ib{\'a}{\v{n}}, Pavel and +Robnik-{\v{S}}ikonja, Marko}, + month = apr, + pages = {26--36}, + publisher = {Association for Computational Linguistics}, + title = {Detecting Inappropriate Messages on Sensitive Topics that Could Harm a Company{'}s Reputation}, + url = {https://aclanthology.org/2021.bsnlp-1.4}, + year = {2021}, +} +""", prompt="Given a sentence as query, find sensitive topics", ) diff --git a/mteb/tasks/PairClassification/ara/ArEntail.py b/mteb/tasks/PairClassification/ara/ArEntail.py index 9afce29d71..a427f2ba30 100644 --- a/mteb/tasks/PairClassification/ara/ArEntail.py +++ b/mteb/tasks/PairClassification/ara/ArEntail.py @@ -29,14 +29,16 @@ class ArEntail(AbsTaskPairClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{obeidat2024arentail, - title={ArEntail: manually-curated Arabic natural language inference dataset from news headlines}, - author={Obeidat, Rasha and Al-Harahsheh, Yara and Al-Ayyoub, Mahmoud and Gharaibeh, Maram}, - journal={Language Resources and Evaluation}, - pages={1--27}, - year={2024}, - publisher={Springer} - }""", + bibtex_citation=r""" +@article{obeidat2024arentail, + author = {Obeidat, Rasha and Al-Harahsheh, Yara and Al-Ayyoub, Mahmoud and Gharaibeh, Maram}, + journal = {Language Resources and Evaluation}, + pages = {1--27}, + publisher = {Springer}, + title = {ArEntail: manually-curated Arabic natural 
language inference dataset from news headlines}, + year = {2024}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/ces/CTKFactsNLI.py b/mteb/tasks/PairClassification/ces/CTKFactsNLI.py index 0083f2c8c7..cb51d912f3 100644 --- a/mteb/tasks/PairClassification/ces/CTKFactsNLI.py +++ b/mteb/tasks/PairClassification/ces/CTKFactsNLI.py @@ -27,16 +27,18 @@ class CTKFactsNLI(AbsTaskPairClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{ullrich2023csfever, - title={CsFEVER and CTKFacts: acquiring Czech data for fact verification}, - author={Ullrich, Herbert and Drchal, Jan and R{\\`y}par, Martin and Vincourov{\\'a}, Hana and Moravec, V{\\'a}clav}, - journal={Language Resources and Evaluation}, - volume={57}, - number={4}, - pages={1571--1605}, - year={2023}, - publisher={Springer} - }""", # after removing label 1=NOT ENOUGH INFO + bibtex_citation=r""" +@article{ullrich2023csfever, + author = {Ullrich, Herbert and Drchal, Jan and R{\`y}par, Martin and Vincourov{\'a}, Hana and Moravec, V{\'a}clav}, + journal = {Language Resources and Evaluation}, + number = {4}, + pages = {1571--1605}, + publisher = {Springer}, + title = {CsFEVER and CTKFacts: acquiring Czech data for fact verification}, + volume = {57}, + year = {2023}, +} +""", # after removing label 1=NOT ENOUGH INFO ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/deu/FalseFriendsDeEnPC.py b/mteb/tasks/PairClassification/deu/FalseFriendsDeEnPC.py index 9c34efc136..62c163ba0e 100644 --- a/mteb/tasks/PairClassification/deu/FalseFriendsDeEnPC.py +++ b/mteb/tasks/PairClassification/deu/FalseFriendsDeEnPC.py @@ -27,15 +27,15 @@ class FalseFriendsDeEnPC(AbsTaskPairClassification): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation=""" - @misc{Chibb_2022, - title="{German-English False Friends in Multilingual Transformer Models: An Evaluation on
Robustness and Word-to-Word Fine-Tuning}", - author="Chibb, Aaron", - year="2022", - month="Sep", - abstract="{This paper explores the robustness of multilingual language models against false friends. False friends are words that sound or are written the same in two different languages but have different meaning. Generally, it is argued that multilingual models, such as XLM-RoBERTA, can outperform monolingual models in most tasks on conventional datasets. However, false friends are not considered in these tests. In this paper, experiments with a false friends dataset show that multilingual models are not robust against false friends; they have problems creating monolingual representations and differentiating between meanings of similarly written words in different languages. An attempt of word-based finetuning multilingual models on false friends pairs is promising, however the results do not generally solve the presented problem and still, monolingual models are more robust against false friends.}" - } - """, + bibtex_citation=r""" +@misc{Chibb_2022, + abstract = {{This paper explores the robustness of multilingual language models against false friends. False friends are words that sound or are written the same in two different languages but have different meaning. Generally, it is argued that multilingual models, such as XLM-RoBERTA, can outperform monolingual models in most tasks on conventional datasets. However, false friends are not considered in these tests. In this paper, experiments with a false friends dataset show that multilingual models are not robust against false friends; they have problems creating monolingual representations and differentiating between meanings of similarly written words in different languages. 
An attempt of word-based finetuning multilingual models on false friends pairs is promising, however the results do not generally solve the presented problem and still, monolingual models are more robust against false friends.}}, + author = {Chibb, Aaron}, + month = {Sep}, + title = {{German-English False Friends in Multilingual Transformer Models: An Evaluation on Robustness and Word-to-Word Fine-Tuning}}, + year = {2022}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/eng/LegalBenchPC.py b/mteb/tasks/PairClassification/eng/LegalBenchPC.py index a8d02f469b..ff767bc178 100644 --- a/mteb/tasks/PairClassification/eng/LegalBenchPC.py +++ b/mteb/tasks/PairClassification/eng/LegalBenchPC.py @@ -84,39 +84,42 @@ class LegalBenchPC(AbsTaskPairClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - @article{kolt2022predicting, - title={Predicting consumer contracts}, - author={Kolt, Noam}, - journal={Berkeley Tech. 
LJ}, - volume={37}, - pages={71}, - year={2022}, - publisher={HeinOnline} - } - @article{zimmeck2019maps, - title={Maps: Scaling privacy compliance analysis to a million apps}, - author={Zimmeck, Sebastian and Story, Peter and Smullen, Daniel and Ravichander, Abhilasha and Wang, Ziqi and Reidenberg, Joel R and Russell, N Cameron and Sadeh, Norman}, - journal={Proc. Priv. Enhancing Tech.}, - volume={2019}, - pages={66}, - year={2019} - } - @article{ravichander2019question, - title={Question answering for privacy policies: Combining computational and legal perspectives}, - author={Ravichander, Abhilasha and Black, Alan W and Wilson, Shomir and Norton, Thomas and Sadeh, Norman}, - journal={arXiv preprint arXiv:1911.00841}, - year={2019} - } - """, + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, +} + +@article{kolt2022predicting, + author = {Kolt, Noam}, + journal = {Berkeley Tech. 
LJ}, + pages = {71}, + publisher = {HeinOnline}, + title = {Predicting consumer contracts}, + volume = {37}, + year = {2022}, +} + +@article{ravichander2019question, + author = {Ravichander, Abhilasha and Black, Alan W and Wilson, Shomir and Norton, Thomas and Sadeh, Norman}, + journal = {arXiv preprint arXiv:1911.00841}, + title = {Question answering for privacy policies: Combining computational and legal perspectives}, + year = {2019}, +} + +@article{zimmeck2019maps, + author = {Zimmeck, Sebastian and Story, Peter and Smullen, Daniel and Ravichander, Abhilasha and Wang, Ziqi and Reidenberg, Joel R and Russell, N Cameron and Sadeh, Norman}, + journal = {Proc. Priv. Enhancing Tech.}, + pages = {66}, + title = {Maps: Scaling privacy compliance analysis to a million apps}, + volume = {2019}, + year = {2019}, +} +""", ) def load_data(self, **kwargs: Any) -> None: diff --git a/mteb/tasks/PairClassification/eng/PubChemAISentenceParaphrasePC.py b/mteb/tasks/PairClassification/eng/PubChemAISentenceParaphrasePC.py index f453ebee31..7dffc2f1fb 100644 --- a/mteb/tasks/PairClassification/eng/PubChemAISentenceParaphrasePC.py +++ b/mteb/tasks/PairClassification/eng/PubChemAISentenceParaphrasePC.py @@ -26,24 +26,25 @@ class PubChemAISentenceParaphrasePC(AbsTaskPairClassification): annotations_creators="LM-generated", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - @article{kim2023pubchem, - title={PubChem 2023 update}, - author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul 
A and Yu, Bo and others}, - journal={Nucleic acids research}, - volume={51}, - number={D1}, - pages={D1373--D1380}, - year={2023}, - publisher={Oxford University Press} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} + +@article{kim2023pubchem, + author = {Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal = {Nucleic acids research}, + number = {D1}, + pages = {D1373--D1380}, + publisher = {Oxford University Press}, + title = {PubChem 2023 update}, + volume = {51}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/eng/PubChemSMILESPC.py b/mteb/tasks/PairClassification/eng/PubChemSMILESPC.py index b3e297e043..a6772b783a 100644 --- a/mteb/tasks/PairClassification/eng/PubChemSMILESPC.py +++ b/mteb/tasks/PairClassification/eng/PubChemSMILESPC.py @@ -55,24 +55,25 @@ class PubChemSMILESPC(AbsTaskPairClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - @article{kim2023pubchem, - title={PubChem 2023 update}, - author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and 
Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, - journal={Nucleic acids research}, - volume={51}, - number={D1}, - pages={D1373--D1380}, - year={2023}, - publisher={Oxford University Press} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} + +@article{kim2023pubchem, + author = {Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal = {Nucleic acids research}, + number = {D1}, + pages = {D1373--D1380}, + publisher = {Oxford University Press}, + title = {PubChem 2023 update}, + volume = {51}, + year = {2023}, +} +""", ) def load_data(self): diff --git a/mteb/tasks/PairClassification/eng/PubChemSynonymPC.py b/mteb/tasks/PairClassification/eng/PubChemSynonymPC.py index 6b6dfd81c8..0102e73327 100644 --- a/mteb/tasks/PairClassification/eng/PubChemSynonymPC.py +++ b/mteb/tasks/PairClassification/eng/PubChemSynonymPC.py @@ -26,24 +26,25 @@ class PubChemSynonymPC(AbsTaskPairClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - 
@article{kim2023pubchem, - title={PubChem 2023 update}, - author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, - journal={Nucleic acids research}, - volume={51}, - number={D1}, - pages={D1373--D1380}, - year={2023}, - publisher={Oxford University Press} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} + +@article{kim2023pubchem, + author = {Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal = {Nucleic acids research}, + number = {D1}, + pages = {D1373--D1380}, + publisher = {Oxford University Press}, + title = {PubChem 2023 update}, + volume = {51}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/eng/PubChemWikiParagraphsPC.py b/mteb/tasks/PairClassification/eng/PubChemWikiParagraphsPC.py index 679580f28c..b0fe9962b5 100644 --- a/mteb/tasks/PairClassification/eng/PubChemWikiParagraphsPC.py +++ b/mteb/tasks/PairClassification/eng/PubChemWikiParagraphsPC.py @@ -26,24 +26,25 @@ class PubChemWikiParagraphsPC(AbsTaskPairClassification): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and 
Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - @article{kim2023pubchem, - title={PubChem 2023 update}, - author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, - journal={Nucleic acids research}, - volume={51}, - number={D1}, - pages={D1373--D1380}, - year={2023}, - publisher={Oxford University Press} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} + +@article{kim2023pubchem, + author = {Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal = {Nucleic acids research}, + number = {D1}, + pages = {D1373--D1380}, + publisher = {Oxford University Press}, + title = {PubChem 2023 update}, + volume = {51}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/eng/SprintDuplicateQuestionsPC.py b/mteb/tasks/PairClassification/eng/SprintDuplicateQuestionsPC.py index 4c1ea598e2..eaf3f4cf36 100644 --- a/mteb/tasks/PairClassification/eng/SprintDuplicateQuestionsPC.py +++ b/mteb/tasks/PairClassification/eng/SprintDuplicateQuestionsPC.py @@ -31,27 +31,29 @@ class SprintDuplicateQuestionsPC(AbsTaskPairClassification): dialect=[], sample_creation="found", prompt="Retrieve duplicate questions from Sprint forum", - bibtex_citation="""@inproceedings{shah-etal-2018-adversarial, - title = "Adversarial 
Domain Adaptation for Duplicate Question Detection", - author = "Shah, Darsh and - Lei, Tao and - Moschitti, Alessandro and - Romeo, Salvatore and - Nakov, Preslav", - editor = "Riloff, Ellen and - Chiang, David and - Hockenmaier, Julia and - Tsujii, Jun{'}ichi", - booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing", - month = oct # "-" # nov, - year = "2018", - address = "Brussels, Belgium", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/D18-1131", - doi = "10.18653/v1/D18-1131", - pages = "1056--1063", - abstract = "We address the problem of detecting duplicate questions in forums, which is an important step towards automating the process of answering new questions. As finding and annotating such potential duplicates manually is very tedious and costly, automatic methods based on machine learning are a viable alternative. However, many forums do not have annotated data, i.e., questions labeled by experts as duplicates, and thus a promising solution is to use domain adaptation from another forum that has such annotations. Here we focus on adversarial domain adaptation, deriving important findings about when it performs well and what properties of the domains are important in this regard. Our experiments with StackExchange data show an average improvement of 5.6{\%} over the best baseline across multiple pairs of domains.", -}""", + bibtex_citation=r""" +@inproceedings{shah-etal-2018-adversarial, + abstract = {We address the problem of detecting duplicate questions in forums, which is an important step towards automating the process of answering new questions. As finding and annotating such potential duplicates manually is very tedious and costly, automatic methods based on machine learning are a viable alternative. 
However, many forums do not have annotated data, i.e., questions labeled by experts as duplicates, and thus a promising solution is to use domain adaptation from another forum that has such annotations. Here we focus on adversarial domain adaptation, deriving important findings about when it performs well and what properties of the domains are important in this regard. Our experiments with StackExchange data show an average improvement of 5.6{\%} over the best baseline across multiple pairs of domains.}, + address = {Brussels, Belgium}, + author = {Shah, Darsh and +Lei, Tao and +Moschitti, Alessandro and +Romeo, Salvatore and +Nakov, Preslav}, + booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing}, + doi = {10.18653/v1/D18-1131}, + editor = {Riloff, Ellen and +Chiang, David and +Hockenmaier, Julia and +Tsujii, Jun{'}ichi}, + month = oct # {-} # nov, + pages = {1056--1063}, + publisher = {Association for Computational Linguistics}, + title = {Adversarial Domain Adaptation for Duplicate Question Detection}, + url = {https://aclanthology.org/D18-1131}, + year = {2018}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/eng/TwitterSemEval2015PC.py b/mteb/tasks/PairClassification/eng/TwitterSemEval2015PC.py index 9da7c1072e..6914e6744e 100644 --- a/mteb/tasks/PairClassification/eng/TwitterSemEval2015PC.py +++ b/mteb/tasks/PairClassification/eng/TwitterSemEval2015PC.py @@ -27,24 +27,26 @@ class TwitterSemEval2015PC(AbsTaskPairClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{xu-etal-2015-semeval, - title = "{S}em{E}val-2015 Task 1: Paraphrase and Semantic Similarity in {T}witter ({PIT})", - author = "Xu, Wei and - Callison-Burch, Chris and - Dolan, Bill", - editor = "Nakov, Preslav and - Zesch, Torsten and - Cer, Daniel and - Jurgens, David", - booktitle = "Proceedings of the 9th International Workshop on Semantic 
Evaluation ({S}em{E}val 2015)", - month = jun, - year = "2015", - address = "Denver, Colorado", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/S15-2001", - doi = "10.18653/v1/S15-2001", - pages = "1--11", - }""", + bibtex_citation=r""" +@inproceedings{xu-etal-2015-semeval, + address = {Denver, Colorado}, + author = {Xu, Wei and +Callison-Burch, Chris and +Dolan, Bill}, + booktitle = {Proceedings of the 9th International Workshop on Semantic Evaluation ({S}em{E}val 2015)}, + doi = {10.18653/v1/S15-2001}, + editor = {Nakov, Preslav and +Zesch, Torsten and +Cer, Daniel and +Jurgens, David}, + month = jun, + pages = {1--11}, + publisher = {Association for Computational Linguistics}, + title = {{S}em{E}val-2015 Task 1: Paraphrase and Semantic Similarity in {T}witter ({PIT})}, + url = {https://aclanthology.org/S15-2001}, + year = {2015}, +} +""", prompt="Retrieve tweets that are semantically similar to the given tweet", ) diff --git a/mteb/tasks/PairClassification/eng/TwitterURLCorpusPC.py b/mteb/tasks/PairClassification/eng/TwitterURLCorpusPC.py index 85432b1d97..d31ff81410 100644 --- a/mteb/tasks/PairClassification/eng/TwitterURLCorpusPC.py +++ b/mteb/tasks/PairClassification/eng/TwitterURLCorpusPC.py @@ -27,25 +27,27 @@ class TwitterURLCorpusPC(AbsTaskPairClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{lan-etal-2017-continuously, - title = "A Continuously Growing Dataset of Sentential Paraphrases", - author = "Lan, Wuwei and - Qiu, Siyu and - He, Hua and - Xu, Wei", - editor = "Palmer, Martha and - Hwa, Rebecca and - Riedel, Sebastian", - booktitle = "Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing", - month = sep, - year = "2017", - address = "Copenhagen, Denmark", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/D17-1126", - doi = "10.18653/v1/D17-1126", - pages = 
"1224--1234", - abstract = "A major challenge in paraphrase research is the lack of parallel corpora. In this paper, we present a new method to collect large-scale sentential paraphrases from Twitter by linking tweets through shared URLs. The main advantage of our method is its simplicity, as it gets rid of the classifier or human in the loop needed to select data before annotation and subsequent application of paraphrase identification algorithms in the previous work. We present the largest human-labeled paraphrase corpus to date of 51,524 sentence pairs and the first cross-domain benchmarking for automatic paraphrase identification. In addition, we show that more than 30,000 new sentential paraphrases can be easily and continuously captured every month at {\textasciitilde}70{\%} precision, and demonstrate their utility for downstream NLP tasks through phrasal paraphrase extraction. We make our code and data freely available.", - }""", + bibtex_citation=r""" +@inproceedings{lan-etal-2017-continuously, + abstract = {A major challenge in paraphrase research is the lack of parallel corpora. In this paper, we present a new method to collect large-scale sentential paraphrases from Twitter by linking tweets through shared URLs. The main advantage of our method is its simplicity, as it gets rid of the classifier or human in the loop needed to select data before annotation and subsequent application of paraphrase identification algorithms in the previous work. We present the largest human-labeled paraphrase corpus to date of 51,524 sentence pairs and the first cross-domain benchmarking for automatic paraphrase identification. In addition, we show that more than 30,000 new sentential paraphrases can be easily and continuously captured every month at {\textasciitilde}70{\%} precision, and demonstrate their utility for downstream NLP tasks through phrasal paraphrase extraction. 
We make our code and data freely available.}, + address = {Copenhagen, Denmark}, + author = {Lan, Wuwei and +Qiu, Siyu and +He, Hua and +Xu, Wei}, + booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing}, + doi = {10.18653/v1/D17-1126}, + editor = {Palmer, Martha and +Hwa, Rebecca and +Riedel, Sebastian}, + month = sep, + pages = {1224--1234}, + publisher = {Association for Computational Linguistics}, + title = {A Continuously Growing Dataset of Sentential Paraphrases}, + url = {https://aclanthology.org/D17-1126}, + year = {2017}, +} +""", prompt="Retrieve tweets that are semantically similar to the given tweet", ) diff --git a/mteb/tasks/PairClassification/fas/FaMTEBPairClassification.py b/mteb/tasks/PairClassification/fas/FaMTEBPairClassification.py index 98deac52e9..01cccca055 100644 --- a/mteb/tasks/PairClassification/fas/FaMTEBPairClassification.py +++ b/mteb/tasks/PairClassification/fas/FaMTEBPairClassification.py @@ -26,16 +26,19 @@ class CExaPPC(AbsTaskPairClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@INPROCEEDINGS{9786243, - author={Sadeghi, Reyhaneh and Karbasi, Hamed and Akbari, Ahmad}, - booktitle={2022 8th International Conference on Web Research (ICWR)}, - title={ExaPPC: a Large-Scale Persian Paraphrase Detection Corpus}, - year={2022}, - volume={}, - number={}, - pages={168-175}, - keywords={Data mining;Task analysis;Paraphrase Identification;Semantic Similarity;Deep Learning;Paraphrasing Corpora}, - doi={10.1109/ICWR54782.2022.9786243}}""", + bibtex_citation=r""" +@inproceedings{9786243, + author = {Sadeghi, Reyhaneh and Karbasi, Hamed and Akbari, Ahmad}, + booktitle = {2022 8th International Conference on Web Research (ICWR)}, + doi = {10.1109/ICWR54782.2022.9786243}, + keywords = {Data mining;Task analysis;Paraphrase Identification;Semantic Similarity;Deep Learning;Paraphrasing Corpora}, + number = {}, + pages = {168-175}, + title = {ExaPPC: 
a Large-Scale Persian Paraphrase Detection Corpus}, + volume = {}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -229,15 +232,17 @@ class ParsinluEntail(AbsTaskPairClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{khashabi2021parsinlusuitelanguageunderstanding, - title={ParsiNLU: A Suite of Language Understanding Challenges for Persian}, - author={Daniel Khashabi and Arman Cohan and Siamak Shakeri and Pedram Hosseini and Pouya Pezeshkpour and Malihe Alikhani and Moin Aminnaseri and Marzieh Bitaab and Faeze Brahman and Sarik Ghazarian and Mozhdeh Gheini and Arman Kabiri and Rabeeh Karimi Mahabadi and Omid Memarrast and Ahmadreza Mosallanezhad and Erfan Noury and Shahab Raji and Mohammad Sadegh Rasooli and Sepideh Sadeghi and Erfan Sadeqi Azer and Niloofar Safi Samghabadi and Mahsa Shafaei and Saber Sheybani and Ali Tazarv and Yadollah Yaghoobzadeh}, - year={2021}, - eprint={2012.06154}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2012.06154}, -}""", + bibtex_citation=r""" +@misc{khashabi2021parsinlusuitelanguageunderstanding, + archiveprefix = {arXiv}, + author = {Daniel Khashabi and Arman Cohan and Siamak Shakeri and Pedram Hosseini and Pouya Pezeshkpour and Malihe Alikhani and Moin Aminnaseri and Marzieh Bitaab and Faeze Brahman and Sarik Ghazarian and Mozhdeh Gheini and Arman Kabiri and Rabeeh Karimi Mahabadi and Omid Memarrast and Ahmadreza Mosallanezhad and Erfan Noury and Shahab Raji and Mohammad Sadegh Rasooli and Sepideh Sadeghi and Erfan Sadeqi Azer and Niloofar Safi Samghabadi and Mahsa Shafaei and Saber Sheybani and Ali Tazarv and Yadollah Yaghoobzadeh}, + eprint = {2012.06154}, + primaryclass = {cs.CL}, + title = {ParsiNLU: A Suite of Language Understanding Challenges for Persian}, + url = {https://arxiv.org/abs/2012.06154}, + year = {2021}, +} +""", ) def dataset_transform(self): @@ -280,15 +285,17 @@ class 
ParsinluQueryParaphPC(AbsTaskPairClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{khashabi2021parsinlusuitelanguageunderstanding, - title={ParsiNLU: A Suite of Language Understanding Challenges for Persian}, - author={Daniel Khashabi and Arman Cohan and Siamak Shakeri and Pedram Hosseini and Pouya Pezeshkpour and Malihe Alikhani and Moin Aminnaseri and Marzieh Bitaab and Faeze Brahman and Sarik Ghazarian and Mozhdeh Gheini and Arman Kabiri and Rabeeh Karimi Mahabadi and Omid Memarrast and Ahmadreza Mosallanezhad and Erfan Noury and Shahab Raji and Mohammad Sadegh Rasooli and Sepideh Sadeghi and Erfan Sadeqi Azer and Niloofar Safi Samghabadi and Mahsa Shafaei and Saber Sheybani and Ali Tazarv and Yadollah Yaghoobzadeh}, - year={2021}, - eprint={2012.06154}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2012.06154}, -}""", + bibtex_citation=r""" +@misc{khashabi2021parsinlusuitelanguageunderstanding, + archiveprefix = {arXiv}, + author = {Daniel Khashabi and Arman Cohan and Siamak Shakeri and Pedram Hosseini and Pouya Pezeshkpour and Malihe Alikhani and Moin Aminnaseri and Marzieh Bitaab and Faeze Brahman and Sarik Ghazarian and Mozhdeh Gheini and Arman Kabiri and Rabeeh Karimi Mahabadi and Omid Memarrast and Ahmadreza Mosallanezhad and Erfan Noury and Shahab Raji and Mohammad Sadegh Rasooli and Sepideh Sadeghi and Erfan Sadeqi Azer and Niloofar Safi Samghabadi and Mahsa Shafaei and Saber Sheybani and Ali Tazarv and Yadollah Yaghoobzadeh}, + eprint = {2012.06154}, + primaryclass = {cs.CL}, + title = {ParsiNLU: A Suite of Language Understanding Challenges for Persian}, + url = {https://arxiv.org/abs/2012.06154}, + year = {2021}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/fas/FarsTail.py b/mteb/tasks/PairClassification/fas/FarsTail.py index 552e953f77..220a9756f8 100644 --- a/mteb/tasks/PairClassification/fas/FarsTail.py +++ 
b/mteb/tasks/PairClassification/fas/FarsTail.py @@ -28,14 +28,16 @@ class FarsTail(AbsTaskPairClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{amirkhani2023farstail, - title={FarsTail: a Persian natural language inference dataset}, - author={Amirkhani, Hossein and AzariJafari, Mohammad and Faridan-Jahromi, Soroush and Kouhkan, Zeinab and Pourjafari, Zohreh and Amirak, Azadeh}, - journal={Soft Computing}, - year={2023}, - publisher={Springer}, - doi={10.1007/s00500-023-08959-3} - }""", # after removing neutral + bibtex_citation=r""" +@article{amirkhani2023farstail, + author = {Amirkhani, Hossein and AzariJafari, Mohammad and Faridan-Jahromi, Soroush and Kouhkan, Zeinab and Pourjafari, Zohreh and Amirak, Azadeh}, + doi = {10.1007/s00500-023-08959-3}, + journal = {Soft Computing}, + publisher = {Springer}, + title = {FarsTail: a Persian natural language inference dataset}, + year = {2023}, +} +""", # after removing neutral ) def load_data(self, **kwargs): diff --git a/mteb/tasks/PairClassification/hye/ArmenianParaphrasePC.py b/mteb/tasks/PairClassification/hye/ArmenianParaphrasePC.py index 04431c2238..53a6f537d3 100644 --- a/mteb/tasks/PairClassification/hye/ArmenianParaphrasePC.py +++ b/mteb/tasks/PairClassification/hye/ArmenianParaphrasePC.py @@ -26,16 +26,16 @@ class ArmenianParaphrasePC(AbsTaskPairClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{malajyan2020arpa, - title={ARPA: Armenian Paraphrase Detection Corpus and Models}, - author={Arthur Malajyan and Karen Avetisyan and Tsolak Ghukasyan}, - year={2020}, - eprint={2009.12615}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - """, + bibtex_citation=r""" +@misc{malajyan2020arpa, + archiveprefix = {arXiv}, + author = {Arthur Malajyan and Karen Avetisyan and Tsolak Ghukasyan}, + eprint = {2009.12615}, + primaryclass = {cs.CL}, + title = {ARPA: Armenian Paraphrase 
Detection Corpus and Models}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/ind/IndoNLI.py b/mteb/tasks/PairClassification/ind/IndoNLI.py index ac0976e475..f389c7fa0b 100644 --- a/mteb/tasks/PairClassification/ind/IndoNLI.py +++ b/mteb/tasks/PairClassification/ind/IndoNLI.py @@ -27,17 +27,19 @@ class IndoNLI(AbsTaskPairClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{mahendra-etal-2021-indonli, - title = "{I}ndo{NLI}: A Natural Language Inference Dataset for {I}ndonesian", - author = "Mahendra, Rahmad and Aji, Alham Fikri and Louvan, Samuel and Rahman, Fahrurrozi and Vania, Clara", - booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing", - month = nov, - year = "2021", - address = "Online and Punta Cana, Dominican Republic", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.emnlp-main.821", - pages = "10511--10527", - }""", + bibtex_citation=r""" +@inproceedings{mahendra-etal-2021-indonli, + address = {Online and Punta Cana, Dominican Republic}, + author = {Mahendra, Rahmad and Aji, Alham Fikri and Louvan, Samuel and Rahman, Fahrurrozi and Vania, Clara}, + booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing}, + month = nov, + pages = {10511--10527}, + publisher = {Association for Computational Linguistics}, + title = {{I}ndo{NLI}: A Natural Language Inference Dataset for {I}ndonesian}, + url = {https://aclanthology.org/2021.emnlp-main.821}, + year = {2021}, +} +""", # after removing neutral ) diff --git a/mteb/tasks/PairClassification/kor/KlueNLI.py b/mteb/tasks/PairClassification/kor/KlueNLI.py index 9bd2a0d2c6..f5092133a4 100644 --- a/mteb/tasks/PairClassification/kor/KlueNLI.py +++ b/mteb/tasks/PairClassification/kor/KlueNLI.py @@ -27,14 +27,16 @@ class KlueNLI(AbsTaskPairClassification): 
annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{park2021klue, - title={KLUE: Korean Language Understanding Evaluation}, - author={Sungjoon Park and Jihyung Moon and Sungdong Kim and Won Ik Cho and Jiyoon Han and Jangwon Park and Chisung Song and Junseong Kim and Yongsook Song and Taehwan Oh and Joohong Lee and Juhyun Oh and Sungwon Lyu and Younghoon Jeong and Inkwon Lee and Sangwoo Seo and Dongjun Lee and Hyunwoo Kim and Myeonghwa Lee and Seongbo Jang and Seungwon Do and Sunkyoung Kim and Kyungtae Lim and Jongwon Lee and Kyumin Park and Jamin Shin and Seonghyun Kim and Lucy Park and Alice Oh and Jungwoo Ha and Kyunghyun Cho}, - year={2021}, - eprint={2105.09680}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", # 3000 - neutral samples + bibtex_citation=r""" +@misc{park2021klue, + archiveprefix = {arXiv}, + author = {Sungjoon Park and Jihyung Moon and Sungdong Kim and Won Ik Cho and Jiyoon Han and Jangwon Park and Chisung Song and Junseong Kim and Yongsook Song and Taehwan Oh and Joohong Lee and Juhyun Oh and Sungwon Lyu and Younghoon Jeong and Inkwon Lee and Sangwoo Seo and Dongjun Lee and Hyunwoo Kim and Myeonghwa Lee and Seongbo Jang and Seungwon Do and Sunkyoung Kim and Kyungtae Lim and Jongwon Lee and Kyumin Park and Jamin Shin and Seonghyun Kim and Lucy Park and Alice Oh and Jungwoo Ha and Kyunghyun Cho}, + eprint = {2105.09680}, + primaryclass = {cs.CL}, + title = {KLUE: Korean Language Understanding Evaluation}, + year = {2021}, +} +""", # 3000 - neutral samples ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/multilingual/IndicXnliPairClassification.py b/mteb/tasks/PairClassification/multilingual/IndicXnliPairClassification.py index c26394d92d..0a48487903 100644 --- a/mteb/tasks/PairClassification/multilingual/IndicXnliPairClassification.py +++ b/mteb/tasks/PairClassification/multilingual/IndicXnliPairClassification.py @@ -49,17 +49,17 @@ class 
IndicXnliPairClassification(AbsTaskPairClassification, MultilingualTask): annotations_creators="derived", dialect=[], sample_creation="machine-translated", - bibtex_citation=""" - @misc{aggarwal_gupta_kunch_22, - doi = {10.48550/ARXIV.2204.08776}, - url = {https://arxiv.org/abs/2204.08776}, - author = {Aggarwal, Divyanshu and Gupta, Vivek and Kunchukuttan, Anoop}, - title = {IndicXNLI: Evaluating Multilingual Inference for Indian Languages}, - publisher = {arXiv}, - year = {2022}, - copyright = {Creative Commons Attribution 4.0 International} - } - """, + bibtex_citation=r""" +@misc{aggarwal_gupta_kunch_22, + author = {Aggarwal, Divyanshu and Gupta, Vivek and Kunchukuttan, Anoop}, + copyright = {Creative Commons Attribution 4.0 International}, + doi = {10.48550/ARXIV.2204.08776}, + publisher = {arXiv}, + title = {IndicXNLI: Evaluating Multilingual Inference for Indian Languages}, + url = {https://arxiv.org/abs/2204.08776}, + year = {2022}, +} +""", # average of premise and hypothesis ) diff --git a/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py b/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py index bc23e7956d..da1e9c32dd 100644 --- a/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py +++ b/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py @@ -39,14 +39,16 @@ class OpusparcusPC(AbsTaskPairClassification, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@misc{creutz2018open, - title={Open Subtitles Paraphrase Corpus for Six Languages}, - author={Mathias Creutz}, - year={2018}, - eprint={1809.06142}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{creutz2018open, + archiveprefix = {arXiv}, + author = {Mathias Creutz}, + eprint = {1809.06142}, + primaryclass = {cs.CL}, + title = {Open Subtitles Paraphrase Corpus for Six Languages}, + year = {2018}, +} +""", ) def load_data(self, **kwargs): diff --git 
a/mteb/tasks/PairClassification/multilingual/PawsXPairClassification.py b/mteb/tasks/PairClassification/multilingual/PawsXPairClassification.py index 66bc37de95..dee2952dd8 100644 --- a/mteb/tasks/PairClassification/multilingual/PawsXPairClassification.py +++ b/mteb/tasks/PairClassification/multilingual/PawsXPairClassification.py @@ -37,14 +37,16 @@ class PawsXPairClassification(MultilingualTask, AbsTaskPairClassification): annotations_creators="human-annotated", dialect=[], sample_creation="human-translated", - bibtex_citation="""@misc{yang2019pawsx, - title={PAWS-X: A Cross-lingual Adversarial Dataset for Paraphrase Identification}, - author={Yinfei Yang and Yuan Zhang and Chris Tar and Jason Baldridge}, - year={2019}, - eprint={1908.11828}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{yang2019pawsx, + archiveprefix = {arXiv}, + author = {Yinfei Yang and Yuan Zhang and Chris Tar and Jason Baldridge}, + eprint = {1908.11828}, + primaryclass = {cs.CL}, + title = {PAWS-X: A Cross-lingual Adversarial Dataset for Paraphrase Identification}, + year = {2019}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/multilingual/PubChemWikiPairClassification.py b/mteb/tasks/PairClassification/multilingual/PubChemWikiPairClassification.py index f1b3102fbc..c3be5a701b 100644 --- a/mteb/tasks/PairClassification/multilingual/PubChemWikiPairClassification.py +++ b/mteb/tasks/PairClassification/multilingual/PubChemWikiPairClassification.py @@ -42,24 +42,25 @@ class PubChemWikiPairClassification(AbsTaskPairClassification, MultilingualTask) annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \\& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and 
Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - @article{kim2023pubchem, - title={PubChem 2023 update}, - author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, - journal={Nucleic acids research}, - volume={51}, - number={D1}, - pages={D1373--D1380}, - year={2023}, - publisher={Oxford University Press} - } - """, + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \\& Efficiency on a Specific Domain}, + year = {2024}, +} + +@article{kim2023pubchem, + author = {Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal = {Nucleic acids research}, + number = {D1}, + pages = {D1373--D1380}, + publisher = {Oxford University Press}, + title = {PubChem 2023 update}, + volume = {51}, + year = {2023}, +} +""", ) def dataset_transform(self) -> None: diff --git a/mteb/tasks/PairClassification/multilingual/RTE3.py b/mteb/tasks/PairClassification/multilingual/RTE3.py index 9a03fedb4f..49c2e0cdd3 100644 --- a/mteb/tasks/PairClassification/multilingual/RTE3.py +++ b/mteb/tasks/PairClassification/multilingual/RTE3.py @@ -36,21 +36,22 @@ class RTE3(MultilingualTask, AbsTaskPairClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{giampiccolo-etal-2007-third, - title = "The Third {PASCAL} Recognizing Textual Entailment Challenge", - author = "Giampiccolo, Danilo and - Magnini, 
Bernardo and - Dagan, Ido and - Dolan, Bill", - booktitle = "Proceedings of the {ACL}-{PASCAL} Workshop on Textual Entailment and Paraphrasing", - month = jun, - year = "2007", - address = "Prague", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/W07-1401", - pages = "1--9", - } - """, + bibtex_citation=r""" +@inproceedings{giampiccolo-etal-2007-third, + address = {Prague}, + author = {Giampiccolo, Danilo and +Magnini, Bernardo and +Dagan, Ido and +Dolan, Bill}, + booktitle = {Proceedings of the {ACL}-{PASCAL} Workshop on Textual Entailment and Paraphrasing}, + month = jun, + pages = {1--9}, + publisher = {Association for Computational Linguistics}, + title = {The Third {PASCAL} Recognizing Textual Entailment Challenge}, + url = {https://aclanthology.org/W07-1401}, + year = {2007}, +} +""", # sum of 4 languages after neutral filtering ) diff --git a/mteb/tasks/PairClassification/multilingual/XNLI.py b/mteb/tasks/PairClassification/multilingual/XNLI.py index 8f3f795bad..0c303a1aa1 100644 --- a/mteb/tasks/PairClassification/multilingual/XNLI.py +++ b/mteb/tasks/PairClassification/multilingual/XNLI.py @@ -44,22 +44,23 @@ class XNLI(MultilingualTask, AbsTaskPairClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@InProceedings{conneau2018xnli, - author = {Conneau, Alexis - and Rinott, Ruty - and Lample, Guillaume - and Williams, Adina - and Bowman, Samuel R. 
- and Schwenk, Holger - and Stoyanov, Veselin}, - title = {XNLI: Evaluating Cross-lingual Sentence Representations}, - booktitle = {Proceedings of the 2018 Conference on Empirical Methods - in Natural Language Processing}, - year = {2018}, - publisher = {Association for Computational Linguistics}, - location = {Brussels, Belgium}, - } - """, + bibtex_citation=r""" +@inproceedings{conneau2018xnli, + author = {Conneau, Alexis +and Rinott, Ruty +and Lample, Guillaume +and Williams, Adina +and Bowman, Samuel R. +and Schwenk, Holger +and Stoyanov, Veselin}, + booktitle = {Proceedings of the 2018 Conference on Empirical Methods +in Natural Language Processing}, + location = {Brussels, Belgium}, + publisher = {Association for Computational Linguistics}, + title = {XNLI: Evaluating Cross-lingual Sentence Representations}, + year = {2018}, +} +""", ) def dataset_transform(self): @@ -131,15 +132,16 @@ class XNLIV2(MultilingualTask, AbsTaskPairClassification): annotations_creators="expert-annotated", dialect=[], sample_creation="machine-translated and verified", - bibtex_citation="""@inproceedings{upadhyay2023xnli, - title={XNLI 2.0: Improving XNLI dataset and performance on Cross Lingual Understanding (XLU)}, - author={Upadhyay, Ankit Kumar and Upadhya, Harsit Kumar}, - booktitle={2023 IEEE 8th International Conference for Convergence in Technology (I2CT)}, - pages={1--6}, - year={2023}, - organization={IEEE} - } - """, + bibtex_citation=r""" +@inproceedings{upadhyay2023xnli, + author = {Upadhyay, Ankit Kumar and Upadhya, Harsit Kumar}, + booktitle = {2023 IEEE 8th International Conference for Convergence in Technology (I2CT)}, + organization = {IEEE}, + pages = {1--6}, + title = {XNLI 2.0: Improving XNLI dataset and performance on Cross Lingual Understanding (XLU)}, + year = {2023}, +} +""", # average of premise and hypothesis ) diff --git a/mteb/tasks/PairClassification/multilingual/XStance.py b/mteb/tasks/PairClassification/multilingual/XStance.py index 
03d4f066e7..9852374e06 100644 --- a/mteb/tasks/PairClassification/multilingual/XStance.py +++ b/mteb/tasks/PairClassification/multilingual/XStance.py @@ -35,17 +35,17 @@ class XStance(MultilingualTask, AbsTaskPairClassification): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation=""" - @inproceedings{vamvas2020xstance, - author = "Vamvas, Jannis and Sennrich, Rico", - title = "{X-Stance}: A Multilingual Multi-Target Dataset for Stance Detection", - booktitle = "Proceedings of the 5th Swiss Text Analytics Conference (SwissText) 16th Conference on Natural Language Processing (KONVENS)", - address = "Zurich, Switzerland", - year = "2020", - month = "jun", - url = "http://ceur-ws.org/Vol-2624/paper9.pdf" - } - """, + bibtex_citation=r""" +@inproceedings{vamvas2020xstance, + address = {Zurich, Switzerland}, + author = {Vamvas, Jannis and Sennrich, Rico}, + booktitle = {Proceedings of the 5th Swiss Text Analytics Conference (SwissText) 16th Conference on Natural Language Processing (KONVENS)}, + month = {jun}, + title = {{X-Stance}: A Multilingual Multi-Target Dataset for Stance Detection}, + url = {http://ceur-ws.org/Vol-2624/paper9.pdf}, + year = {2020}, +} +""", # length of`sent1` + `sent2` ) diff --git a/mteb/tasks/PairClassification/pol/PolishPC.py b/mteb/tasks/PairClassification/pol/PolishPC.py index 9e431b05ec..d87811accc 100644 --- a/mteb/tasks/PairClassification/pol/PolishPC.py +++ b/mteb/tasks/PairClassification/pol/PolishPC.py @@ -27,36 +27,38 @@ class SickePLPC(AbsTaskPairClassification): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{dadas-etal-2020-evaluation, - title = "Evaluation of Sentence Representations in {P}olish", - author = "Dadas, Slawomir and - Pere{\l}kiewicz, Micha{\l} and - Po{\'s}wiata, Rafa{\l}", - editor = "Calzolari, Nicoletta and - B{\'e}chet, Fr{\'e}d{\'e}ric and - Blache, Philippe and - Choukri, Khalid and - Cieri, Christopher and - 
Declerck, Thierry and - Goggi, Sara and - Isahara, Hitoshi and - Maegaard, Bente and - Mariani, Joseph and - Mazo, H{\'e}l{\`e}ne and - Moreno, Asuncion and - Odijk, Jan and - Piperidis, Stelios", - booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference", - month = may, - year = "2020", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://aclanthology.org/2020.lrec-1.207", - pages = "1674--1680", - abstract = "Methods for learning sentence representations have been actively developed in recent years. However, the lack of pre-trained models and datasets annotated at the sentence level has been a problem for low-resource languages such as Polish which led to less interest in applying these methods to language-specific tasks. In this study, we introduce two new Polish datasets for evaluating sentence embeddings and provide a comprehensive evaluation of eight sentence representation methods including Polish and multilingual models. We consider classic word embedding models, recently developed contextual embeddings and multilingual sentence encoders, showing strengths and weaknesses of specific approaches. We also examine different methods of aggregating word vectors into a single sentence vector.", - language = "English", - ISBN = "979-10-95546-34-4", - }""", + bibtex_citation=r""" +@inproceedings{dadas-etal-2020-evaluation, + abstract = {Methods for learning sentence representations have been actively developed in recent years. However, the lack of pre-trained models and datasets annotated at the sentence level has been a problem for low-resource languages such as Polish which led to less interest in applying these methods to language-specific tasks. In this study, we introduce two new Polish datasets for evaluating sentence embeddings and provide a comprehensive evaluation of eight sentence representation methods including Polish and multilingual models. 
We consider classic word embedding models, recently developed contextual embeddings and multilingual sentence encoders, showing strengths and weaknesses of specific approaches. We also examine different methods of aggregating word vectors into a single sentence vector.}, + address = {Marseille, France}, + author = {Dadas, Slawomir and +Pere{\l}kiewicz, Micha{\l} and +Po{\'s}wiata, Rafa{\l}}, + booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference}, + editor = {Calzolari, Nicoletta and +B{\'e}chet, Fr{\'e}d{\'e}ric and +Blache, Philippe and +Choukri, Khalid and +Cieri, Christopher and +Declerck, Thierry and +Goggi, Sara and +Isahara, Hitoshi and +Maegaard, Bente and +Mariani, Joseph and +Mazo, H{\'e}l{\`e}ne and +Moreno, Asuncion and +Odijk, Jan and +Piperidis, Stelios}, + isbn = {979-10-95546-34-4}, + language = {English}, + month = may, + pages = {1674--1680}, + publisher = {European Language Resources Association}, + title = {Evaluation of Sentence Representations in {P}olish}, + url = {https://aclanthology.org/2020.lrec-1.207}, + year = {2020}, +} +""", ) def dataset_transform(self): @@ -95,14 +97,16 @@ class PpcPC(AbsTaskPairClassification): annotations_creators="derived", # mined dialect=[], sample_creation="found", - bibtex_citation="""@misc{dadas2022training, - title={Training Effective Neural Sentence Encoders from Automatically Mined Paraphrases}, - author={Sławomir Dadas}, - year={2022}, - eprint={2207.12759}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{dadas2022training, + archiveprefix = {arXiv}, + author = {Sławomir Dadas}, + eprint = {2207.12759}, + primaryclass = {cs.CL}, + title = {Training Effective Neural Sentence Encoders from Automatically Mined Paraphrases}, + year = {2022}, +} +""", ) def dataset_transform(self): @@ -132,22 +136,24 @@ class CdscePC(AbsTaskPairClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - 
bibtex_citation="""@inproceedings{wroblewska-krasnowska-kieras-2017-polish, - title = "{P}olish evaluation dataset for compositional distributional semantics models", - author = "Wr{\'o}blewska, Alina and - Krasnowska-Kiera{\'s}, Katarzyna", - editor = "Barzilay, Regina and - Kan, Min-Yen", - booktitle = "Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)", - month = jul, - year = "2017", - address = "Vancouver, Canada", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/P17-1073", - doi = "10.18653/v1/P17-1073", - pages = "784--792", - abstract = "The paper presents a procedure of building an evaluation dataset. for the validation of compositional distributional semantics models estimated for languages other than English. The procedure generally builds on steps designed to assemble the SICK corpus, which contains pairs of English sentences annotated for semantic relatedness and entailment, because we aim at building a comparable dataset. However, the implementation of particular building steps significantly differs from the original SICK design assumptions, which is caused by both lack of necessary extraneous resources for an investigated language and the need for language-specific transformation rules. The designed procedure is verified on Polish, a fusional language with a relatively free word order, and contributes to building a Polish evaluation dataset. The resource consists of 10K sentence pairs which are human-annotated for semantic relatedness and entailment. The dataset may be used for the evaluation of compositional distributional semantics models of Polish.", - }""", + bibtex_citation=r""" +@inproceedings{wroblewska-krasnowska-kieras-2017-polish, + abstract = {The paper presents a procedure of building an evaluation dataset. for the validation of compositional distributional semantics models estimated for languages other than English. 
The procedure generally builds on steps designed to assemble the SICK corpus, which contains pairs of English sentences annotated for semantic relatedness and entailment, because we aim at building a comparable dataset. However, the implementation of particular building steps significantly differs from the original SICK design assumptions, which is caused by both lack of necessary extraneous resources for an investigated language and the need for language-specific transformation rules. The designed procedure is verified on Polish, a fusional language with a relatively free word order, and contributes to building a Polish evaluation dataset. The resource consists of 10K sentence pairs which are human-annotated for semantic relatedness and entailment. The dataset may be used for the evaluation of compositional distributional semantics models of Polish.}, + address = {Vancouver, Canada}, + author = {Wr{\'o}blewska, Alina and +Krasnowska-Kiera{\'s}, Katarzyna}, + booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + doi = {10.18653/v1/P17-1073}, + editor = {Barzilay, Regina and +Kan, Min-Yen}, + month = jul, + pages = {784--792}, + publisher = {Association for Computational Linguistics}, + title = {{P}olish evaluation dataset for compositional distributional semantics models}, + url = {https://aclanthology.org/P17-1073}, + year = {2017}, +} +""", ) def dataset_transform(self): @@ -177,28 +183,30 @@ class PscPC(AbsTaskPairClassification): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{ogrodniczuk-kopec-2014-polish, - title = "The {P}olish Summaries Corpus", - author = "Ogrodniczuk, Maciej and - Kope{\'c}, Mateusz", - editor = "Calzolari, Nicoletta and - Choukri, Khalid and - Declerck, Thierry and - Loftsson, Hrafn and - Maegaard, Bente and - Mariani, Joseph and - Moreno, Asuncion and - Odijk, Jan and - Piperidis, Stelios", - booktitle = 
"Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)", - month = may, - year = "2014", - address = "Reykjavik, Iceland", - publisher = "European Language Resources Association (ELRA)", - url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/1211_Paper.pdf", - pages = "3712--3715", - abstract = "This article presents the Polish Summaries Corpus, a new resource created to support the development and evaluation of the tools for automated single-document summarization of Polish. The Corpus contains a large number of manual summaries of news articles, with many independently created summaries for a single text. Such approach is supposed to overcome the annotator bias, which is often described as a problem during the evaluation of the summarization algorithms against a single gold standard. There are several summarizers developed specifically for Polish language, but their in-depth evaluation and comparison was impossible without a large, manually created corpus. We present in detail the process of text selection, annotation process and the contents of the corpus, which includes both abstract free-word summaries, as well as extraction-based summaries created by selecting text spans from the original document. Finally, we describe how that resource could be used not only for the evaluation of the existing summarization tools, but also for studies on the human summarization process in Polish language.", - }""", + bibtex_citation=r""" +@inproceedings{ogrodniczuk-kopec-2014-polish, + abstract = {This article presents the Polish Summaries Corpus, a new resource created to support the development and evaluation of the tools for automated single-document summarization of Polish. The Corpus contains a large number of manual summaries of news articles, with many independently created summaries for a single text. 
Such approach is supposed to overcome the annotator bias, which is often described as a problem during the evaluation of the summarization algorithms against a single gold standard. There are several summarizers developed specifically for Polish language, but their in-depth evaluation and comparison was impossible without a large, manually created corpus. We present in detail the process of text selection, annotation process and the contents of the corpus, which includes both abstract free-word summaries, as well as extraction-based summaries created by selecting text spans from the original document. Finally, we describe how that resource could be used not only for the evaluation of the existing summarization tools, but also for studies on the human summarization process in Polish language.}, + address = {Reykjavik, Iceland}, + author = {Ogrodniczuk, Maciej and +Kope{\'c}, Mateusz}, + booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)}, + editor = {Calzolari, Nicoletta and +Choukri, Khalid and +Declerck, Thierry and +Loftsson, Hrafn and +Maegaard, Bente and +Mariani, Joseph and +Moreno, Asuncion and +Odijk, Jan and +Piperidis, Stelios}, + month = may, + pages = {3712--3715}, + publisher = {European Language Resources Association (ELRA)}, + title = {The {P}olish Summaries Corpus}, + url = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/1211_Paper.pdf}, + year = {2014}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/por/Assin2RTE.py b/mteb/tasks/PairClassification/por/Assin2RTE.py index aa0046cb6e..70d6a1b929 100644 --- a/mteb/tasks/PairClassification/por/Assin2RTE.py +++ b/mteb/tasks/PairClassification/por/Assin2RTE.py @@ -26,14 +26,16 @@ class Assin2RTE(AbsTaskPairClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{real2020assin, - title={The assin 2 shared task: a quick overview}, - 
author={Real, Livy and Fonseca, Erick and Oliveira, Hugo Goncalo}, - booktitle={International Conference on Computational Processing of the Portuguese Language}, - pages={406--412}, - year={2020}, - organization={Springer} - }""", + bibtex_citation=r""" +@inproceedings{real2020assin, + author = {Real, Livy and Fonseca, Erick and Oliveira, Hugo Goncalo}, + booktitle = {International Conference on Computational Processing of the Portuguese Language}, + organization = {Springer}, + pages = {406--412}, + title = {The assin 2 shared task: a quick overview}, + year = {2020}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/por/SickBrPC.py b/mteb/tasks/PairClassification/por/SickBrPC.py index f656b4e710..445e6c87b4 100644 --- a/mteb/tasks/PairClassification/por/SickBrPC.py +++ b/mteb/tasks/PairClassification/por/SickBrPC.py @@ -26,27 +26,27 @@ class SickBrPC(AbsTaskPairClassification): annotations_creators="human-annotated", dialect=[], sample_creation="human-translated and localized", - bibtex_citation=""" - @inproceedings{real18, - author={Real, Livy - and Rodrigues, Ana - and Vieira e Silva, Andressa - and Albiero, Beatriz - and Thalenberg, Bruna - and Guide, Bruno - and Silva, Cindy - and de Oliveira Lima, Guilherme - and C{\^a}mara, Igor C. S. - and Stanojevi{\'{c}}, Milo{\v{s}} - and Souza, Rodrigo - and de Paiva, Valeria}, - year ="2018", - title="{SICK-BR: A Portuguese Corpus for Inference}", - booktitle="{Computational Processing of the Portuguese Language. PROPOR 2018.}", - doi ="10.1007/978-3-319-99722-3_31", - isbn="978-3-319-99722-3" - } - """, + bibtex_citation=r""" +@inproceedings{real18, + author = {Real, Livy +and Rodrigues, Ana +and Vieira e Silva, Andressa +and Albiero, Beatriz +and Thalenberg, Bruna +and Guide, Bruno +and Silva, Cindy +and de Oliveira Lima, Guilherme +and C{\^a}mara, Igor C. S. 
+and Stanojevi{\'{c}}, Milo{\v{s}} +and Souza, Rodrigo +and de Paiva, Valeria}, + booktitle = {{Computational Processing of the Portuguese Language. PROPOR 2018.}}, + doi = {10.1007/978-3-319-99722-3_31}, + isbn = {978-3-319-99722-3}, + title = {{SICK-BR: A Portuguese Corpus for Inference}}, + year = {2018}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/PairClassification/rus/TERRa.py b/mteb/tasks/PairClassification/rus/TERRa.py index 50b9560a46..3fc382b497 100644 --- a/mteb/tasks/PairClassification/rus/TERRa.py +++ b/mteb/tasks/PairClassification/rus/TERRa.py @@ -28,21 +28,23 @@ class TERRa(AbsTaskPairClassification): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{shavrina2020russiansuperglue, - title={RussianSuperGLUE: A Russian Language Understanding Evaluation Benchmark}, - author={Shavrina, Tatiana - and Fenogenova, Alena - and Emelyanov, Anton - and Shevelev, Denis - and Artemova, Ekaterina - and Malykh, Valentin - and Mikhailov, Vladislav - and Tikhonova, Maria - and Chertok, Andrey - and Evlampiev, Andrey}, - journal={arXiv preprint arXiv:2010.15925}, - year={2020} - }""", + bibtex_citation=r""" +@article{shavrina2020russiansuperglue, + author = {Shavrina, Tatiana +and Fenogenova, Alena +and Emelyanov, Anton +and Shevelev, Denis +and Artemova, Ekaterina +and Malykh, Valentin +and Mikhailov, Vladislav +and Tikhonova, Maria +and Chertok, Andrey +and Evlampiev, Andrey}, + journal = {arXiv preprint arXiv:2010.15925}, + title = {RussianSuperGLUE: A Russian Language Understanding Evaluation Benchmark}, + year = {2020}, +} +""", prompt="Given a premise, retrieve a hypothesis that is entailed by the premise", ) diff --git a/mteb/tasks/PairClassification/zho/CMTEBPairClassification.py b/mteb/tasks/PairClassification/zho/CMTEBPairClassification.py index 3ddb8c290d..63ba4dc64a 100644 --- a/mteb/tasks/PairClassification/zho/CMTEBPairClassification.py +++ 
b/mteb/tasks/PairClassification/zho/CMTEBPairClassification.py @@ -26,14 +26,16 @@ class Ocnli(AbsTaskPairClassification): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{hu2020ocnli, - title={OCNLI: Original Chinese Natural Language Inference}, - author={Hai Hu and Kyle Richardson and Liang Xu and Lu Li and Sandra Kuebler and Lawrence S. Moss}, - year={2020}, - eprint={2010.05444}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - }""", + bibtex_citation=r""" +@misc{hu2020ocnli, + archiveprefix = {arXiv}, + author = {Hai Hu and Kyle Richardson and Liang Xu and Lu Li and Sandra Kuebler and Lawrence S. Moss}, + eprint = {2010.05444}, + primaryclass = {cs.CL}, + title = {OCNLI: Original Chinese Natural Language Inference}, + year = {2020}, +} +""", prompt="Retrieve semantically similar text.", ) @@ -64,49 +66,51 @@ class Cmnli(AbsTaskPairClassification): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{xu-etal-2020-clue, - title = "{CLUE}: A {C}hinese Language Understanding Evaluation Benchmark", - author = "Xu, Liang and - Hu, Hai and - Zhang, Xuanwei and - Li, Lu and - Cao, Chenjie and - Li, Yudong and - Xu, Yechen and - Sun, Kai and - Yu, Dian and - Yu, Cong and - Tian, Yin and - Dong, Qianqian and - Liu, Weitang and - Shi, Bo and - Cui, Yiming and - Li, Junyi and - Zeng, Jun and - Wang, Rongzhao and - Xie, Weijian and - Li, Yanting and - Patterson, Yina and - Tian, Zuoyu and - Zhang, Yiwen and - Zhou, He and - Liu, Shaoweihua and - Zhao, Zhe and - Zhao, Qipeng and - Yue, Cong and - Zhang, Xinrui and - Yang, Zhengliang and - Richardson, Kyle and - Lan, Zhenzhong", - booktitle = "Proceedings of the 28th International Conference on Computational Linguistics", - month = dec, - year = "2020", - address = "Barcelona, Spain (Online)", - publisher = "International Committee on Computational Linguistics", - url = "https://aclanthology.org/2020.coling-main.419", - doi = 
"10.18653/v1/2020.coling-main.419", - pages = "4762--4772", - }""", + bibtex_citation=r""" +@inproceedings{xu-etal-2020-clue, + address = {Barcelona, Spain (Online)}, + author = {Xu, Liang and +Hu, Hai and +Zhang, Xuanwei and +Li, Lu and +Cao, Chenjie and +Li, Yudong and +Xu, Yechen and +Sun, Kai and +Yu, Dian and +Yu, Cong and +Tian, Yin and +Dong, Qianqian and +Liu, Weitang and +Shi, Bo and +Cui, Yiming and +Li, Junyi and +Zeng, Jun and +Wang, Rongzhao and +Xie, Weijian and +Li, Yanting and +Patterson, Yina and +Tian, Zuoyu and +Zhang, Yiwen and +Zhou, He and +Liu, Shaoweihua and +Zhao, Zhe and +Zhao, Qipeng and +Yue, Cong and +Zhang, Xinrui and +Yang, Zhengliang and +Richardson, Kyle and +Lan, Zhenzhong}, + booktitle = {Proceedings of the 28th International Conference on Computational Linguistics}, + doi = {10.18653/v1/2020.coling-main.419}, + month = dec, + pages = {4762--4772}, + publisher = {International Committee on Computational Linguistics}, + title = {{CLUE}: A {C}hinese Language Understanding Evaluation Benchmark}, + url = {https://aclanthology.org/2020.coling-main.419}, + year = {2020}, +} +""", prompt="Retrieve semantically similar text.", ) diff --git a/mteb/tasks/Reranking/ara/NamaaMrTydiReranking.py b/mteb/tasks/Reranking/ara/NamaaMrTydiReranking.py index dd305727ee..b037a2c544 100644 --- a/mteb/tasks/Reranking/ara/NamaaMrTydiReranking.py +++ b/mteb/tasks/Reranking/ara/NamaaMrTydiReranking.py @@ -27,13 +27,15 @@ class NamaaMrTydiReranking(AbsTaskReranking): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{muennighoff2022mteb, - doi = {10.48550/ARXIV.2210.07316}, - url = {https://arxiv.org/abs/2210.07316}, + bibtex_citation=r""" +@article{muennighoff2022mteb, author = {Muennighoff, Niklas and Tazi, Nouamane and Magne, Lo{\"\\i}c and Reimers, Nils}, - title = {MTEB: Massive Text Embedding Benchmark}, + doi = {10.48550/ARXIV.2210.07316}, + journal = {arXiv preprint arXiv:2210.07316}, publisher 
= {arXiv}, - journal={arXiv preprint arXiv:2210.07316}, - year = {2022} -}""", + title = {MTEB: Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2210.07316}, + year = {2022}, +} +""", ) diff --git a/mteb/tasks/Reranking/eng/AskUbuntuDupQuestions.py b/mteb/tasks/Reranking/eng/AskUbuntuDupQuestions.py index b96d3d944b..788c0b82e8 100644 --- a/mteb/tasks/Reranking/eng/AskUbuntuDupQuestions.py +++ b/mteb/tasks/Reranking/eng/AskUbuntuDupQuestions.py @@ -28,12 +28,14 @@ class AskUbuntuDupQuestions(AbsTaskReranking): dialect=[], sample_creation="found", prompt="Retrieve duplicate questions from AskUbuntu forum", - bibtex_citation="""@article{wang-2021-TSDAE, - title = "TSDAE: Using Transformer-based Sequential Denoising Auto-Encoderfor Unsupervised Sentence Embedding Learning", - author = "Wang, Kexin and Reimers, Nils and Gurevych, Iryna", - journal= "arXiv preprint arXiv:2104.06979", - month = "4", - year = "2021", - url = "https://arxiv.org/abs/2104.06979", -}""", + bibtex_citation=r""" +@article{wang-2021-TSDAE, + author = {Wang, Kexin and Reimers, Nils and Gurevych, Iryna}, + journal = {arXiv preprint arXiv:2104.06979}, + month = {4}, + title = {TSDAE: Using Transformer-based Sequential Denoising Auto-Encoderfor Unsupervised Sentence Embedding Learning}, + url = {https://arxiv.org/abs/2104.06979}, + year = {2021}, +} +""", ) diff --git a/mteb/tasks/Reranking/eng/BuiltBenchReranking.py b/mteb/tasks/Reranking/eng/BuiltBenchReranking.py index 890978fbf9..3922f4b092 100644 --- a/mteb/tasks/Reranking/eng/BuiltBenchReranking.py +++ b/mteb/tasks/Reranking/eng/BuiltBenchReranking.py @@ -27,12 +27,14 @@ class BuiltBenchReranking(AbsTaskReranking): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation="""@article{shahinmoghadam2024benchmarking, - title={Benchmarking pre-trained text embedding models in aligning built asset information}, - author={Shahinmoghadam, Mehrzad and Motamedi, Ali}, - journal={arXiv preprint 
arXiv:2411.12056}, - year={2024} -}""", + bibtex_citation=r""" +@article{shahinmoghadam2024benchmarking, + author = {Shahinmoghadam, Mehrzad and Motamedi, Ali}, + journal = {arXiv preprint arXiv:2411.12056}, + title = {Benchmarking pre-trained text embedding models in aligning built asset information}, + year = {2024}, +} +""", prompt={ "query": "Given a query, retrieve relevant entity descriptions from buit asset classification systems such as IFC and Uniclass" }, diff --git a/mteb/tasks/Reranking/eng/MindSmallReranking.py b/mteb/tasks/Reranking/eng/MindSmallReranking.py index a0f0428958..db54158188 100644 --- a/mteb/tasks/Reranking/eng/MindSmallReranking.py +++ b/mteb/tasks/Reranking/eng/MindSmallReranking.py @@ -28,23 +28,33 @@ class MindSmallReranking(AbsTaskReranking): dialect=[], sample_creation="found", prompt="Retrieve relevant news articles based on user browsing history", - bibtex_citation="""@inproceedings{wu-etal-2020-mind, title = "{MIND}: A Large-scale Dataset for News - Recommendation", author = "Wu, Fangzhao and Qiao, Ying and Chen, Jiun-Hung and Wu, Chuhan and Qi, - Tao and Lian, Jianxun and Liu, Danyang and Xie, Xing and Gao, Jianfeng and Wu, Winnie and Zhou, Ming", - editor = "Jurafsky, Dan and Chai, Joyce and Schluter, Natalie and Tetreault, Joel", booktitle = - "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics", month = jul, - year = "2020", address = "Online", publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2020.acl-main.331", doi = "10.18653/v1/2020.acl-main.331", - pages = "3597--3606", abstract = "News recommendation is an important technique for personalized news - service. Compared with product and movie recommendations which have been comprehensively studied, - the research on news recommendation is much more limited, mainly due to the lack of a high-quality benchmark - dataset. 
In this paper, we present a large-scale dataset named MIND for news recommendation. Constructed from - the user click logs of Microsoft News, MIND contains 1 million users and more than 160k English news - articles, each of which has rich textual content such as title, abstract and body. We demonstrate MIND a good - testbed for news recommendation through a comparative study of several state-of-the-art news recommendation - methods which are originally developed on different proprietary datasets. Our results show the performance of - news recommendation highly relies on the quality of news content understanding and user interest modeling. - Many natural language processing techniques such as effective text representation methods and pre-trained - language models can effectively improve the performance of news recommendation. The MIND dataset will be - available at https://msnews.github.io.", }""", + bibtex_citation=r""" +@inproceedings{wu-etal-2020-mind, + abstract = {News recommendation is an important technique for personalized news +service. Compared with product and movie recommendations which have been comprehensively studied, +the research on news recommendation is much more limited, mainly due to the lack of a high-quality benchmark +dataset. In this paper, we present a large-scale dataset named MIND for news recommendation. Constructed from +the user click logs of Microsoft News, MIND contains 1 million users and more than 160k English news +articles, each of which has rich textual content such as title, abstract and body. We demonstrate MIND a good +testbed for news recommendation through a comparative study of several state-of-the-art news recommendation +methods which are originally developed on different proprietary datasets. Our results show the performance of +news recommendation highly relies on the quality of news content understanding and user interest modeling. 
+Many natural language processing techniques such as effective text representation methods and pre-trained +language models can effectively improve the performance of news recommendation. The MIND dataset will be +available at https://msnews.github.io.}, + address = {Online}, + author = {Wu, Fangzhao and Qiao, Ying and Chen, Jiun-Hung and Wu, Chuhan and Qi, +Tao and Lian, Jianxun and Liu, Danyang and Xie, Xing and Gao, Jianfeng and Wu, Winnie and Zhou, Ming}, + booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics}, + doi = {10.18653/v1/2020.acl-main.331}, + editor = {Jurafsky, Dan and Chai, Joyce and Schluter, Natalie and Tetreault, Joel}, + month = jul, + pages = {3597--3606}, + publisher = {Association for Computational Linguistics}, + title = {{MIND}: A Large-scale Dataset for News +Recommendation}, + url = {https://aclanthology.org/2020.acl-main.331}, + year = {2020}, +} +""", ) diff --git a/mteb/tasks/Reranking/eng/SciDocsReranking.py b/mteb/tasks/Reranking/eng/SciDocsReranking.py index c8ba6d5e2b..99edf718d4 100644 --- a/mteb/tasks/Reranking/eng/SciDocsReranking.py +++ b/mteb/tasks/Reranking/eng/SciDocsReranking.py @@ -28,27 +28,27 @@ class SciDocsReranking(AbsTaskReranking): dialect=None, sample_creation="found", prompt="Given a title of a scientific paper, retrieve the titles of other relevant papers", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{cohan-etal-2020-specter, - title = "{SPECTER}: Document-level Representation Learning using Citation-informed Transformers", - author = "Cohan, Arman and - Feldman, Sergey and - Beltagy, Iz and - Downey, Doug and - Weld, Daniel", - editor = "Jurafsky, Dan and - Chai, Joyce and - Schluter, Natalie and - Tetreault, Joel", - booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics", - month = jul, - year = "2020", - address = "Online", - publisher = "Association for Computational Linguistics", - url = 
"https://aclanthology.org/2020.acl-main.207", - doi = "10.18653/v1/2020.acl-main.207", - pages = "2270--2282", - abstract = "Representation learning is a critical ingredient for natural language processing systems. Recent Transformer language models like BERT learn powerful textual representations, but these models are targeted towards token- and sentence-level training objectives and do not leverage information on inter-document relatedness, which limits their document-level representation power. For applications on scientific documents, such as classification and recommendation, accurate embeddings of documents are a necessity. We propose SPECTER, a new method to generate document-level embedding of scientific papers based on pretraining a Transformer language model on a powerful signal of document-level relatedness: the citation graph. Unlike existing pretrained language models, Specter can be easily applied to downstream applications without task-specific fine-tuning. Additionally, to encourage further research on document-level models, we introduce SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from citation prediction, to document classification and recommendation. We show that Specter outperforms a variety of competitive baselines on the benchmark.", + abstract = {Representation learning is a critical ingredient for natural language processing systems. Recent Transformer language models like BERT learn powerful textual representations, but these models are targeted towards token- and sentence-level training objectives and do not leverage information on inter-document relatedness, which limits their document-level representation power. For applications on scientific documents, such as classification and recommendation, accurate embeddings of documents are a necessity. 
We propose SPECTER, a new method to generate document-level embedding of scientific papers based on pretraining a Transformer language model on a powerful signal of document-level relatedness: the citation graph. Unlike existing pretrained language models, Specter can be easily applied to downstream applications without task-specific fine-tuning. Additionally, to encourage further research on document-level models, we introduce SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from citation prediction, to document classification and recommendation. We show that Specter outperforms a variety of competitive baselines on the benchmark.}, + address = {Online}, + author = {Cohan, Arman and +Feldman, Sergey and +Beltagy, Iz and +Downey, Doug and +Weld, Daniel}, + booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics}, + doi = {10.18653/v1/2020.acl-main.207}, + editor = {Jurafsky, Dan and +Chai, Joyce and +Schluter, Natalie and +Tetreault, Joel}, + month = jul, + pages = {2270--2282}, + publisher = {Association for Computational Linguistics}, + title = {{SPECTER}: Document-level Representation Learning using Citation-informed Transformers}, + url = {https://aclanthology.org/2020.acl-main.207}, + year = {2020}, } """, adapted_from=["SCIDOCS"], diff --git a/mteb/tasks/Reranking/eng/StackOverflowDupQuestions.py b/mteb/tasks/Reranking/eng/StackOverflowDupQuestions.py index 897f9d7bc9..82179a01bd 100644 --- a/mteb/tasks/Reranking/eng/StackOverflowDupQuestions.py +++ b/mteb/tasks/Reranking/eng/StackOverflowDupQuestions.py @@ -28,11 +28,13 @@ class StackOverflowDupQuestions(AbsTaskReranking): dialect=[], sample_creation="found", prompt="Retrieve duplicate questions from StackOverflow forum", - bibtex_citation="""@article{Liu2018LinkSOAD, - title={LinkSO: a dataset for learning to retrieve similar question answer pairs on software development forums}, - author={Xueqing Liu and Chi Wang and Yue Leng 
and ChengXiang Zhai}, - journal={Proceedings of the 4th ACM SIGSOFT International Workshop on NLP for Software Engineering}, - year={2018}, - url={https://api.semanticscholar.org/CorpusID:53111679} -}""", + bibtex_citation=r""" +@article{Liu2018LinkSOAD, + author = {Xueqing Liu and Chi Wang and Yue Leng and ChengXiang Zhai}, + journal = {Proceedings of the 4th ACM SIGSOFT International Workshop on NLP for Software Engineering}, + title = {LinkSO: a dataset for learning to retrieve similar question answer pairs on software development forums}, + url = {https://api.semanticscholar.org/CorpusID:53111679}, + year = {2018}, +} +""", ) diff --git a/mteb/tasks/Reranking/eng/WebLINXCandidatesReranking.py b/mteb/tasks/Reranking/eng/WebLINXCandidatesReranking.py index 9db3acb394..a47d0c653e 100644 --- a/mteb/tasks/Reranking/eng/WebLINXCandidatesReranking.py +++ b/mteb/tasks/Reranking/eng/WebLINXCandidatesReranking.py @@ -37,16 +37,16 @@ class WebLINXCandidatesReranking(AbsTaskReranking): annotations_creators="expert-annotated", dialect=[], sample_creation="created", - bibtex_citation=""" + bibtex_citation=r""" @misc{lù2024weblinx, - title={WebLINX: Real-World Website Navigation with Multi-Turn Dialogue}, - author={Xing Han Lù and Zdeněk Kasner and Siva Reddy}, - year={2024}, - eprint={2402.05930}, - archivePrefix={arXiv}, - primaryClass={cs.CL} + archiveprefix = {arXiv}, + author = {Xing Han Lù and Zdeněk Kasner and Siva Reddy}, + eprint = {2402.05930}, + primaryclass = {cs.CL}, + title = {WebLINX: Real-World Website Navigation with Multi-Turn Dialogue}, + year = {2024}, } - """, +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Reranking/fra/AlloprofReranking.py b/mteb/tasks/Reranking/fra/AlloprofReranking.py index 20d24f03ec..f9f36f43c4 100644 --- a/mteb/tasks/Reranking/fra/AlloprofReranking.py +++ b/mteb/tasks/Reranking/fra/AlloprofReranking.py @@ -29,16 +29,18 @@ class AlloprofReranking(AbsTaskReranking): annotations_creators="expert-annotated", 
dialect=None, sample_creation="found", - bibtex_citation="""@misc{lef23, - doi = {10.48550/ARXIV.2302.07738}, - url = {https://arxiv.org/abs/2302.07738}, - author = {Lefebvre-Brossard, Antoine and Gazaille, Stephane and Desmarais, Michel C.}, - keywords = {Computation and Language (cs.CL), Information Retrieval (cs.IR), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences}, - title = {Alloprof: a new French question-answer education dataset and its use in an information retrieval case study}, - publisher = {arXiv}, - year = {2023}, - copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International} - }""", + bibtex_citation=r""" +@misc{lef23, + author = {Lefebvre-Brossard, Antoine and Gazaille, Stephane and Desmarais, Michel C.}, + copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International}, + doi = {10.48550/ARXIV.2302.07738}, + keywords = {Computation and Language (cs.CL), Information Retrieval (cs.IR), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences}, + publisher = {arXiv}, + title = {Alloprof: a new French question-answer education dataset and its use in an information retrieval case study}, + url = {https://arxiv.org/abs/2302.07738}, + year = {2023}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Reranking/fra/SyntecReranking.py b/mteb/tasks/Reranking/fra/SyntecReranking.py index de30fc4b85..fd0444ce55 100644 --- a/mteb/tasks/Reranking/fra/SyntecReranking.py +++ b/mteb/tasks/Reranking/fra/SyntecReranking.py @@ -29,14 +29,16 @@ class SyntecReranking(AbsTaskReranking): annotations_creators="human-annotated", dialect=None, sample_creation="found", - bibtex_citation="""@misc{ciancone2024extending, - title={Extending the Massive Text Embedding Benchmark to French}, - author={Mathieu Ciancone and Imene Kerboua and Marion Schaeffer and Wissam Siblini}, - year={2024}, - eprint={2405.20468}, - 
archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{ciancone2024extending, + archiveprefix = {arXiv}, + author = {Mathieu Ciancone and Imene Kerboua and Marion Schaeffer and Wissam Siblini}, + eprint = {2405.20468}, + primaryclass = {cs.CL}, + title = {Extending the Massive Text Embedding Benchmark to French}, + year = {2024}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Reranking/jpn/MMarcoReranking.py b/mteb/tasks/Reranking/jpn/MMarcoReranking.py index 54290b61cd..fef3d4980e 100644 --- a/mteb/tasks/Reranking/jpn/MMarcoReranking.py +++ b/mteb/tasks/Reranking/jpn/MMarcoReranking.py @@ -27,12 +27,15 @@ class VoyageMMarcoReranking(AbsTaskReranking): dialect=["jpn-Jpan"], sample_creation="found", prompt="Given a Japanese search query, retrieve web passages that answer the question", - bibtex_citation="""@misc{clavié2023jacolbert, - title={JaColBERT and Hard Negatives, Towards Better Japanese-First Embeddings for Retrieval: Early Technical Report}, - author={Benjamin Clavié}, - year={2023}, - eprint={2312.16144}, - archivePrefix={arXiv},}""", + bibtex_citation=r""" +@misc{clavié2023jacolbert, + archiveprefix = {arXiv}, + author = {Benjamin Clavié}, + eprint = {2312.16144}, + title = {JaColBERT and Hard Negatives, Towards Better Japanese-First Embeddings for Retrieval: Early Technical Report}, + year = {2023}, +} +""", ) def dataset_transform(self): diff --git a/mteb/tasks/Reranking/multilingual/ESCIReranking.py b/mteb/tasks/Reranking/multilingual/ESCIReranking.py index 03c6608f27..08f7c86bd6 100644 --- a/mteb/tasks/Reranking/multilingual/ESCIReranking.py +++ b/mteb/tasks/Reranking/multilingual/ESCIReranking.py @@ -15,12 +15,12 @@ "jp": ["jpn-Jpan"], } -_CITATION = """@article{reddy2022shopping, - title={Shopping Queries Dataset: A Large-Scale {ESCI} Benchmark for Improving Product Search}, - author={Chandan K. 
Reddy and Lluís Màrquez and Fran Valero and Nikhil Rao and Hugo Zaragoza and Sambaran Bandyopadhyay and Arnab Biswas and Anlu Xing and Karthik Subbian}, - year={2022}, - eprint={2206.06588}, - archivePrefix={arXiv} +_CITATION = r"""@article{reddy2022shopping, + archiveprefix = {arXiv}, + author = {Chandan K. Reddy and Lluís Màrquez and Fran Valero and Nikhil Rao and Hugo Zaragoza and Sambaran Bandyopadhyay and Arnab Biswas and Anlu Xing and Karthik Subbian}, + eprint = {2206.06588}, + title = {Shopping Queries Dataset: A Large-Scale {ESCI} Benchmark for Improving Product Search}, + year = {2022}, }""" diff --git a/mteb/tasks/Reranking/multilingual/MIRACLReranking.py b/mteb/tasks/Reranking/multilingual/MIRACLReranking.py index 267c832638..535d777bab 100644 --- a/mteb/tasks/Reranking/multilingual/MIRACLReranking.py +++ b/mteb/tasks/Reranking/multilingual/MIRACLReranking.py @@ -37,16 +37,16 @@ "zh": ["zho-Hans"], } -_CITATION = """@article{10.1162/tacl_a_00595, - author = {Zhang, Xinyu and Thakur, Nandan and Ogundepo, Odunayo and Kamalloo, Ehsan and Alfonso-Hermelo, David and Li, Xiaoguang and Liu, Qun and Rezagholizadeh, Mehdi and Lin, Jimmy}, - title = "{MIRACL: A Multilingual Retrieval Dataset Covering 18 Diverse Languages}", - journal = {Transactions of the Association for Computational Linguistics}, - volume = {11}, - pages = {1114-1131}, - year = {2023}, - month = {09}, - issn = {2307-387X}, - doi = {10.1162/tacl_a_00595}, +_CITATION = r"""@article{10.1162/tacl_a_00595, + author = {Zhang, Xinyu and Thakur, Nandan and Ogundepo, Odunayo and Kamalloo, Ehsan and Alfonso-Hermelo, David and Li, Xiaoguang and Liu, Qun and Rezagholizadeh, Mehdi and Lin, Jimmy}, + doi = {10.1162/tacl_a_00595}, + issn = {2307-387X}, + journal = {Transactions of the Association for Computational Linguistics}, + month = {09}, + pages = {1114-1131}, + title = {{MIRACL: A Multilingual Retrieval Dataset Covering 18 Diverse Languages}}, + volume = {11}, + year = {2023}, }""" diff --git 
a/mteb/tasks/Reranking/multilingual/WikipediaRerankingMultilingual.py b/mteb/tasks/Reranking/multilingual/WikipediaRerankingMultilingual.py index 3bfbd04f13..cb42eb532a 100644 --- a/mteb/tasks/Reranking/multilingual/WikipediaRerankingMultilingual.py +++ b/mteb/tasks/Reranking/multilingual/WikipediaRerankingMultilingual.py @@ -47,9 +47,11 @@ class WikipediaRerankingMultilingual(MultilingualTask, AbsTaskReranking): annotations_creators="LM-generated and reviewed", dialect=[], sample_creation="LM-generated and verified", - bibtex_citation="""@ONLINE{wikidump, - author = "Wikimedia Foundation", - title = "Wikimedia Downloads", - url = "https://dumps.wikimedia.org" -}""", + bibtex_citation=r""" +@online{wikidump, + author = {Wikimedia Foundation}, + title = {Wikimedia Downloads}, + url = {https://dumps.wikimedia.org}, +} +""", ) diff --git a/mteb/tasks/Reranking/rus/RuBQReranking.py b/mteb/tasks/Reranking/rus/RuBQReranking.py index fb79a17588..8399005f2a 100644 --- a/mteb/tasks/Reranking/rus/RuBQReranking.py +++ b/mteb/tasks/Reranking/rus/RuBQReranking.py @@ -27,13 +27,15 @@ class RuBQReranking(AbsTaskReranking): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{RuBQ2021, - title={RuBQ 2.0: An Innovated Russian Question Answering Dataset}, - author={Ivan Rybin and Vladislav Korablinov and Pavel Efimov and Pavel Braslavski}, - booktitle={ESWC}, - year={2021}, - pages={532--547} - }""", + bibtex_citation=r""" +@inproceedings{RuBQ2021, + author = {Ivan Rybin and Vladislav Korablinov and Pavel Efimov and Pavel Braslavski}, + booktitle = {ESWC}, + pages = {532--547}, + title = {RuBQ 2.0: An Innovated Russian Question Answering Dataset}, + year = {2021}, +} +""", prompt={ "query": "Given a question, retrieve Wikipedia passages that answer the question.", }, diff --git a/mteb/tasks/Reranking/zho/CMTEBReranking.py b/mteb/tasks/Reranking/zho/CMTEBReranking.py index ea74d1fd34..2ad84444b2 100644 --- 
a/mteb/tasks/Reranking/zho/CMTEBReranking.py +++ b/mteb/tasks/Reranking/zho/CMTEBReranking.py @@ -28,14 +28,16 @@ class T2Reranking(AbsTaskReranking): dialect=None, sample_creation=None, prompt="Given a Chinese search query, retrieve web passages that answer the question", - bibtex_citation="""@misc{xie2023t2ranking, - title={T2Ranking: A large-scale Chinese Benchmark for Passage Ranking}, - author={Xiaohui Xie and Qian Dong and Bingning Wang and Feiyang Lv and Ting Yao and Weinan Gan and Zhijing Wu and Xiangsheng Li and Haitao Li and Yiqun Liu and Jin Ma}, - year={2023}, - eprint={2304.03679}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{xie2023t2ranking, + archiveprefix = {arXiv}, + author = {Xiaohui Xie and Qian Dong and Bingning Wang and Feiyang Lv and Ting Yao and Weinan Gan and Zhijing Wu and Xiangsheng Li and Haitao Li and Yiqun Liu and Jin Ma}, + eprint = {2304.03679}, + primaryclass = {cs.IR}, + title = {T2Ranking: A large-scale Chinese Benchmark for Passage Ranking}, + year = {2023}, +} +""", ) @@ -63,14 +65,16 @@ class MMarcoReranking(AbsTaskReranking): dialect=None, sample_creation=None, prompt="Given a Chinese search query, retrieve web passages that answer the question", - bibtex_citation="""@misc{bonifacio2021mmarco, - title={mMARCO: A Multilingual Version of MS MARCO Passage Ranking Dataset}, - author={Luiz Henrique Bonifacio and Vitor Jeronymo and Hugo Queiroz Abonizio and Israel Campiotti and Marzieh Fadaee and and Roberto Lotufo and Rodrigo Nogueira}, - year={2021}, - eprint={2108.13897}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{bonifacio2021mmarco, + archiveprefix = {arXiv}, + author = {Luiz Henrique Bonifacio and Vitor Jeronymo and Hugo Queiroz Abonizio and Israel Campiotti and Marzieh Fadaee and and Roberto Lotufo and Rodrigo Nogueira}, + eprint = {2108.13897}, + primaryclass = {cs.CL}, + title = {mMARCO: A Multilingual Version of MS MARCO Passage Ranking 
Dataset}, + year = {2021}, +} +""", ) @@ -97,16 +101,18 @@ class CMedQAv1(AbsTaskReranking): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{zhang2017chinese, - title={Chinese Medical Question Answer Matching Using End-to-End Character-Level Multi-Scale CNNs}, - author={Zhang, Sheng and Zhang, Xin and Wang, Hui and Cheng, Jiajun and Li, Pei and Ding, Zhaoyun}, - journal={Applied Sciences}, - volume={7}, - number={8}, - pages={767}, - year={2017}, - publisher={Multidisciplinary Digital Publishing Institute} -}""", + bibtex_citation=r""" +@article{zhang2017chinese, + author = {Zhang, Sheng and Zhang, Xin and Wang, Hui and Cheng, Jiajun and Li, Pei and Ding, Zhaoyun}, + journal = {Applied Sciences}, + number = {8}, + pages = {767}, + publisher = {Multidisciplinary Digital Publishing Institute}, + title = {Chinese Medical Question Answer Matching Using End-to-End Character-Level Multi-Scale CNNs}, + volume = {7}, + year = {2017}, +} +""", ) @@ -134,16 +140,19 @@ class CMedQAv2(AbsTaskReranking): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@ARTICLE{8548603, -author={S. Zhang and X. Zhang and H. Wang and L. Guo and S. Liu}, -journal={IEEE Access}, -title={Multi-Scale Attentive Interaction Networks for Chinese Medical Question Answer Selection}, -year={2018}, -volume={6}, -number={}, -pages={74061-74071}, -keywords={Biomedical imaging;Data mining;Semantics;Medical services;Feature extraction;Knowledge discovery;Medical question answering;interactive attention;deep learning;deep neural networks}, -doi={10.1109/ACCESS.2018.2883637}, -ISSN={2169-3536}, -month={},}""", + bibtex_citation=r""" +@article{8548603, + author = {S. Zhang and X. Zhang and H. Wang and L. Guo and S. 
Liu}, + doi = {10.1109/ACCESS.2018.2883637}, + issn = {2169-3536}, + journal = {IEEE Access}, + keywords = {Biomedical imaging;Data mining;Semantics;Medical services;Feature extraction;Knowledge discovery;Medical question answering;interactive attention;deep learning;deep neural networks}, + month = {}, + number = {}, + pages = {74061-74071}, + title = {Multi-Scale Attentive Interaction Networks for Chinese Medical Question Answer Selection}, + volume = {6}, + year = {2018}, +} +""", ) diff --git a/mteb/tasks/Retrieval/ara/SadeemQuestionRetrieval.py b/mteb/tasks/Retrieval/ara/SadeemQuestionRetrieval.py index 2009a91c79..f82d06ee61 100644 --- a/mteb/tasks/Retrieval/ara/SadeemQuestionRetrieval.py +++ b/mteb/tasks/Retrieval/ara/SadeemQuestionRetrieval.py @@ -1,60 +1,60 @@ -from __future__ import annotations - -import datasets - -from mteb.abstasks.TaskMetadata import TaskMetadata - -from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval - - -class SadeemQuestionRetrieval(AbsTaskRetrieval): - _EVAL_SPLIT = "test" - - metadata = TaskMetadata( - name="SadeemQuestionRetrieval", - dataset={ - "path": "sadeem-ai/sadeem-ar-eval-retrieval-questions", - "revision": "3cb0752b182e5d5d740df547748b06663c8e0bd9", - "name": "test", - }, - reference="https://huggingface.co/datasets/sadeem-ai/sadeem-ar-eval-retrieval-questions", - description="SadeemQuestion: A Benchmark Data Set for Community Question-Retrieval Research", - type="Retrieval", - category="s2p", - modalities=["text"], - eval_splits=[_EVAL_SPLIT], - eval_langs=["ara-Arab"], - main_score="ndcg_at_10", - date=("2024-01-01", "2024-04-01"), - domains=["Written", "Written"], - task_subtypes=["Article retrieval"], - license="not specified", - annotations_creators="derived", - dialect=[], - sample_creation="found", - bibtex_citation=""" - @inproceedings{sadeem-2024-ar-retrieval-questions, - title = "SadeemQuestionRetrieval: A New Benchmark for Arabic questions-based Articles Searching.", - author = 
"abubakr.soliman@sadeem.app" - } - """, - ) - - def load_data(self, **kwargs): - if self.data_loaded: - return - - query_list = datasets.load_dataset(**self.metadata_dict["dataset"])["queries"] - queries = {row["query-id"]: row["text"] for row in query_list} - - corpus_list = datasets.load_dataset(**self.metadata_dict["dataset"])["corpus"] - corpus = {row["corpus-id"]: {"text": row["text"]} for row in corpus_list} - - qrels_list = datasets.load_dataset(**self.metadata_dict["dataset"])["qrels"] - qrels = {row["query-id"]: {row["corpus-id"]: 1} for row in qrels_list} - - self.corpus = {self._EVAL_SPLIT: corpus} - self.queries = {self._EVAL_SPLIT: queries} - self.relevant_docs = {self._EVAL_SPLIT: qrels} - - self.data_loaded = True +from __future__ import annotations + +import datasets + +from mteb.abstasks.TaskMetadata import TaskMetadata + +from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval + + +class SadeemQuestionRetrieval(AbsTaskRetrieval): + _EVAL_SPLIT = "test" + + metadata = TaskMetadata( + name="SadeemQuestionRetrieval", + dataset={ + "path": "sadeem-ai/sadeem-ar-eval-retrieval-questions", + "revision": "3cb0752b182e5d5d740df547748b06663c8e0bd9", + "name": "test", + }, + reference="https://huggingface.co/datasets/sadeem-ai/sadeem-ar-eval-retrieval-questions", + description="SadeemQuestion: A Benchmark Data Set for Community Question-Retrieval Research", + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=[_EVAL_SPLIT], + eval_langs=["ara-Arab"], + main_score="ndcg_at_10", + date=("2024-01-01", "2024-04-01"), + domains=["Written", "Written"], + task_subtypes=["Article retrieval"], + license="not specified", + annotations_creators="derived", + dialect=[], + sample_creation="found", + bibtex_citation=r""" +@inproceedings{sadeem-2024-ar-retrieval-questions, + author = {abubakr.soliman@sadeem.app}, + title = {SadeemQuestionRetrieval: A New Benchmark for Arabic questions-based Articles Searching.}, +} +""", + ) + + def load_data(self, 
**kwargs): + if self.data_loaded: + return + + query_list = datasets.load_dataset(**self.metadata_dict["dataset"])["queries"] + queries = {row["query-id"]: row["text"] for row in query_list} + + corpus_list = datasets.load_dataset(**self.metadata_dict["dataset"])["corpus"] + corpus = {row["corpus-id"]: {"text": row["text"]} for row in corpus_list} + + qrels_list = datasets.load_dataset(**self.metadata_dict["dataset"])["qrels"] + qrels = {row["query-id"]: {row["corpus-id"]: 1} for row in qrels_list} + + self.corpus = {self._EVAL_SPLIT: corpus} + self.queries = {self._EVAL_SPLIT: queries} + self.relevant_docs = {self._EVAL_SPLIT: qrels} + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/code/AppsRetrieval.py b/mteb/tasks/Retrieval/code/AppsRetrieval.py index e207f8e340..62fc53560f 100644 --- a/mteb/tasks/Retrieval/code/AppsRetrieval.py +++ b/mteb/tasks/Retrieval/code/AppsRetrieval.py @@ -28,10 +28,12 @@ class AppsRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{hendrycksapps2021, - title={Measuring Coding Challenge Competence With APPS}, - author={Dan Hendrycks and Steven Basart and Saurav Kadavath and Mantas Mazeika and Akul Arora and Ethan Guo and Collin Burns and Samir Puranik and Horace He and Dawn Song and Jacob Steinhardt}, - journal={NeurIPS}, - year={2021} - }""", + bibtex_citation=r""" +@article{hendrycksapps2021, + author = {Dan Hendrycks and Steven Basart and Saurav Kadavath and Mantas Mazeika and Akul Arora and Ethan Guo and Collin Burns and Samir Puranik and Horace He and Dawn Song and Jacob Steinhardt}, + journal = {NeurIPS}, + title = {Measuring Coding Challenge Competence With APPS}, + year = {2021}, +} +""", ) diff --git a/mteb/tasks/Retrieval/code/COIRCodeSearchNetRetrieval.py b/mteb/tasks/Retrieval/code/COIRCodeSearchNetRetrieval.py index 29858026a6..5306f033aa 100644 --- a/mteb/tasks/Retrieval/code/COIRCodeSearchNetRetrieval.py +++ 
b/mteb/tasks/Retrieval/code/COIRCodeSearchNetRetrieval.py @@ -96,7 +96,14 @@ class COIRCodeSearchNetRetrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="@article{husain2019codesearchnet, title={{CodeSearchNet} challenge: Evaluating the state of semantic code search}, author={Husain, Hamel and Wu, Ho-Hsiang and Gazit, Tiferet and Allamanis, Miltiadis and Brockschmidt, Marc}, journal={arXiv preprint arXiv:1909.09436}, year={2019} }", + bibtex_citation=r""" +@article{husain2019codesearchnet, + author = {Husain, Hamel and Wu, Ho-Hsiang and Gazit, Tiferet and Allamanis, Miltiadis and Brockschmidt, Marc}, + journal = {arXiv preprint arXiv:1909.09436}, + title = {{CodeSearchNet} challenge: Evaluating the state of semantic code search}, + year = {2019}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/code/CodeEditSearchRetrieval.py b/mteb/tasks/Retrieval/code/CodeEditSearchRetrieval.py index e3175fa324..4f46641d69 100644 --- a/mteb/tasks/Retrieval/code/CodeEditSearchRetrieval.py +++ b/mteb/tasks/Retrieval/code/CodeEditSearchRetrieval.py @@ -46,7 +46,14 @@ class CodeEditSearchRetrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="@article{muennighoff2023octopack, title={OctoPack: Instruction Tuning Code Large Language Models}, author={Niklas Muennighoff and Qian Liu and Armel Zebaze and Qinkai Zheng and Binyuan Hui and Terry Yue Zhuo and Swayam Singh and Xiangru Tang and Leandro von Werra and Shayne Longpre}, journal={arXiv preprint arXiv:2308.07124}, year={2023} }", + bibtex_citation=r""" +@article{muennighoff2023octopack, + author = {Niklas Muennighoff and Qian Liu and Armel Zebaze and Qinkai Zheng and Binyuan Hui and Terry Yue Zhuo and Swayam Singh and Xiangru Tang and Leandro von Werra and Shayne Longpre}, + journal = {arXiv preprint arXiv:2308.07124}, + title = {OctoPack: 
Instruction Tuning Code Large Language Models}, + year = {2023}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/code/CodeFeedbackMTRetrieval.py b/mteb/tasks/Retrieval/code/CodeFeedbackMTRetrieval.py index fcb1a822b2..45dbcb8603 100644 --- a/mteb/tasks/Retrieval/code/CodeFeedbackMTRetrieval.py +++ b/mteb/tasks/Retrieval/code/CodeFeedbackMTRetrieval.py @@ -28,13 +28,15 @@ class CodeFeedbackMT(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{zheng2024opencodeinterpreterintegratingcodegeneration, - title={OpenCodeInterpreter: Integrating Code Generation with Execution and Refinement}, - author={Tianyu Zheng and Ge Zhang and Tianhao Shen and Xueling Liu and Bill Yuchen Lin and Jie Fu and Wenhu Chen and Xiang Yue}, - year={2024}, - eprint={2402.14658}, - archivePrefix={arXiv}, - primaryClass={cs.SE}, - url={https://arxiv.org/abs/2402.14658}, - }""", + bibtex_citation=r""" +@misc{zheng2024opencodeinterpreterintegratingcodegeneration, + archiveprefix = {arXiv}, + author = {Tianyu Zheng and Ge Zhang and Tianhao Shen and Xueling Liu and Bill Yuchen Lin and Jie Fu and Wenhu Chen and Xiang Yue}, + eprint = {2402.14658}, + primaryclass = {cs.SE}, + title = {OpenCodeInterpreter: Integrating Code Generation with Execution and Refinement}, + url = {https://arxiv.org/abs/2402.14658}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Retrieval/code/CodeFeedbackSTRetrieval.py b/mteb/tasks/Retrieval/code/CodeFeedbackSTRetrieval.py index 2a99c990c4..00994ac642 100644 --- a/mteb/tasks/Retrieval/code/CodeFeedbackSTRetrieval.py +++ b/mteb/tasks/Retrieval/code/CodeFeedbackSTRetrieval.py @@ -28,13 +28,15 @@ class CodeFeedbackST(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{li2024coircomprehensivebenchmarkcode, - title={CoIR: A Comprehensive Benchmark for Code Information Retrieval Models}, - author={Xiangyang Li and Kuicai Dong 
and Yi Quan Lee and Wei Xia and Yichun Yin and Hao Zhang and Yong Liu and Yasheng Wang and Ruiming Tang}, - year={2024}, - eprint={2407.02883}, - archivePrefix={arXiv}, - primaryClass={cs.IR}, - url={https://arxiv.org/abs/2407.02883}, - }""", + bibtex_citation=r""" +@misc{li2024coircomprehensivebenchmarkcode, + archiveprefix = {arXiv}, + author = {Xiangyang Li and Kuicai Dong and Yi Quan Lee and Wei Xia and Yichun Yin and Hao Zhang and Yong Liu and Yasheng Wang and Ruiming Tang}, + eprint = {2407.02883}, + primaryclass = {cs.IR}, + title = {CoIR: A Comprehensive Benchmark for Code Information Retrieval Models}, + url = {https://arxiv.org/abs/2407.02883}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Retrieval/code/CodeRAG.py b/mteb/tasks/Retrieval/code/CodeRAG.py index 1573aa7ff9..0123d30428 100644 --- a/mteb/tasks/Retrieval/code/CodeRAG.py +++ b/mteb/tasks/Retrieval/code/CodeRAG.py @@ -30,14 +30,14 @@ def split_by_first_newline(s): "sample_creation": "found", "bibtex_citation": """ @misc{wang2024coderagbenchretrievalaugmentcode, - title={CodeRAG-Bench: Can Retrieval Augment Code Generation?}, - author={Zora Zhiruo Wang and Akari Asai and Xinyan Velocity Yu and Frank F. Xu and Yiqing Xie and Graham Neubig and Daniel Fried}, - year={2024}, - eprint={2406.14497}, - archivePrefix={arXiv}, - primaryClass={cs.SE}, - url={https://arxiv.org/abs/2406.14497}, - } + archiveprefix = {arXiv}, + author = {Zora Zhiruo Wang and Akari Asai and Xinyan Velocity Yu and Frank F. 
Xu and Yiqing Xie and Graham Neubig and Daniel Fried}, + eprint = {2406.14497}, + primaryclass = {cs.SE}, + title = {CodeRAG-Bench: Can Retrieval Augment Code Generation?}, + url = {https://arxiv.org/abs/2406.14497}, + year = {2024}, +} """, } diff --git a/mteb/tasks/Retrieval/code/CodeSearchNetCCRetrieval.py b/mteb/tasks/Retrieval/code/CodeSearchNetCCRetrieval.py index 3f5ca2e028..71e579efcc 100644 --- a/mteb/tasks/Retrieval/code/CodeSearchNetCCRetrieval.py +++ b/mteb/tasks/Retrieval/code/CodeSearchNetCCRetrieval.py @@ -95,15 +95,17 @@ class CodeSearchNetCCRetrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{li2024coircomprehensivebenchmarkcode, - title={CoIR: A Comprehensive Benchmark for Code Information Retrieval Models}, - author={Xiangyang Li and Kuicai Dong and Yi Quan Lee and Wei Xia and Yichun Yin and Hao Zhang and Yong Liu and Yasheng Wang and Ruiming Tang}, - year={2024}, - eprint={2407.02883}, - archivePrefix={arXiv}, - primaryClass={cs.IR}, - url={https://arxiv.org/abs/2407.02883}, - }""", + bibtex_citation=r""" +@misc{li2024coircomprehensivebenchmarkcode, + archiveprefix = {arXiv}, + author = {Xiangyang Li and Kuicai Dong and Yi Quan Lee and Wei Xia and Yichun Yin and Hao Zhang and Yong Liu and Yasheng Wang and Ruiming Tang}, + eprint = {2407.02883}, + primaryclass = {cs.IR}, + title = {CoIR: A Comprehensive Benchmark for Code Information Retrieval Models}, + url = {https://arxiv.org/abs/2407.02883}, + year = {2024}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/code/CodeSearchNetRetrieval.py b/mteb/tasks/Retrieval/code/CodeSearchNetRetrieval.py index ddcef675f5..1eb657f35e 100644 --- a/mteb/tasks/Retrieval/code/CodeSearchNetRetrieval.py +++ b/mteb/tasks/Retrieval/code/CodeSearchNetRetrieval.py @@ -32,7 +32,14 @@ class CodeSearchNetRetrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="derived", dialect=[], 
sample_creation="found", - bibtex_citation="@article{husain2019codesearchnet, title={{CodeSearchNet} challenge: Evaluating the state of semantic code search}, author={Husain, Hamel and Wu, Ho-Hsiang and Gazit, Tiferet and Allamanis, Miltiadis and Brockschmidt, Marc}, journal={arXiv preprint arXiv:1909.09436}, year={2019} }", + bibtex_citation=r""" +@article{husain2019codesearchnet, + author = {Husain, Hamel and Wu, Ho-Hsiang and Gazit, Tiferet and Allamanis, Miltiadis and Brockschmidt, Marc}, + journal = {arXiv preprint arXiv:1909.09436}, + title = {{CodeSearchNet} challenge: Evaluating the state of semantic code search}, + year = {2019}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/code/CodeTransOceanContestRetrieval.py b/mteb/tasks/Retrieval/code/CodeTransOceanContestRetrieval.py index 9933e8fe87..423be6bdfc 100644 --- a/mteb/tasks/Retrieval/code/CodeTransOceanContestRetrieval.py +++ b/mteb/tasks/Retrieval/code/CodeTransOceanContestRetrieval.py @@ -28,13 +28,15 @@ class CodeTransOceanContestRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{yan2023codetransoceancomprehensivemultilingualbenchmark, - title={CodeTransOcean: A Comprehensive Multilingual Benchmark for Code Translation}, - author={Weixiang Yan and Yuchen Tian and Yunzhe Li and Qian Chen and Wen Wang}, - year={2023}, - eprint={2310.04951}, - archivePrefix={arXiv}, - primaryClass={cs.AI}, - url={https://arxiv.org/abs/2310.04951}, - }""", + bibtex_citation=r""" +@misc{yan2023codetransoceancomprehensivemultilingualbenchmark, + archiveprefix = {arXiv}, + author = {Weixiang Yan and Yuchen Tian and Yunzhe Li and Qian Chen and Wen Wang}, + eprint = {2310.04951}, + primaryclass = {cs.AI}, + title = {CodeTransOcean: A Comprehensive Multilingual Benchmark for Code Translation}, + url = {https://arxiv.org/abs/2310.04951}, + year = {2023}, +} +""", ) diff --git 
a/mteb/tasks/Retrieval/code/CodeTransOceanDLRetrieval.py b/mteb/tasks/Retrieval/code/CodeTransOceanDLRetrieval.py index f17e1df43b..77107edde9 100644 --- a/mteb/tasks/Retrieval/code/CodeTransOceanDLRetrieval.py +++ b/mteb/tasks/Retrieval/code/CodeTransOceanDLRetrieval.py @@ -28,13 +28,15 @@ class CodeTransOceanDLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{yan2023codetransoceancomprehensivemultilingualbenchmark, - title={CodeTransOcean: A Comprehensive Multilingual Benchmark for Code Translation}, - author={Weixiang Yan and Yuchen Tian and Yunzhe Li and Qian Chen and Wen Wang}, - year={2023}, - eprint={2310.04951}, - archivePrefix={arXiv}, - primaryClass={cs.AI}, - url={https://arxiv.org/abs/2310.04951}, - }""", + bibtex_citation=r""" +@misc{yan2023codetransoceancomprehensivemultilingualbenchmark, + archiveprefix = {arXiv}, + author = {Weixiang Yan and Yuchen Tian and Yunzhe Li and Qian Chen and Wen Wang}, + eprint = {2310.04951}, + primaryclass = {cs.AI}, + title = {CodeTransOcean: A Comprehensive Multilingual Benchmark for Code Translation}, + url = {https://arxiv.org/abs/2310.04951}, + year = {2023}, +} +""", ) diff --git a/mteb/tasks/Retrieval/code/CosQARetrieval.py b/mteb/tasks/Retrieval/code/CosQARetrieval.py index e0b975aebe..9ad7bcb9aa 100644 --- a/mteb/tasks/Retrieval/code/CosQARetrieval.py +++ b/mteb/tasks/Retrieval/code/CosQARetrieval.py @@ -28,13 +28,15 @@ class CosQARetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{huang2021cosqa20000webqueries, - title={CoSQA: 20,000+ Web Queries for Code Search and Question Answering}, - author={Junjie Huang and Duyu Tang and Linjun Shou and Ming Gong and Ke Xu and Daxin Jiang and Ming Zhou and Nan Duan}, - year={2021}, - eprint={2105.13239}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2105.13239}, - }""", + bibtex_citation=r""" 
+@misc{huang2021cosqa20000webqueries, + archiveprefix = {arXiv}, + author = {Junjie Huang and Duyu Tang and Linjun Shou and Ming Gong and Ke Xu and Daxin Jiang and Ming Zhou and Nan Duan}, + eprint = {2105.13239}, + primaryclass = {cs.CL}, + title = {CoSQA: 20,000+ Web Queries for Code Search and Question Answering}, + url = {https://arxiv.org/abs/2105.13239}, + year = {2021}, +} +""", ) diff --git a/mteb/tasks/Retrieval/code/StackOverflowQARetrieval.py b/mteb/tasks/Retrieval/code/StackOverflowQARetrieval.py index 3f06da1660..3484a15ea9 100644 --- a/mteb/tasks/Retrieval/code/StackOverflowQARetrieval.py +++ b/mteb/tasks/Retrieval/code/StackOverflowQARetrieval.py @@ -28,13 +28,15 @@ class StackOverflowQARetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{li2024coircomprehensivebenchmarkcode, - title={CoIR: A Comprehensive Benchmark for Code Information Retrieval Models}, - author={Xiangyang Li and Kuicai Dong and Yi Quan Lee and Wei Xia and Yichun Yin and Hao Zhang and Yong Liu and Yasheng Wang and Ruiming Tang}, - year={2024}, - eprint={2407.02883}, - archivePrefix={arXiv}, - primaryClass={cs.IR}, - url={https://arxiv.org/abs/2407.02883}, - }""", + bibtex_citation=r""" +@misc{li2024coircomprehensivebenchmarkcode, + archiveprefix = {arXiv}, + author = {Xiangyang Li and Kuicai Dong and Yi Quan Lee and Wei Xia and Yichun Yin and Hao Zhang and Yong Liu and Yasheng Wang and Ruiming Tang}, + eprint = {2407.02883}, + primaryclass = {cs.IR}, + title = {CoIR: A Comprehensive Benchmark for Code Information Retrieval Models}, + url = {https://arxiv.org/abs/2407.02883}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Retrieval/code/SyntheticText2SqlRetrieval.py b/mteb/tasks/Retrieval/code/SyntheticText2SqlRetrieval.py index cd4cd8835e..6e3300aea4 100644 --- a/mteb/tasks/Retrieval/code/SyntheticText2SqlRetrieval.py +++ b/mteb/tasks/Retrieval/code/SyntheticText2SqlRetrieval.py @@ -28,11 +28,13 @@ class 
SyntheticText2SQLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@software{gretel-synthetic-text-to-sql-2024, - author = {Meyer, Yev and Emadi, Marjan and Nathawani, Dhruv and Ramaswamy, Lipika and Boyd, Kendrick and Van Segbroeck, Maarten and Grossman, Matthew and Mlocek, Piotr and Newberry, Drew}, - title = {{Synthetic-Text-To-SQL}: A synthetic dataset for training language models to generate SQL queries from natural language prompts}, - month = {April}, - year = {2024}, - url = {https://huggingface.co/datasets/gretelai/synthetic-text-to-sql} - }""", + bibtex_citation=r""" +@software{gretel-synthetic-text-to-sql-2024, + author = {Meyer, Yev and Emadi, Marjan and Nathawani, Dhruv and Ramaswamy, Lipika and Boyd, Kendrick and Van Segbroeck, Maarten and Grossman, Matthew and Mlocek, Piotr and Newberry, Drew}, + month = {April}, + title = {{Synthetic-Text-To-SQL}: A synthetic dataset for training language models to generate SQL queries from natural language prompts}, + url = {https://huggingface.co/datasets/gretelai/synthetic-text-to-sql}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Retrieval/dan/DanFeverRetrieval.py b/mteb/tasks/Retrieval/dan/DanFeverRetrieval.py index 6a7b239f2f..83ce3dd752 100644 --- a/mteb/tasks/Retrieval/dan/DanFeverRetrieval.py +++ b/mteb/tasks/Retrieval/dan/DanFeverRetrieval.py @@ -27,21 +27,21 @@ class DanFeverRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{norregaard-derczynski-2021-danfever, - title = "{D}an{FEVER}: claim verification dataset for {D}anish", - author = "N{\o}rregaard, Jeppe and - Derczynski, Leon", - editor = "Dobnik, Simon and - {\O}vrelid, Lilja", - booktitle = "Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)", - month = may # " 31--2 " # jun, - year = "2021", - address = "Reykjavik, Iceland 
(Online)", - publisher = {Link{\"o}ping University Electronic Press, Sweden}, - url = "https://aclanthology.org/2021.nodalida-main.47", - pages = "422--428", - abstract = "We present a dataset, DanFEVER, intended for multilingual misinformation research. The dataset is in Danish and has the same format as the well-known English FEVER dataset. It can be used for testing methods in multilingual settings, as well as for creating models in production for the Danish language.", + abstract = {We present a dataset, DanFEVER, intended for multilingual misinformation research. The dataset is in Danish and has the same format as the well-known English FEVER dataset. It can be used for testing methods in multilingual settings, as well as for creating models in production for the Danish language.}, + address = {Reykjavik, Iceland (Online)}, + author = {N{\o}rregaard, Jeppe and +Derczynski, Leon}, + booktitle = {Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)}, + editor = {Dobnik, Simon and +{\O}vrelid, Lilja}, + month = may # { 31--2 } # jun, + pages = {422--428}, + publisher = {Link{\"o}ping University Electronic Press, Sweden}, + title = {{D}an{FEVER}: claim verification dataset for {D}anish}, + url = {https://aclanthology.org/2021.nodalida-main.47}, + year = {2021}, } """, prompt={ @@ -130,21 +130,21 @@ class DanFever(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{norregaard-derczynski-2021-danfever, - title = "{D}an{FEVER}: claim verification dataset for {D}anish", - author = "N{\o}rregaard, Jeppe and - Derczynski, Leon", - editor = "Dobnik, Simon and - {\O}vrelid, Lilja", - booktitle = "Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)", - month = may # " 31--2 " # jun, - year = "2021", - address = "Reykjavik, Iceland (Online)", - publisher = {Link{\"o}ping University Electronic Press, Sweden}, - url 
= "https://aclanthology.org/2021.nodalida-main.47", - pages = "422--428", - abstract = "We present a dataset, DanFEVER, intended for multilingual misinformation research. The dataset is in Danish and has the same format as the well-known English FEVER dataset. It can be used for testing methods in multilingual settings, as well as for creating models in production for the Danish language.", + abstract = {We present a dataset, DanFEVER, intended for multilingual misinformation research. The dataset is in Danish and has the same format as the well-known English FEVER dataset. It can be used for testing methods in multilingual settings, as well as for creating models in production for the Danish language.}, + address = {Reykjavik, Iceland (Online)}, + author = {N{\o}rregaard, Jeppe and +Derczynski, Leon}, + booktitle = {Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)}, + editor = {Dobnik, Simon and +{\O}vrelid, Lilja}, + month = may # { 31--2 } # jun, + pages = {422--428}, + publisher = {Link{\"o}ping University Electronic Press, Sweden}, + title = {{D}an{FEVER}: claim verification dataset for {D}anish}, + url = {https://aclanthology.org/2021.nodalida-main.47}, + year = {2021}, } """, prompt={ diff --git a/mteb/tasks/Retrieval/dan/TV2Nordretrieval.py b/mteb/tasks/Retrieval/dan/TV2Nordretrieval.py index 1abc46fcc9..388997c7f6 100644 --- a/mteb/tasks/Retrieval/dan/TV2Nordretrieval.py +++ b/mteb/tasks/Retrieval/dan/TV2Nordretrieval.py @@ -27,34 +27,36 @@ class TV2Nordretrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{flansmose-mikkelsen-etal-2022-ddisco, - title = "{DD}is{C}o: A Discourse Coherence Dataset for {D}anish", - author = "Flansmose Mikkelsen, Linea and - Kinch, Oliver and - Jess Pedersen, Anders and - Lacroix, Oph{\'e}lie", - editor = "Calzolari, Nicoletta and - B{\'e}chet, Fr{\'e}d{\'e}ric and - Blache, Philippe and - Choukri, Khalid and - 
Cieri, Christopher and - Declerck, Thierry and - Goggi, Sara and - Isahara, Hitoshi and - Maegaard, Bente and - Mariani, Joseph and - Mazo, H{\'e}l{\`e}ne and - Odijk, Jan and - Piperidis, Stelios", - booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference", - month = jun, - year = "2022", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://aclanthology.org/2022.lrec-1.260", - pages = "2440--2445", - abstract = "To date, there has been no resource for studying discourse coherence on real-world Danish texts. Discourse coherence has mostly been approached with the assumption that incoherent texts can be represented by coherent texts in which sentences have been shuffled. However, incoherent real-world texts rarely resemble that. We thus present DDisCo, a dataset including text from the Danish Wikipedia and Reddit annotated for discourse coherence. We choose to annotate real-world texts instead of relying on artificially incoherent text for training and testing models. Then, we evaluate the performance of several methods, including neural networks, on the dataset.", -}""", + bibtex_citation=r""" +@inproceedings{flansmose-mikkelsen-etal-2022-ddisco, + abstract = {To date, there has been no resource for studying discourse coherence on real-world Danish texts. Discourse coherence has mostly been approached with the assumption that incoherent texts can be represented by coherent texts in which sentences have been shuffled. However, incoherent real-world texts rarely resemble that. We thus present DDisCo, a dataset including text from the Danish Wikipedia and Reddit annotated for discourse coherence. We choose to annotate real-world texts instead of relying on artificially incoherent text for training and testing models. 
Then, we evaluate the performance of several methods, including neural networks, on the dataset.}, + address = {Marseille, France}, + author = {Flansmose Mikkelsen, Linea and +Kinch, Oliver and +Jess Pedersen, Anders and +Lacroix, Oph{\'e}lie}, + booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference}, + editor = {Calzolari, Nicoletta and +B{\'e}chet, Fr{\'e}d{\'e}ric and +Blache, Philippe and +Choukri, Khalid and +Cieri, Christopher and +Declerck, Thierry and +Goggi, Sara and +Isahara, Hitoshi and +Maegaard, Bente and +Mariani, Joseph and +Mazo, H{\'e}l{\`e}ne and +Odijk, Jan and +Piperidis, Stelios}, + month = jun, + pages = {2440--2445}, + publisher = {European Language Resources Association}, + title = {{DD}is{C}o: A Discourse Coherence Dataset for {D}anish}, + url = {https://aclanthology.org/2022.lrec-1.260}, + year = {2022}, +} +""", prompt={ "query": "Given a summary of a Danish news article retrieve the corresponding news article" }, diff --git a/mteb/tasks/Retrieval/dan/TwitterHjerneRetrieval.py b/mteb/tasks/Retrieval/dan/TwitterHjerneRetrieval.py index 5bc91789e7..198d1bc1b5 100644 --- a/mteb/tasks/Retrieval/dan/TwitterHjerneRetrieval.py +++ b/mteb/tasks/Retrieval/dan/TwitterHjerneRetrieval.py @@ -27,11 +27,11 @@ class TwitterHjerneRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @article{holm2024gllms, - title={Are GLLMs Danoliterate? Benchmarking Generative NLP in Danish}, - author={Holm, Soren Vejlgaard}, - year={2024} + author = {Holm, Soren Vejlgaard}, + title = {Are GLLMs Danoliterate? 
Benchmarking Generative NLP in Danish}, + year = {2024}, } """, prompt={"query": "Retrieve answers to questions asked in Danish tweets"}, diff --git a/mteb/tasks/Retrieval/deu/GerDaLIRRetrieval.py b/mteb/tasks/Retrieval/deu/GerDaLIRRetrieval.py index 745a0fe60d..380f30dcdf 100644 --- a/mteb/tasks/Retrieval/deu/GerDaLIRRetrieval.py +++ b/mteb/tasks/Retrieval/deu/GerDaLIRRetrieval.py @@ -31,19 +31,21 @@ class GerDaLIR(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{wrzalik-krechel-2021-gerdalir, - title = "{G}er{D}a{LIR}: A {G}erman Dataset for Legal Information Retrieval", - author = "Wrzalik, Marco and - Krechel, Dirk", - booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2021", - month = nov, - year = "2021", - address = "Punta Cana, Dominican Republic", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.nllp-1.13", - pages = "123--128", - abstract = "We present GerDaLIR, a German Dataset for Legal Information Retrieval based on case documents from the open legal information platform Open Legal Data. The dataset consists of 123K queries, each labelled with at least one relevant document in a collection of 131K case documents. We conduct several baseline experiments including BM25 and a state-of-the-art neural re-ranker. With our dataset, we aim to provide a standardized benchmark for German LIR and promote open research in this area. Beyond that, our dataset comprises sufficient training data to be used as a downstream task for German or multilingual language models.", -}""", + bibtex_citation=r""" +@inproceedings{wrzalik-krechel-2021-gerdalir, + abstract = {We present GerDaLIR, a German Dataset for Legal Information Retrieval based on case documents from the open legal information platform Open Legal Data. 
The dataset consists of 123K queries, each labelled with at least one relevant document in a collection of 131K case documents. We conduct several baseline experiments including BM25 and a state-of-the-art neural re-ranker. With our dataset, we aim to provide a standardized benchmark for German LIR and promote open research in this area. Beyond that, our dataset comprises sufficient training data to be used as a downstream task for German or multilingual language models.}, + address = {Punta Cana, Dominican Republic}, + author = {Wrzalik, Marco and +Krechel, Dirk}, + booktitle = {Proceedings of the Natural Legal Language Processing Workshop 2021}, + month = nov, + pages = {123--128}, + publisher = {Association for Computational Linguistics}, + title = {{G}er{D}a{LIR}: A {G}erman Dataset for Legal Information Retrieval}, + url = {https://aclanthology.org/2021.nllp-1.13}, + year = {2021}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/deu/GerDaLIRSmallRetrieval.py b/mteb/tasks/Retrieval/deu/GerDaLIRSmallRetrieval.py index d80487251e..3325a75c5c 100644 --- a/mteb/tasks/Retrieval/deu/GerDaLIRSmallRetrieval.py +++ b/mteb/tasks/Retrieval/deu/GerDaLIRSmallRetrieval.py @@ -27,17 +27,19 @@ class GerDaLIRSmall(AbsTaskRetrieval): annotations_creators="derived", dialect=None, sample_creation="found", - bibtex_citation="""@inproceedings{wrzalik-krechel-2021-gerdalir, - title = "{G}er{D}a{LIR}: A {G}erman Dataset for Legal Information Retrieval", - author = "Wrzalik, Marco and - Krechel, Dirk", - booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2021", - month = nov, - year = "2021", - address = "Punta Cana, Dominican Republic", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.nllp-1.13", - pages = "123--128", - abstract = "We present GerDaLIR, a German Dataset for Legal Information Retrieval based on case documents from the open legal information platform Open Legal Data. 
The dataset consists of 123K queries, each labelled with at least one relevant document in a collection of 131K case documents. We conduct several baseline experiments including BM25 and a state-of-the-art neural re-ranker. With our dataset, we aim to provide a standardized benchmark for German LIR and promote open research in this area. Beyond that, our dataset comprises sufficient training data to be used as a downstream task for German or multilingual language models.", -}""", + bibtex_citation=r""" +@inproceedings{wrzalik-krechel-2021-gerdalir, + abstract = {We present GerDaLIR, a German Dataset for Legal Information Retrieval based on case documents from the open legal information platform Open Legal Data. The dataset consists of 123K queries, each labelled with at least one relevant document in a collection of 131K case documents. We conduct several baseline experiments including BM25 and a state-of-the-art neural re-ranker. With our dataset, we aim to provide a standardized benchmark for German LIR and promote open research in this area. 
Beyond that, our dataset comprises sufficient training data to be used as a downstream task for German or multilingual language models.}, + address = {Punta Cana, Dominican Republic}, + author = {Wrzalik, Marco and +Krechel, Dirk}, + booktitle = {Proceedings of the Natural Legal Language Processing Workshop 2021}, + month = nov, + pages = {123--128}, + publisher = {Association for Computational Linguistics}, + title = {{G}er{D}a{LIR}: A {G}erman Dataset for Legal Information Retrieval}, + url = {https://aclanthology.org/2021.nllp-1.13}, + year = {2021}, +} +""", ) diff --git a/mteb/tasks/Retrieval/deu/GermanDPRRetrieval.py b/mteb/tasks/Retrieval/deu/GermanDPRRetrieval.py index a55ae4dffb..9b280aaecf 100644 --- a/mteb/tasks/Retrieval/deu/GermanDPRRetrieval.py +++ b/mteb/tasks/Retrieval/deu/GermanDPRRetrieval.py @@ -32,14 +32,16 @@ class GermanDPR(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{möller2021germanquad, - title={GermanQuAD and GermanDPR: Improving Non-English Question Answering and Passage Retrieval}, - author={Timo Möller and Julian Risch and Malte Pietsch}, - year={2021}, - eprint={2104.12741}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{möller2021germanquad, + archiveprefix = {arXiv}, + author = {Timo Möller and Julian Risch and Malte Pietsch}, + eprint = {2104.12741}, + primaryclass = {cs.CL}, + title = {GermanQuAD and GermanDPR: Improving Non-English Question Answering and Passage Retrieval}, + year = {2021}, +} +""", ) @staticmethod diff --git a/mteb/tasks/Retrieval/deu/GermanGovServiceRetrieval.py b/mteb/tasks/Retrieval/deu/GermanGovServiceRetrieval.py index 7d77873882..a6fc74455a 100644 --- a/mteb/tasks/Retrieval/deu/GermanGovServiceRetrieval.py +++ b/mteb/tasks/Retrieval/deu/GermanGovServiceRetrieval.py @@ -32,18 +32,20 @@ class GermanGovServiceRetrieval(AbsTaskRetrieval): license="mit", annotations_creators="derived", dialect=[], 
- bibtex_citation="""@software{lhm-dienstleistungen-qa, - author = {Schröder, Leon Marius and - Gutknecht, Clemens and - Alkiddeh, Oubada and - Susanne Weiß, - Lukas, Leon}, - title = {LHM-Dienstleistungen-QA - german public domain question-answering dataset}, - month = nov, - year = 2022, - publisher = {it@M}, - url = {https://huggingface.co/datasets/it-at-m/LHM-Dienstleistungen-QA} -}""", + bibtex_citation=r""" +@software{lhm-dienstleistungen-qa, + author = {Schröder, Leon Marius and +Gutknecht, Clemens and +Alkiddeh, Oubada and +Susanne Weiß, +Lukas, Leon}, + month = nov, + publisher = {it@M}, + title = {LHM-Dienstleistungen-QA - german public domain question-answering dataset}, + url = {https://huggingface.co/datasets/it-at-m/LHM-Dienstleistungen-QA}, + year = {2022}, +} +""", sample_creation="found", ) diff --git a/mteb/tasks/Retrieval/deu/GermanQuADRetrieval.py b/mteb/tasks/Retrieval/deu/GermanQuADRetrieval.py index 6f4eada30d..a6b1a5d7d5 100644 --- a/mteb/tasks/Retrieval/deu/GermanQuADRetrieval.py +++ b/mteb/tasks/Retrieval/deu/GermanQuADRetrieval.py @@ -49,14 +49,16 @@ class GermanQuADRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{möller2021germanquad, - title={GermanQuAD and GermanDPR: Improving Non-English Question Answering and Passage Retrieval}, - author={Timo Möller and Julian Risch and Malte Pietsch}, - year={2021}, - eprint={2104.12741}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{möller2021germanquad, + archiveprefix = {arXiv}, + author = {Timo Möller and Julian Risch and Malte Pietsch}, + eprint = {2104.12741}, + primaryclass = {cs.CL}, + title = {GermanQuAD and GermanDPR: Improving Non-English Question Answering and Passage Retrieval}, + year = {2021}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/deu/LegalQuADRetrieval.py b/mteb/tasks/Retrieval/deu/LegalQuADRetrieval.py index 
663d427f97..60132b1aad 100644 --- a/mteb/tasks/Retrieval/deu/LegalQuADRetrieval.py +++ b/mteb/tasks/Retrieval/deu/LegalQuADRetrieval.py @@ -27,15 +27,17 @@ class LegalQuAD(AbsTaskRetrieval): annotations_creators="derived", dialect=None, sample_creation="found", - bibtex_citation="""@INPROCEEDINGS{9723721, - author={Hoppe, Christoph and Pelkmann, David and Migenda, Nico and Hötte, Daniel and Schenck, Wolfram}, - booktitle={2021 IEEE Fourth International Conference on Artificial Intelligence and Knowledge Engineering (AIKE)}, - title={Towards Intelligent Legal Advisors for Document Retrieval and Question-Answering in German Legal Documents}, - year={2021}, - volume={}, - number={}, - pages={29-32}, - keywords={Knowledge engineering;Law;Semantic search;Conferences;Bit error rate;NLP;knowledge extraction;question-answering;semantic search;document retrieval;German language}, - doi={10.1109/AIKE52691.2021.00011} - }""", + bibtex_citation=r""" +@inproceedings{9723721, + author = {Hoppe, Christoph and Pelkmann, David and Migenda, Nico and Hötte, Daniel and Schenck, Wolfram}, + booktitle = {2021 IEEE Fourth International Conference on Artificial Intelligence and Knowledge Engineering (AIKE)}, + doi = {10.1109/AIKE52691.2021.00011}, + keywords = {Knowledge engineering;Law;Semantic search;Conferences;Bit error rate;NLP;knowledge extraction;question-answering;semantic search;document retrieval;German language}, + number = {}, + pages = {29-32}, + title = {Towards Intelligent Legal Advisors for Document Retrieval and Question-Answering in German Legal Documents}, + volume = {}, + year = {2021}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/AILACasedocsRetrieval.py b/mteb/tasks/Retrieval/eng/AILACasedocsRetrieval.py index d1d56e7737..2549e584ba 100644 --- a/mteb/tasks/Retrieval/eng/AILACasedocsRetrieval.py +++ b/mteb/tasks/Retrieval/eng/AILACasedocsRetrieval.py @@ -27,19 +27,21 @@ class AILACasedocs(AbsTaskRetrieval): annotations_creators="derived", dialect=None, 
sample_creation="found", - bibtex_citation="""@dataset{paheli_bhattacharya_2020_4063986, - author = {Paheli Bhattacharya and - Kripabandhu Ghosh and - Saptarshi Ghosh and - Arindam Pal and - Parth Mehta and - Arnab Bhattacharya and - Prasenjit Majumder}, - title = {AILA 2019 Precedent \& Statute Retrieval Task}, - month = oct, - year = 2020, - publisher = {Zenodo}, - doi = {10.5281/zenodo.4063986}, - url = {https://doi.org/10.5281/zenodo.4063986} -}""", + bibtex_citation=r""" +@dataset{paheli_bhattacharya_2020_4063986, + author = {Paheli Bhattacharya and +Kripabandhu Ghosh and +Saptarshi Ghosh and +Arindam Pal and +Parth Mehta and +Arnab Bhattacharya and +Prasenjit Majumder}, + doi = {10.5281/zenodo.4063986}, + month = oct, + publisher = {Zenodo}, + title = {AILA 2019 Precedent \& Statute Retrieval Task}, + url = {https://doi.org/10.5281/zenodo.4063986}, + year = {2020}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/AILAStatutesRetrieval.py b/mteb/tasks/Retrieval/eng/AILAStatutesRetrieval.py index 4577c64ed6..43a7b15602 100644 --- a/mteb/tasks/Retrieval/eng/AILAStatutesRetrieval.py +++ b/mteb/tasks/Retrieval/eng/AILAStatutesRetrieval.py @@ -27,19 +27,21 @@ class AILAStatutes(AbsTaskRetrieval): annotations_creators="derived", dialect=None, sample_creation="found", - bibtex_citation="""@dataset{paheli_bhattacharya_2020_4063986, - author = {Paheli Bhattacharya and - Kripabandhu Ghosh and - Saptarshi Ghosh and - Arindam Pal and - Parth Mehta and - Arnab Bhattacharya and - Prasenjit Majumder}, - title = {AILA 2019 Precedent \& Statute Retrieval Task}, - month = oct, - year = 2020, - publisher = {Zenodo}, - doi = {10.5281/zenodo.4063986}, - url = {https://doi.org/10.5281/zenodo.4063986} -}""", + bibtex_citation=r""" +@dataset{paheli_bhattacharya_2020_4063986, + author = {Paheli Bhattacharya and +Kripabandhu Ghosh and +Saptarshi Ghosh and +Arindam Pal and +Parth Mehta and +Arnab Bhattacharya and +Prasenjit Majumder}, + doi = {10.5281/zenodo.4063986}, + month = oct, + 
publisher = {Zenodo}, + title = {AILA 2019 Precedent \& Statute Retrieval Task}, + url = {https://doi.org/10.5281/zenodo.4063986}, + year = {2020}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/ARCChallengeRetrieval.py b/mteb/tasks/Retrieval/eng/ARCChallengeRetrieval.py index 7488e902d2..cb96c9cb78 100644 --- a/mteb/tasks/Retrieval/eng/ARCChallengeRetrieval.py +++ b/mteb/tasks/Retrieval/eng/ARCChallengeRetrieval.py @@ -28,17 +28,19 @@ class ARCChallenge(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} -} + bibtex_citation=r""" @article{clark2018think, - title={Think you have solved question answering? try arc, the ai2 reasoning challenge}, - author={Clark, Peter and Cowhey, Isaac and Etzioni, Oren and Khot, Tushar and Sabharwal, Ashish and Schoenick, Carissa and Tafjord, Oyvind}, - journal={arXiv preprint arXiv:1803.05457}, - year={2018} + author = {Clark, Peter and Cowhey, Isaac and Etzioni, Oren and Khot, Tushar and Sabharwal, Ashish and Schoenick, Carissa and Tafjord, Oyvind}, + journal = {arXiv preprint arXiv:1803.05457}, + title = {Think you have solved question answering? 
try arc, the ai2 reasoning challenge}, + year = {2018}, +} + +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={"query": "Retrieve the answer to the question."}, diff --git a/mteb/tasks/Retrieval/eng/AlphaNLIRetrieval.py b/mteb/tasks/Retrieval/eng/AlphaNLIRetrieval.py index 3fd53b5ab5..4ac99f5fcc 100644 --- a/mteb/tasks/Retrieval/eng/AlphaNLIRetrieval.py +++ b/mteb/tasks/Retrieval/eng/AlphaNLIRetrieval.py @@ -28,18 +28,19 @@ class AlphaNLI(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} + bibtex_citation=r""" +@article{bhagavatula2019abductive, + author = {Bhagavatula, Chandra and Bras, Ronan Le and Malaviya, Chaitanya and Sakaguchi, Keisuke and Holtzman, Ari and Rashkin, Hannah and Downey, Doug and Yih, Scott Wen-tau and Choi, Yejin}, + journal = {arXiv preprint arXiv:1908.05739}, + title = {Abductive commonsense reasoning}, + year = {2019}, } -@article{bhagavatula2019abductive, - title={Abductive commonsense reasoning}, - author={Bhagavatula, Chandra and Bras, Ronan Le and Malaviya, Chaitanya and Sakaguchi, Keisuke and Holtzman, Ari and Rashkin, Hannah and Downey, Doug and Yih, Scott Wen-tau and Choi, Yejin}, - journal={arXiv preprint arXiv:1908.05739}, - year={2019} +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={ diff --git a/mteb/tasks/Retrieval/eng/ArguAnaRetrieval.py b/mteb/tasks/Retrieval/eng/ArguAnaRetrieval.py index ff608bab6e..a3e7e93a75 100644 
--- a/mteb/tasks/Retrieval/eng/ArguAnaRetrieval.py +++ b/mteb/tasks/Retrieval/eng/ArguAnaRetrieval.py @@ -29,15 +29,17 @@ class ArguAna(AbsTaskRetrieval): annotations_creators=None, dialect=[], sample_creation=None, - bibtex_citation="""@inproceedings{boteva2016, + bibtex_citation=r""" +@inproceedings{boteva2016, author = {Boteva, Vera and Gholipour, Demian and Sokolov, Artem and Riezler, Stefan}, - title = {A Full-Text Learning to Rank Dataset for Medical Information Retrieval}, + city = {Padova}, + country = {Italy}, journal = {Proceedings of the 38th European Conference on Information Retrieval}, journal-abbrev = {ECIR}, + title = {A Full-Text Learning to Rank Dataset for Medical Information Retrieval}, + url = {http://www.cl.uni-heidelberg.de/~riezler/publications/papers/ECIR2016.pdf}, year = {2016}, - city = {Padova}, - country = {Italy}, - url = {http://www.cl.uni-heidelberg.de/~riezler/publications/papers/ECIR2016.pdf} -}""", +} +""", prompt={"query": "Given a claim, find documents that refute the claim"}, ) diff --git a/mteb/tasks/Retrieval/eng/BrightRetrieval.py b/mteb/tasks/Retrieval/eng/BrightRetrieval.py index 4cb6cf8fbd..35b5b2e0bb 100644 --- a/mteb/tasks/Retrieval/eng/BrightRetrieval.py +++ b/mteb/tasks/Retrieval/eng/BrightRetrieval.py @@ -124,17 +124,17 @@ class BrightRetrieval(MultilingualTask, AbsTaskRetrieval): dialect=[], sample_creation="found", modalities=["text"], - bibtex_citation=""" + bibtex_citation=r""" @misc{su2024brightrealisticchallengingbenchmark, - title={BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval}, - author={Hongjin Su and Howard Yen and Mengzhou Xia and Weijia Shi and Niklas Muennighoff and Han-yu Wang and Haisu Liu and Quan Shi and Zachary S. Siegel and Michael Tang and Ruoxi Sun and Jinsung Yoon and Sercan O. 
Arik and Danqi Chen and Tao Yu}, - year={2024}, - eprint={2407.12883}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2407.12883}, + archiveprefix = {arXiv}, + author = {Hongjin Su and Howard Yen and Mengzhou Xia and Weijia Shi and Niklas Muennighoff and Han-yu Wang and Haisu Liu and Quan Shi and Zachary S. Siegel and Michael Tang and Ruoxi Sun and Jinsung Yoon and Sercan O. Arik and Danqi Chen and Tao Yu}, + eprint = {2407.12883}, + primaryclass = {cs.CL}, + title = {BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval}, + url = {https://arxiv.org/abs/2407.12883}, + year = {2024}, } - """, +""", ) load_bright_data = load_bright_data load_data = load_data diff --git a/mteb/tasks/Retrieval/eng/BuiltBenchRetrieval.py b/mteb/tasks/Retrieval/eng/BuiltBenchRetrieval.py index 5d36f219a7..5762b75c23 100644 --- a/mteb/tasks/Retrieval/eng/BuiltBenchRetrieval.py +++ b/mteb/tasks/Retrieval/eng/BuiltBenchRetrieval.py @@ -27,12 +27,14 @@ class BuiltBenchRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation="""@article{shahinmoghadam2024benchmarking, - title={Benchmarking pre-trained text embedding models in aligning built asset information}, - author={Shahinmoghadam, Mehrzad and Motamedi, Ali}, - journal={arXiv preprint arXiv:2411.12056}, - year={2024} -}""", + bibtex_citation=r""" +@article{shahinmoghadam2024benchmarking, + author = {Shahinmoghadam, Mehrzad and Motamedi, Ali}, + journal = {arXiv preprint arXiv:2411.12056}, + title = {Benchmarking pre-trained text embedding models in aligning built asset information}, + year = {2024}, +} +""", prompt={ "query": "Given a query, retrieve relevant entity descriptions from buit asset classification systems such as IFC and Uniclass" }, diff --git a/mteb/tasks/Retrieval/eng/CQADupstackAndroidRetrieval.py b/mteb/tasks/Retrieval/eng/CQADupstackAndroidRetrieval.py index 156395a077..018c0e33c4 100644 --- 
a/mteb/tasks/Retrieval/eng/CQADupstackAndroidRetrieval.py +++ b/mteb/tasks/Retrieval/eng/CQADupstackAndroidRetrieval.py @@ -27,21 +27,23 @@ class CQADupstackAndroidRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{hoogeveen2015, -author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy}, -title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, -booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, -series = {ADCS '15}, -year = {2015}, -isbn = {978-1-4503-4040-3}, -location = {Parramatta, NSW, Australia}, -pages = {3:1--3:8}, -articleno = {3}, -numpages = {8}, -url = {http://doi.acm.org/10.1145/2838931.2838934}, -doi = {10.1145/2838931.2838934}, -acmid = {2838934}, -publisher = {ACM}, -address = {New York, NY, USA}, -}""", + bibtex_citation=r""" +@inproceedings{hoogeveen2015, + acmid = {2838934}, + address = {New York, NY, USA}, + articleno = {3}, + author = {Hoogeveen, Doris and Verspoor, Karin M. 
and Baldwin, Timothy}, + booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, + doi = {10.1145/2838931.2838934}, + isbn = {978-1-4503-4040-3}, + location = {Parramatta, NSW, Australia}, + numpages = {8}, + pages = {3:1--3:8}, + publisher = {ACM}, + series = {ADCS '15}, + title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, + url = {http://doi.acm.org/10.1145/2838931.2838934}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/CQADupstackEnglishRetrieval.py b/mteb/tasks/Retrieval/eng/CQADupstackEnglishRetrieval.py index af47eda5c4..0c7397de97 100644 --- a/mteb/tasks/Retrieval/eng/CQADupstackEnglishRetrieval.py +++ b/mteb/tasks/Retrieval/eng/CQADupstackEnglishRetrieval.py @@ -27,21 +27,23 @@ class CQADupstackEnglishRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{hoogeveen2015, -author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy}, -title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, -booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, -series = {ADCS '15}, -year = {2015}, -isbn = {978-1-4503-4040-3}, -location = {Parramatta, NSW, Australia}, -pages = {3:1--3:8}, -articleno = {3}, -numpages = {8}, -url = {http://doi.acm.org/10.1145/2838931.2838934}, -doi = {10.1145/2838931.2838934}, -acmid = {2838934}, -publisher = {ACM}, -address = {New York, NY, USA}, -}""", + bibtex_citation=r""" +@inproceedings{hoogeveen2015, + acmid = {2838934}, + address = {New York, NY, USA}, + articleno = {3}, + author = {Hoogeveen, Doris and Verspoor, Karin M. 
and Baldwin, Timothy}, + booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, + doi = {10.1145/2838931.2838934}, + isbn = {978-1-4503-4040-3}, + location = {Parramatta, NSW, Australia}, + numpages = {8}, + pages = {3:1--3:8}, + publisher = {ACM}, + series = {ADCS '15}, + title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, + url = {http://doi.acm.org/10.1145/2838931.2838934}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/CQADupstackGamingRetrieval.py b/mteb/tasks/Retrieval/eng/CQADupstackGamingRetrieval.py index b51a3e64b5..e5f557b0ec 100644 --- a/mteb/tasks/Retrieval/eng/CQADupstackGamingRetrieval.py +++ b/mteb/tasks/Retrieval/eng/CQADupstackGamingRetrieval.py @@ -27,21 +27,23 @@ class CQADupstackGamingRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{hoogeveen2015, -author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy}, -title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, -booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, -series = {ADCS '15}, -year = {2015}, -isbn = {978-1-4503-4040-3}, -location = {Parramatta, NSW, Australia}, -pages = {3:1--3:8}, -articleno = {3}, -numpages = {8}, -url = {http://doi.acm.org/10.1145/2838931.2838934}, -doi = {10.1145/2838931.2838934}, -acmid = {2838934}, -publisher = {ACM}, -address = {New York, NY, USA}, -}""", + bibtex_citation=r""" +@inproceedings{hoogeveen2015, + acmid = {2838934}, + address = {New York, NY, USA}, + articleno = {3}, + author = {Hoogeveen, Doris and Verspoor, Karin M. 
and Baldwin, Timothy}, + booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, + doi = {10.1145/2838931.2838934}, + isbn = {978-1-4503-4040-3}, + location = {Parramatta, NSW, Australia}, + numpages = {8}, + pages = {3:1--3:8}, + publisher = {ACM}, + series = {ADCS '15}, + title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, + url = {http://doi.acm.org/10.1145/2838931.2838934}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/CQADupstackGisRetrieval.py b/mteb/tasks/Retrieval/eng/CQADupstackGisRetrieval.py index da38284f2d..ed6be358ee 100644 --- a/mteb/tasks/Retrieval/eng/CQADupstackGisRetrieval.py +++ b/mteb/tasks/Retrieval/eng/CQADupstackGisRetrieval.py @@ -27,21 +27,23 @@ class CQADupstackGisRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{hoogeveen2015, -author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy}, -title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, -booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, -series = {ADCS '15}, -year = {2015}, -isbn = {978-1-4503-4040-3}, -location = {Parramatta, NSW, Australia}, -pages = {3:1--3:8}, -articleno = {3}, -numpages = {8}, -url = {http://doi.acm.org/10.1145/2838931.2838934}, -doi = {10.1145/2838931.2838934}, -acmid = {2838934}, -publisher = {ACM}, -address = {New York, NY, USA}, -}""", + bibtex_citation=r""" +@inproceedings{hoogeveen2015, + acmid = {2838934}, + address = {New York, NY, USA}, + articleno = {3}, + author = {Hoogeveen, Doris and Verspoor, Karin M. 
and Baldwin, Timothy}, + booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, + doi = {10.1145/2838931.2838934}, + isbn = {978-1-4503-4040-3}, + location = {Parramatta, NSW, Australia}, + numpages = {8}, + pages = {3:1--3:8}, + publisher = {ACM}, + series = {ADCS '15}, + title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, + url = {http://doi.acm.org/10.1145/2838931.2838934}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/CQADupstackMathematicaRetrieval.py b/mteb/tasks/Retrieval/eng/CQADupstackMathematicaRetrieval.py index b29d166129..a141fe222e 100644 --- a/mteb/tasks/Retrieval/eng/CQADupstackMathematicaRetrieval.py +++ b/mteb/tasks/Retrieval/eng/CQADupstackMathematicaRetrieval.py @@ -27,21 +27,23 @@ class CQADupstackMathematicaRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{hoogeveen2015, -author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy}, -title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, -booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, -series = {ADCS '15}, -year = {2015}, -isbn = {978-1-4503-4040-3}, -location = {Parramatta, NSW, Australia}, -pages = {3:1--3:8}, -articleno = {3}, -numpages = {8}, -url = {http://doi.acm.org/10.1145/2838931.2838934}, -doi = {10.1145/2838931.2838934}, -acmid = {2838934}, -publisher = {ACM}, -address = {New York, NY, USA}, -}""", + bibtex_citation=r""" +@inproceedings{hoogeveen2015, + acmid = {2838934}, + address = {New York, NY, USA}, + articleno = {3}, + author = {Hoogeveen, Doris and Verspoor, Karin M. 
and Baldwin, Timothy}, + booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, + doi = {10.1145/2838931.2838934}, + isbn = {978-1-4503-4040-3}, + location = {Parramatta, NSW, Australia}, + numpages = {8}, + pages = {3:1--3:8}, + publisher = {ACM}, + series = {ADCS '15}, + title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, + url = {http://doi.acm.org/10.1145/2838931.2838934}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/CQADupstackPhysicsRetrieval.py b/mteb/tasks/Retrieval/eng/CQADupstackPhysicsRetrieval.py index 3dd0fdc4a5..df6bb8e913 100644 --- a/mteb/tasks/Retrieval/eng/CQADupstackPhysicsRetrieval.py +++ b/mteb/tasks/Retrieval/eng/CQADupstackPhysicsRetrieval.py @@ -27,21 +27,23 @@ class CQADupstackPhysicsRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{hoogeveen2015, -author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy}, -title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, -booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, -series = {ADCS '15}, -year = {2015}, -isbn = {978-1-4503-4040-3}, -location = {Parramatta, NSW, Australia}, -pages = {3:1--3:8}, -articleno = {3}, -numpages = {8}, -url = {http://doi.acm.org/10.1145/2838931.2838934}, -doi = {10.1145/2838931.2838934}, -acmid = {2838934}, -publisher = {ACM}, -address = {New York, NY, USA}, -}""", + bibtex_citation=r""" +@inproceedings{hoogeveen2015, + acmid = {2838934}, + address = {New York, NY, USA}, + articleno = {3}, + author = {Hoogeveen, Doris and Verspoor, Karin M. 
and Baldwin, Timothy}, + booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, + doi = {10.1145/2838931.2838934}, + isbn = {978-1-4503-4040-3}, + location = {Parramatta, NSW, Australia}, + numpages = {8}, + pages = {3:1--3:8}, + publisher = {ACM}, + series = {ADCS '15}, + title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, + url = {http://doi.acm.org/10.1145/2838931.2838934}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/CQADupstackProgrammersRetrieval.py b/mteb/tasks/Retrieval/eng/CQADupstackProgrammersRetrieval.py index f84b1b17e4..5d1ef668e6 100644 --- a/mteb/tasks/Retrieval/eng/CQADupstackProgrammersRetrieval.py +++ b/mteb/tasks/Retrieval/eng/CQADupstackProgrammersRetrieval.py @@ -27,21 +27,23 @@ class CQADupstackProgrammersRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{hoogeveen2015, -author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy}, -title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, -booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, -series = {ADCS '15}, -year = {2015}, -isbn = {978-1-4503-4040-3}, -location = {Parramatta, NSW, Australia}, -pages = {3:1--3:8}, -articleno = {3}, -numpages = {8}, -url = {http://doi.acm.org/10.1145/2838931.2838934}, -doi = {10.1145/2838931.2838934}, -acmid = {2838934}, -publisher = {ACM}, -address = {New York, NY, USA}, -}""", + bibtex_citation=r""" +@inproceedings{hoogeveen2015, + acmid = {2838934}, + address = {New York, NY, USA}, + articleno = {3}, + author = {Hoogeveen, Doris and Verspoor, Karin M. 
and Baldwin, Timothy}, + booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, + doi = {10.1145/2838931.2838934}, + isbn = {978-1-4503-4040-3}, + location = {Parramatta, NSW, Australia}, + numpages = {8}, + pages = {3:1--3:8}, + publisher = {ACM}, + series = {ADCS '15}, + title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, + url = {http://doi.acm.org/10.1145/2838931.2838934}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/CQADupstackStatsRetrieval.py b/mteb/tasks/Retrieval/eng/CQADupstackStatsRetrieval.py index 1fd18f8d84..b0b052a3d9 100644 --- a/mteb/tasks/Retrieval/eng/CQADupstackStatsRetrieval.py +++ b/mteb/tasks/Retrieval/eng/CQADupstackStatsRetrieval.py @@ -27,21 +27,23 @@ class CQADupstackStatsRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{hoogeveen2015, -author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy}, -title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, -booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, -series = {ADCS '15}, -year = {2015}, -isbn = {978-1-4503-4040-3}, -location = {Parramatta, NSW, Australia}, -pages = {3:1--3:8}, -articleno = {3}, -numpages = {8}, -url = {http://doi.acm.org/10.1145/2838931.2838934}, -doi = {10.1145/2838931.2838934}, -acmid = {2838934}, -publisher = {ACM}, -address = {New York, NY, USA}, -}""", + bibtex_citation=r""" +@inproceedings{hoogeveen2015, + acmid = {2838934}, + address = {New York, NY, USA}, + articleno = {3}, + author = {Hoogeveen, Doris and Verspoor, Karin M. 
and Baldwin, Timothy}, + booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, + doi = {10.1145/2838931.2838934}, + isbn = {978-1-4503-4040-3}, + location = {Parramatta, NSW, Australia}, + numpages = {8}, + pages = {3:1--3:8}, + publisher = {ACM}, + series = {ADCS '15}, + title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, + url = {http://doi.acm.org/10.1145/2838931.2838934}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/CQADupstackTexRetrieval.py b/mteb/tasks/Retrieval/eng/CQADupstackTexRetrieval.py index c4447442be..f44a67ed9e 100644 --- a/mteb/tasks/Retrieval/eng/CQADupstackTexRetrieval.py +++ b/mteb/tasks/Retrieval/eng/CQADupstackTexRetrieval.py @@ -27,21 +27,23 @@ class CQADupstackTexRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{hoogeveen2015, -author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy}, -title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, -booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, -series = {ADCS '15}, -year = {2015}, -isbn = {978-1-4503-4040-3}, -location = {Parramatta, NSW, Australia}, -pages = {3:1--3:8}, -articleno = {3}, -numpages = {8}, -url = {http://doi.acm.org/10.1145/2838931.2838934}, -doi = {10.1145/2838931.2838934}, -acmid = {2838934}, -publisher = {ACM}, -address = {New York, NY, USA}, -}""", + bibtex_citation=r""" +@inproceedings{hoogeveen2015, + acmid = {2838934}, + address = {New York, NY, USA}, + articleno = {3}, + author = {Hoogeveen, Doris and Verspoor, Karin M. 
and Baldwin, Timothy}, + booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, + doi = {10.1145/2838931.2838934}, + isbn = {978-1-4503-4040-3}, + location = {Parramatta, NSW, Australia}, + numpages = {8}, + pages = {3:1--3:8}, + publisher = {ACM}, + series = {ADCS '15}, + title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, + url = {http://doi.acm.org/10.1145/2838931.2838934}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/CQADupstackUnixRetrieval.py b/mteb/tasks/Retrieval/eng/CQADupstackUnixRetrieval.py index 57c9964b15..7365f991f3 100644 --- a/mteb/tasks/Retrieval/eng/CQADupstackUnixRetrieval.py +++ b/mteb/tasks/Retrieval/eng/CQADupstackUnixRetrieval.py @@ -27,21 +27,23 @@ class CQADupstackUnixRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{hoogeveen2015, -author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy}, -title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, -booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, -series = {ADCS '15}, -year = {2015}, -isbn = {978-1-4503-4040-3}, -location = {Parramatta, NSW, Australia}, -pages = {3:1--3:8}, -articleno = {3}, -numpages = {8}, -url = {http://doi.acm.org/10.1145/2838931.2838934}, -doi = {10.1145/2838931.2838934}, -acmid = {2838934}, -publisher = {ACM}, -address = {New York, NY, USA}, -}""", + bibtex_citation=r""" +@inproceedings{hoogeveen2015, + acmid = {2838934}, + address = {New York, NY, USA}, + articleno = {3}, + author = {Hoogeveen, Doris and Verspoor, Karin M. 
and Baldwin, Timothy}, + booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, + doi = {10.1145/2838931.2838934}, + isbn = {978-1-4503-4040-3}, + location = {Parramatta, NSW, Australia}, + numpages = {8}, + pages = {3:1--3:8}, + publisher = {ACM}, + series = {ADCS '15}, + title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, + url = {http://doi.acm.org/10.1145/2838931.2838934}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/CQADupstackWebmastersRetrieval.py b/mteb/tasks/Retrieval/eng/CQADupstackWebmastersRetrieval.py index 2e9bd63e08..78564903e7 100644 --- a/mteb/tasks/Retrieval/eng/CQADupstackWebmastersRetrieval.py +++ b/mteb/tasks/Retrieval/eng/CQADupstackWebmastersRetrieval.py @@ -27,21 +27,23 @@ class CQADupstackWebmastersRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{hoogeveen2015, -author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy}, -title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, -booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, -series = {ADCS '15}, -year = {2015}, -isbn = {978-1-4503-4040-3}, -location = {Parramatta, NSW, Australia}, -pages = {3:1--3:8}, -articleno = {3}, -numpages = {8}, -url = {http://doi.acm.org/10.1145/2838931.2838934}, -doi = {10.1145/2838931.2838934}, -acmid = {2838934}, -publisher = {ACM}, -address = {New York, NY, USA}, -}""", + bibtex_citation=r""" +@inproceedings{hoogeveen2015, + acmid = {2838934}, + address = {New York, NY, USA}, + articleno = {3}, + author = {Hoogeveen, Doris and Verspoor, Karin M. 
and Baldwin, Timothy}, + booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, + doi = {10.1145/2838931.2838934}, + isbn = {978-1-4503-4040-3}, + location = {Parramatta, NSW, Australia}, + numpages = {8}, + pages = {3:1--3:8}, + publisher = {ACM}, + series = {ADCS '15}, + title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, + url = {http://doi.acm.org/10.1145/2838931.2838934}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/CQADupstackWordpressRetrieval.py b/mteb/tasks/Retrieval/eng/CQADupstackWordpressRetrieval.py index 3b11866f82..9b89e232e9 100644 --- a/mteb/tasks/Retrieval/eng/CQADupstackWordpressRetrieval.py +++ b/mteb/tasks/Retrieval/eng/CQADupstackWordpressRetrieval.py @@ -27,21 +27,23 @@ class CQADupstackWordpressRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{hoogeveen2015, -author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy}, -title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, -booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, -series = {ADCS '15}, -year = {2015}, -isbn = {978-1-4503-4040-3}, -location = {Parramatta, NSW, Australia}, -pages = {3:1--3:8}, -articleno = {3}, -numpages = {8}, -url = {http://doi.acm.org/10.1145/2838931.2838934}, -doi = {10.1145/2838931.2838934}, -acmid = {2838934}, -publisher = {ACM}, -address = {New York, NY, USA}, -}""", + bibtex_citation=r""" +@inproceedings{hoogeveen2015, + acmid = {2838934}, + address = {New York, NY, USA}, + articleno = {3}, + author = {Hoogeveen, Doris and Verspoor, Karin M. 
and Baldwin, Timothy}, + booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, + doi = {10.1145/2838931.2838934}, + isbn = {978-1-4503-4040-3}, + location = {Parramatta, NSW, Australia}, + numpages = {8}, + pages = {3:1--3:8}, + publisher = {ACM}, + series = {ADCS '15}, + title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, + url = {http://doi.acm.org/10.1145/2838931.2838934}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/ChemHotpotQARetrieval.py b/mteb/tasks/Retrieval/eng/ChemHotpotQARetrieval.py index 88fbc50df4..d6f458c432 100644 --- a/mteb/tasks/Retrieval/eng/ChemHotpotQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/ChemHotpotQARetrieval.py @@ -26,35 +26,36 @@ class ChemHotpotQARetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - @inproceedings{yang-etal-2018-hotpotqa, - title = "{H}otpot{QA}: A Dataset for Diverse, Explainable Multi-hop Question Answering", - author = "Yang, Zhilin and - Qi, Peng and - Zhang, Saizheng and - Bengio, Yoshua and - Cohen, William and - Salakhutdinov, Ruslan and - Manning, Christopher D.", - editor = "Riloff, Ellen and - Chiang, David and - Hockenmaier, Julia and - Tsujii, Jun{'}ichi", - booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing", - month = oct # "-" # nov, - year = "2018", - address = "Brussels, Belgium", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/D18-1259", - doi = "10.18653/v1/D18-1259", - 
pages = "2369--2380", - abstract = "Existing question answering (QA) datasets fail to train QA systems to perform complex reasoning and provide explanations for answers. We introduce HotpotQA, a new dataset with 113k Wikipedia-based question-answer pairs with four key features: (1) the questions require finding and reasoning over multiple supporting documents to answer; (2) the questions are diverse and not constrained to any pre-existing knowledge bases or knowledge schemas; (3) we provide sentence-level supporting facts required for reasoning, allowing QA systems to reason with strong supervision and explain the predictions; (4) we offer a new type of factoid comparison questions to test QA systems{'} ability to extract relevant facts and perform necessary comparison. We show that HotpotQA is challenging for the latest QA systems, and the supporting facts enable models to improve performance and make explainable predictions.", - } + bibtex_citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} + +@inproceedings{yang-etal-2018-hotpotqa, + abstract = {Existing question answering (QA) datasets fail to train QA systems to perform complex reasoning and provide explanations for answers. 
We introduce HotpotQA, a new dataset with 113k Wikipedia-based question-answer pairs with four key features: (1) the questions require finding and reasoning over multiple supporting documents to answer; (2) the questions are diverse and not constrained to any pre-existing knowledge bases or knowledge schemas; (3) we provide sentence-level supporting facts required for reasoning, allowing QA systems to reason with strong supervision and explain the predictions; (4) we offer a new type of factoid comparison questions to test QA systems{'} ability to extract relevant facts and perform necessary comparison. We show that HotpotQA is challenging for the latest QA systems, and the supporting facts enable models to improve performance and make explainable predictions.}, + address = {Brussels, Belgium}, + author = {Yang, Zhilin and +Qi, Peng and +Zhang, Saizheng and +Bengio, Yoshua and +Cohen, William and +Salakhutdinov, Ruslan and +Manning, Christopher D.}, + booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing}, + doi = {10.18653/v1/D18-1259}, + editor = {Riloff, Ellen and +Chiang, David and +Hockenmaier, Julia and +Tsujii, Jun{'}ichi}, + month = oct # {-} # nov, + pages = {2369--2380}, + publisher = {Association for Computational Linguistics}, + title = {{H}otpot{QA}: A Dataset for Diverse, Explainable Multi-hop Question Answering}, + url = {https://aclanthology.org/D18-1259}, + year = {2018}, +} """, ) diff --git a/mteb/tasks/Retrieval/eng/ChemNQRetrieval.py b/mteb/tasks/Retrieval/eng/ChemNQRetrieval.py index 6058932550..e970f7b6e9 100644 --- a/mteb/tasks/Retrieval/eng/ChemNQRetrieval.py +++ b/mteb/tasks/Retrieval/eng/ChemNQRetrieval.py @@ -26,20 +26,22 @@ class ChemNQRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& 
Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} - } - @article{47761, - title = {Natural Questions: a Benchmark for Question Answering Research}, - author = {Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh - and Chris Alberti and Danielle Epstein and Illia Polosukhin and Matthew Kelcey and Jacob Devlin and Kenton Lee - and Kristina N. Toutanova and Llion Jones and Ming-Wei Chang and Andrew Dai and Jakob Uszkoreit and Quoc Le - and Slav Petrov}, - year = {2019}, - journal = {Transactions of the Association of Computational Linguistics}} - """, + bibtex_citation=r""" +@article{47761, + author = {Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh +and Chris Alberti and Danielle Epstein and Illia Polosukhin and Matthew Kelcey and Jacob Devlin and Kenton Lee +and Kristina N. 
Toutanova and Llion Jones and Ming-Wei Chang and Andrew Dai and Jakob Uszkoreit and Quoc Le +and Slav Petrov}, + journal = {Transactions of the Association of Computational Linguistics}, + title = {Natural Questions: a Benchmark for Question Answering Research}, + year = {2019}, +} + +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/ClimateFEVERRetrieval.py b/mteb/tasks/Retrieval/eng/ClimateFEVERRetrieval.py index dd0eebfa82..8f841ff14f 100644 --- a/mteb/tasks/Retrieval/eng/ClimateFEVERRetrieval.py +++ b/mteb/tasks/Retrieval/eng/ClimateFEVERRetrieval.py @@ -27,14 +27,16 @@ class ClimateFEVER(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{diggelmann2021climatefever, - title={CLIMATE-FEVER: A Dataset for Verification of Real-World Climate Claims}, - author={Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold}, - year={2021}, - eprint={2012.00614}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{diggelmann2021climatefever, + archiveprefix = {arXiv}, + author = {Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold}, + eprint = {2012.00614}, + primaryclass = {cs.CL}, + title = {CLIMATE-FEVER: A Dataset for Verification of Real-World Climate Claims}, + year = {2021}, +} +""", prompt={ "query": "Given a claim about climate change, retrieve documents that support or refute the claim" }, @@ -63,14 +65,16 @@ class ClimateFEVERHardNegatives(AbsTaskRetrieval): 
annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{diggelmann2021climatefever, - title={CLIMATE-FEVER: A Dataset for Verification of Real-World Climate Claims}, - author={Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold}, - year={2021}, - eprint={2012.00614}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{diggelmann2021climatefever, + archiveprefix = {arXiv}, + author = {Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold}, + eprint = {2012.00614}, + primaryclass = {cs.CL}, + title = {CLIMATE-FEVER: A Dataset for Verification of Real-World Climate Claims}, + year = {2021}, +} +""", adapted_from=["ClimateFEVER"], ) @@ -97,14 +101,16 @@ class ClimateFEVERRetrievalv2(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{diggelmann2021climatefever, - title={CLIMATE-FEVER: A Dataset for Verification of Real-World Climate Claims}, - author={Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold}, - year={2021}, - eprint={2012.00614}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{diggelmann2021climatefever, + archiveprefix = {arXiv}, + author = {Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold}, + eprint = {2012.00614}, + primaryclass = {cs.CL}, + title = {CLIMATE-FEVER: A Dataset for Verification of Real-World Climate Claims}, + year = {2021}, +} +""", prompt={ "query": "Given a claim about climate change, retrieve documents that support or refute the claim" }, diff --git a/mteb/tasks/Retrieval/eng/DBPediaRetrieval.py b/mteb/tasks/Retrieval/eng/DBPediaRetrieval.py index 93fd6f68f1..c09043a83c 100644 --- a/mteb/tasks/Retrieval/eng/DBPediaRetrieval.py +++ 
b/mteb/tasks/Retrieval/eng/DBPediaRetrieval.py @@ -27,16 +27,18 @@ class DBPedia(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{Hasibi:2017:DVT, - author = {Hasibi, Faegheh and Nikolaev, Fedor and Xiong, Chenyan and Balog, Krisztian and Bratsberg, Svein Erik and Kotov, Alexander and Callan, Jamie}, - title = {DBpedia-Entity V2: A Test Collection for Entity Search}, - booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval}, - series = {SIGIR '17}, - year = {2017}, - pages = {1265--1268}, - doi = {10.1145/3077136.3080751}, - publisher = {ACM} -}""", + bibtex_citation=r""" +@inproceedings{Hasibi:2017:DVT, + author = {Hasibi, Faegheh and Nikolaev, Fedor and Xiong, Chenyan and Balog, Krisztian and Bratsberg, Svein Erik and Kotov, Alexander and Callan, Jamie}, + booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval}, + doi = {10.1145/3077136.3080751}, + pages = {1265--1268}, + publisher = {ACM}, + series = {SIGIR '17}, + title = {DBpedia-Entity V2: A Test Collection for Entity Search}, + year = {2017}, +} +""", prompt={ "query": "Given a query, retrieve relevant entity descriptions from DBPedia" }, @@ -65,15 +67,17 @@ class DBPediaHardNegatives(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{Hasibi:2017:DVT, - author = {Hasibi, Faegheh and Nikolaev, Fedor and Xiong, Chenyan and Balog, Krisztian and Bratsberg, Svein Erik and Kotov, Alexander and Callan, Jamie}, - title = {DBpedia-Entity V2: A Test Collection for Entity Search}, - booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval}, - series = {SIGIR '17}, - year = {2017}, - pages = {1265--1268}, - doi = {10.1145/3077136.3080751}, - publisher = {ACM} -}""", + 
bibtex_citation=r""" +@inproceedings{Hasibi:2017:DVT, + author = {Hasibi, Faegheh and Nikolaev, Fedor and Xiong, Chenyan and Balog, Krisztian and Bratsberg, Svein Erik and Kotov, Alexander and Callan, Jamie}, + booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval}, + doi = {10.1145/3077136.3080751}, + pages = {1265--1268}, + publisher = {ACM}, + series = {SIGIR '17}, + title = {DBpedia-Entity V2: A Test Collection for Entity Search}, + year = {2017}, +} +""", adapted_from=["DBPedia"], ) diff --git a/mteb/tasks/Retrieval/eng/FEVERRetrieval.py b/mteb/tasks/Retrieval/eng/FEVERRetrieval.py index a4513179eb..b0e9b89e8a 100644 --- a/mteb/tasks/Retrieval/eng/FEVERRetrieval.py +++ b/mteb/tasks/Retrieval/eng/FEVERRetrieval.py @@ -33,25 +33,27 @@ class FEVER(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{thorne-etal-2018-fever, - title = "{FEVER}: a Large-scale Dataset for Fact Extraction and {VER}ification", - author = "Thorne, James and - Vlachos, Andreas and - Christodoulopoulos, Christos and - Mittal, Arpit", - editor = "Walker, Marilyn and - Ji, Heng and - Stent, Amanda", - booktitle = "Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)", - month = jun, - year = "2018", - address = "New Orleans, Louisiana", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/N18-1074", - doi = "10.18653/v1/N18-1074", - pages = "809--819", - abstract = "In this paper we introduce a new publicly available dataset for verification against textual sources, FEVER: Fact Extraction and VERification. It consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. 
The claims are classified as Supported, Refuted or NotEnoughInfo by annotators achieving 0.6841 in Fleiss kappa. For the first two classes, the annotators also recorded the sentence(s) forming the necessary evidence for their judgment. To characterize the challenge of the dataset presented, we develop a pipeline approach and compare it to suitably designed oracles. The best accuracy we achieve on labeling a claim accompanied by the correct evidence is 31.87{\%}, while if we ignore the evidence we achieve 50.91{\%}. Thus we believe that FEVER is a challenging testbed that will help stimulate progress on claim verification against textual sources.", -}""", + bibtex_citation=r""" +@inproceedings{thorne-etal-2018-fever, + abstract = {In this paper we introduce a new publicly available dataset for verification against textual sources, FEVER: Fact Extraction and VERification. It consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. The claims are classified as Supported, Refuted or NotEnoughInfo by annotators achieving 0.6841 in Fleiss kappa. For the first two classes, the annotators also recorded the sentence(s) forming the necessary evidence for their judgment. To characterize the challenge of the dataset presented, we develop a pipeline approach and compare it to suitably designed oracles. The best accuracy we achieve on labeling a claim accompanied by the correct evidence is 31.87{\%}, while if we ignore the evidence we achieve 50.91{\%}. 
Thus we believe that FEVER is a challenging testbed that will help stimulate progress on claim verification against textual sources.}, + address = {New Orleans, Louisiana}, + author = {Thorne, James and +Vlachos, Andreas and +Christodoulopoulos, Christos and +Mittal, Arpit}, + booktitle = {Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)}, + doi = {10.18653/v1/N18-1074}, + editor = {Walker, Marilyn and +Ji, Heng and +Stent, Amanda}, + month = jun, + pages = {809--819}, + publisher = {Association for Computational Linguistics}, + title = {{FEVER}: a Large-scale Dataset for Fact Extraction and {VER}ification}, + url = {https://aclanthology.org/N18-1074}, + year = {2018}, +} +""", prompt={ "query": "Given a claim, retrieve documents that support or refute the claim" }, @@ -86,24 +88,26 @@ class FEVERHardNegatives(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{thorne-etal-2018-fever, - title = "{FEVER}: a Large-scale Dataset for Fact Extraction and {VER}ification", - author = "Thorne, James and - Vlachos, Andreas and - Christodoulopoulos, Christos and - Mittal, Arpit", - editor = "Walker, Marilyn and - Ji, Heng and - Stent, Amanda", - booktitle = "Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)", - month = jun, - year = "2018", - address = "New Orleans, Louisiana", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/N18-1074", - doi = "10.18653/v1/N18-1074", - pages = "809--819", - abstract = "In this paper we introduce a new publicly available dataset for verification against textual sources, FEVER: Fact Extraction and VERification. 
It consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. The claims are classified as Supported, Refuted or NotEnoughInfo by annotators achieving 0.6841 in Fleiss kappa. For the first two classes, the annotators also recorded the sentence(s) forming the necessary evidence for their judgment. To characterize the challenge of the dataset presented, we develop a pipeline approach and compare it to suitably designed oracles. The best accuracy we achieve on labeling a claim accompanied by the correct evidence is 31.87{\%}, while if we ignore the evidence we achieve 50.91{\%}. Thus we believe that FEVER is a challenging testbed that will help stimulate progress on claim verification against textual sources.", -}""", + bibtex_citation=r""" +@inproceedings{thorne-etal-2018-fever, + abstract = {In this paper we introduce a new publicly available dataset for verification against textual sources, FEVER: Fact Extraction and VERification. It consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. The claims are classified as Supported, Refuted or NotEnoughInfo by annotators achieving 0.6841 in Fleiss kappa. For the first two classes, the annotators also recorded the sentence(s) forming the necessary evidence for their judgment. To characterize the challenge of the dataset presented, we develop a pipeline approach and compare it to suitably designed oracles. The best accuracy we achieve on labeling a claim accompanied by the correct evidence is 31.87{\%}, while if we ignore the evidence we achieve 50.91{\%}. 
Thus we believe that FEVER is a challenging testbed that will help stimulate progress on claim verification against textual sources.}, + address = {New Orleans, Louisiana}, + author = {Thorne, James and +Vlachos, Andreas and +Christodoulopoulos, Christos and +Mittal, Arpit}, + booktitle = {Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)}, + doi = {10.18653/v1/N18-1074}, + editor = {Walker, Marilyn and +Ji, Heng and +Stent, Amanda}, + month = jun, + pages = {809--819}, + publisher = {Association for Computational Linguistics}, + title = {{FEVER}: a Large-scale Dataset for Fact Extraction and {VER}ification}, + url = {https://aclanthology.org/N18-1074}, + year = {2018}, +} +""", adapted_from=["FEVER"], ) diff --git a/mteb/tasks/Retrieval/eng/FaithDialRetrieval.py b/mteb/tasks/Retrieval/eng/FaithDialRetrieval.py index 8cd87ed04b..2c883ac02d 100644 --- a/mteb/tasks/Retrieval/eng/FaithDialRetrieval.py +++ b/mteb/tasks/Retrieval/eng/FaithDialRetrieval.py @@ -36,19 +36,19 @@ class FaithDialRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @article{dziri2022faithdial, - title = "{FaithDial: A Faithful Benchmark for Information-Seeking Dialogue}", - author = {Dziri, Nouha and Kamalloo, Ehsan and Milton, Sivan and Zaiane, Osmar and Yu, Mo and Ponti, Edoardo M and Reddy, Siva}, - journal = {Transactions of the Association for Computational Linguistics}, - volume = {10}, - pages = {1473--1490}, - year = {2022}, - month = {12}, - publisher = {MIT Press}, - doi={10.1162/tacl_a_00529} - } - """, + bibtex_citation=r""" +@article{dziri2022faithdial, + author = {Dziri, Nouha and Kamalloo, Ehsan and Milton, Sivan and Zaiane, Osmar and Yu, Mo and Ponti, Edoardo M and Reddy, Siva}, + doi = {10.1162/tacl_a_00529}, + journal = {Transactions of the Association for Computational Linguistics}, 
+ month = {12}, + pages = {1473--1490}, + publisher = {MIT Press}, + title = {{FaithDial: A Faithful Benchmark for Information-Seeking Dialogue}}, + volume = {10}, + year = {2022}, +} +""", ) # TODO: Will be removed if curated and added to mteb HF diff --git a/mteb/tasks/Retrieval/eng/FeedbackQARetrieval.py b/mteb/tasks/Retrieval/eng/FeedbackQARetrieval.py index 44f0ac2522..8b388f2c5f 100644 --- a/mteb/tasks/Retrieval/eng/FeedbackQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/FeedbackQARetrieval.py @@ -29,25 +29,25 @@ class FeedbackQARetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{li-etal-2022-using, - title = "Using Interactive Feedback to Improve the Accuracy and Explainability of Question Answering Systems Post-Deployment", - author = "Li, Zichao and - Sharma, Prakhar and - Lu, Xing Han and - Cheung, Jackie and - Reddy, Siva", - editor = "Muresan, Smaranda and - Nakov, Preslav and - Villavicencio, Aline", - booktitle = "Findings of the Association for Computational Linguistics: ACL 2022", - month = may, - year = "2022", - address = "Dublin, Ireland", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2022.findings-acl.75", - doi = "10.18653/v1/2022.findings-acl.75", - pages = "926--937" + address = {Dublin, Ireland}, + author = {Li, Zichao and +Sharma, Prakhar and +Lu, Xing Han and +Cheung, Jackie and +Reddy, Siva}, + booktitle = {Findings of the Association for Computational Linguistics: ACL 2022}, + doi = {10.18653/v1/2022.findings-acl.75}, + editor = {Muresan, Smaranda and +Nakov, Preslav and +Villavicencio, Aline}, + month = may, + pages = {926--937}, + publisher = {Association for Computational Linguistics}, + title = {Using Interactive Feedback to Improve the Accuracy and Explainability of Question Answering Systems Post-Deployment}, + url = {https://aclanthology.org/2022.findings-acl.75}, + year = 
{2022}, } """, ) diff --git a/mteb/tasks/Retrieval/eng/FiQA2018Retrieval.py b/mteb/tasks/Retrieval/eng/FiQA2018Retrieval.py index 7a99d48a95..d8d6534233 100644 --- a/mteb/tasks/Retrieval/eng/FiQA2018Retrieval.py +++ b/mteb/tasks/Retrieval/eng/FiQA2018Retrieval.py @@ -29,14 +29,15 @@ class FiQA2018(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{ -thakur2021beir, -title={{BEIR}: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models}, -author={Nandan Thakur and Nils Reimers and Andreas R{\"u}ckl{\'e} and Abhishek Srivastava and Iryna Gurevych}, -booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)}, -year={2021}, -url={https://openreview.net/forum?id=wCu6T5xFjeJ} -}""", + bibtex_citation=r""" +@inproceedings{thakur2021beir, + author = {Nandan Thakur and Nils Reimers and Andreas R{\"u}ckl{\'e} and Abhishek Srivastava and Iryna Gurevych}, + booktitle = {Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)}, + title = {{BEIR}: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models}, + url = {https://openreview.net/forum?id=wCu6T5xFjeJ}, + year = {2021}, +} +""", prompt={ "query": "Given a financial question, retrieve user replies that best answer the question" }, diff --git a/mteb/tasks/Retrieval/eng/HagridRetrieval.py b/mteb/tasks/Retrieval/eng/HagridRetrieval.py index 74bab52076..0a851a1031 100644 --- a/mteb/tasks/Retrieval/eng/HagridRetrieval.py +++ b/mteb/tasks/Retrieval/eng/HagridRetrieval.py @@ -36,12 +36,14 @@ class HagridRetrieval(AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{hagrid, - title={{HAGRID}: A Human-LLM Collaborative Dataset for Generative Information-Seeking with Attribution}, - author={Ehsan Kamalloo and Aref Jafari 
and Xinyu Zhang and Nandan Thakur and Jimmy Lin}, - year={2023}, - journal={arXiv:2307.16883}, -}""", + bibtex_citation=r""" +@article{hagrid, + author = {Ehsan Kamalloo and Aref Jafari and Xinyu Zhang and Nandan Thakur and Jimmy Lin}, + journal = {arXiv:2307.16883}, + title = {{HAGRID}: A Human-LLM Collaborative Dataset for Generative Information-Seeking with Attribution}, + year = {2023}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/eng/HellaSwagRetrieval.py b/mteb/tasks/Retrieval/eng/HellaSwagRetrieval.py index 81b53e5c42..1eaeed640e 100644 --- a/mteb/tasks/Retrieval/eng/HellaSwagRetrieval.py +++ b/mteb/tasks/Retrieval/eng/HellaSwagRetrieval.py @@ -28,17 +28,19 @@ class HellaSwag(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} + bibtex_citation=r""" +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } + @article{zellers2019hellaswag, - title={Hellaswag: Can a machine really finish your sentence?}, - author={Zellers, Rowan and Holtzman, Ari and Bisk, Yonatan and Farhadi, Ali and Choi, Yejin}, - journal={arXiv preprint arXiv:1905.07830}, - year={2019} + author = {Zellers, Rowan and Holtzman, Ari and Bisk, Yonatan and Farhadi, Ali and Choi, Yejin}, + journal = {arXiv preprint arXiv:1905.07830}, + title = {Hellaswag: Can a machine really finish your sentence?}, + year = {2019}, } """, prompt={ diff --git a/mteb/tasks/Retrieval/eng/HotpotQARetrieval.py b/mteb/tasks/Retrieval/eng/HotpotQARetrieval.py index cb71abea12..0b8a9e212a 100644 --- a/mteb/tasks/Retrieval/eng/HotpotQARetrieval.py +++ 
b/mteb/tasks/Retrieval/eng/HotpotQARetrieval.py @@ -30,29 +30,31 @@ class HotpotQA(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{yang-etal-2018-hotpotqa, - title = "{H}otpot{QA}: A Dataset for Diverse, Explainable Multi-hop Question Answering", - author = "Yang, Zhilin and - Qi, Peng and - Zhang, Saizheng and - Bengio, Yoshua and - Cohen, William and - Salakhutdinov, Ruslan and - Manning, Christopher D.", - editor = "Riloff, Ellen and - Chiang, David and - Hockenmaier, Julia and - Tsujii, Jun{'}ichi", - booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing", - month = oct # "-" # nov, - year = "2018", - address = "Brussels, Belgium", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/D18-1259", - doi = "10.18653/v1/D18-1259", - pages = "2369--2380", - abstract = "Existing question answering (QA) datasets fail to train QA systems to perform complex reasoning and provide explanations for answers. We introduce HotpotQA, a new dataset with 113k Wikipedia-based question-answer pairs with four key features: (1) the questions require finding and reasoning over multiple supporting documents to answer; (2) the questions are diverse and not constrained to any pre-existing knowledge bases or knowledge schemas; (3) we provide sentence-level supporting facts required for reasoning, allowing QA systems to reason with strong supervision and explain the predictions; (4) we offer a new type of factoid comparison questions to test QA systems{'} ability to extract relevant facts and perform necessary comparison. 
We show that HotpotQA is challenging for the latest QA systems, and the supporting facts enable models to improve performance and make explainable predictions.", -}""", + bibtex_citation=r""" +@inproceedings{yang-etal-2018-hotpotqa, + abstract = {Existing question answering (QA) datasets fail to train QA systems to perform complex reasoning and provide explanations for answers. We introduce HotpotQA, a new dataset with 113k Wikipedia-based question-answer pairs with four key features: (1) the questions require finding and reasoning over multiple supporting documents to answer; (2) the questions are diverse and not constrained to any pre-existing knowledge bases or knowledge schemas; (3) we provide sentence-level supporting facts required for reasoning, allowing QA systems to reason with strong supervision and explain the predictions; (4) we offer a new type of factoid comparison questions to test QA systems{'} ability to extract relevant facts and perform necessary comparison. We show that HotpotQA is challenging for the latest QA systems, and the supporting facts enable models to improve performance and make explainable predictions.}, + address = {Brussels, Belgium}, + author = {Yang, Zhilin and +Qi, Peng and +Zhang, Saizheng and +Bengio, Yoshua and +Cohen, William and +Salakhutdinov, Ruslan and +Manning, Christopher D.}, + booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing}, + doi = {10.18653/v1/D18-1259}, + editor = {Riloff, Ellen and +Chiang, David and +Hockenmaier, Julia and +Tsujii, Jun{'}ichi}, + month = oct # {-} # nov, + pages = {2369--2380}, + publisher = {Association for Computational Linguistics}, + title = {{H}otpot{QA}: A Dataset for Diverse, Explainable Multi-hop Question Answering}, + url = {https://aclanthology.org/D18-1259}, + year = {2018}, +} +""", prompt={ "query": "Given a multi-hop question, retrieve documents that can help answer the question" }, @@ -84,28 +86,30 @@ class 
HotpotQAHardNegatives(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{yang-etal-2018-hotpotqa, - title = "{H}otpot{QA}: A Dataset for Diverse, Explainable Multi-hop Question Answering", - author = "Yang, Zhilin and - Qi, Peng and - Zhang, Saizheng and - Bengio, Yoshua and - Cohen, William and - Salakhutdinov, Ruslan and - Manning, Christopher D.", - editor = "Riloff, Ellen and - Chiang, David and - Hockenmaier, Julia and - Tsujii, Jun{'}ichi", - booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing", - month = oct # "-" # nov, - year = "2018", - address = "Brussels, Belgium", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/D18-1259", - doi = "10.18653/v1/D18-1259", - pages = "2369--2380", - abstract = "Existing question answering (QA) datasets fail to train QA systems to perform complex reasoning and provide explanations for answers. We introduce HotpotQA, a new dataset with 113k Wikipedia-based question-answer pairs with four key features: (1) the questions require finding and reasoning over multiple supporting documents to answer; (2) the questions are diverse and not constrained to any pre-existing knowledge bases or knowledge schemas; (3) we provide sentence-level supporting facts required for reasoning, allowing QA systems to reason with strong supervision and explain the predictions; (4) we offer a new type of factoid comparison questions to test QA systems{'} ability to extract relevant facts and perform necessary comparison. 
We show that HotpotQA is challenging for the latest QA systems, and the supporting facts enable models to improve performance and make explainable predictions.", -}""", + bibtex_citation=r""" +@inproceedings{yang-etal-2018-hotpotqa, + abstract = {Existing question answering (QA) datasets fail to train QA systems to perform complex reasoning and provide explanations for answers. We introduce HotpotQA, a new dataset with 113k Wikipedia-based question-answer pairs with four key features: (1) the questions require finding and reasoning over multiple supporting documents to answer; (2) the questions are diverse and not constrained to any pre-existing knowledge bases or knowledge schemas; (3) we provide sentence-level supporting facts required for reasoning, allowing QA systems to reason with strong supervision and explain the predictions; (4) we offer a new type of factoid comparison questions to test QA systems{'} ability to extract relevant facts and perform necessary comparison. We show that HotpotQA is challenging for the latest QA systems, and the supporting facts enable models to improve performance and make explainable predictions.}, + address = {Brussels, Belgium}, + author = {Yang, Zhilin and +Qi, Peng and +Zhang, Saizheng and +Bengio, Yoshua and +Cohen, William and +Salakhutdinov, Ruslan and +Manning, Christopher D.}, + booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing}, + doi = {10.18653/v1/D18-1259}, + editor = {Riloff, Ellen and +Chiang, David and +Hockenmaier, Julia and +Tsujii, Jun{'}ichi}, + month = oct # {-} # nov, + pages = {2369--2380}, + publisher = {Association for Computational Linguistics}, + title = {{H}otpot{QA}: A Dataset for Diverse, Explainable Multi-hop Question Answering}, + url = {https://aclanthology.org/D18-1259}, + year = {2018}, +} +""", adapted_from=["HotpotQA"], ) diff --git a/mteb/tasks/Retrieval/eng/LEMBNarrativeQARetrieval.py b/mteb/tasks/Retrieval/eng/LEMBNarrativeQARetrieval.py 
index 3d45290d71..e42c63796a 100644 --- a/mteb/tasks/Retrieval/eng/LEMBNarrativeQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/LEMBNarrativeQARetrieval.py @@ -32,31 +32,31 @@ class LEMBNarrativeQARetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @article{kocisky-etal-2018-narrativeqa, - title = "The {N}arrative{QA} Reading Comprehension Challenge", - author = "Ko{\v{c}}isk{\'y}, Tom{\'a}{\v{s}} and - Schwarz, Jonathan and - Blunsom, Phil and - Dyer, Chris and - Hermann, Karl Moritz and - Melis, G{\'a}bor and - Grefenstette, Edward", - editor = "Lee, Lillian and - Johnson, Mark and - Toutanova, Kristina and - Roark, Brian", - journal = "Transactions of the Association for Computational Linguistics", - volume = "6", - year = "2018", - address = "Cambridge, MA", - publisher = "MIT Press", - url = "https://aclanthology.org/Q18-1023", - doi = "10.1162/tacl_a_00023", - pages = "317--328", - abstract = "", - } - """, + bibtex_citation=r""" +@article{kocisky-etal-2018-narrativeqa, + abstract = {}, + address = {Cambridge, MA}, + author = {Ko{\v{c}}isk{\'y}, Tom{\'a}{\v{s}} and +Schwarz, Jonathan and +Blunsom, Phil and +Dyer, Chris and +Hermann, Karl Moritz and +Melis, G{\'a}bor and +Grefenstette, Edward}, + doi = {10.1162/tacl_a_00023}, + editor = {Lee, Lillian and +Johnson, Mark and +Toutanova, Kristina and +Roark, Brian}, + journal = {Transactions of the Association for Computational Linguistics}, + pages = {317--328}, + publisher = {MIT Press}, + title = {The {N}arrative{QA} Reading Comprehension Challenge}, + url = {https://aclanthology.org/Q18-1023}, + volume = {6}, + year = {2018}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/eng/LEMBNeedleRetrieval.py b/mteb/tasks/Retrieval/eng/LEMBNeedleRetrieval.py index c467843d01..1a752a1aab 100644 --- a/mteb/tasks/Retrieval/eng/LEMBNeedleRetrieval.py +++ b/mteb/tasks/Retrieval/eng/LEMBNeedleRetrieval.py @@ -41,14 +41,14 @@ class 
LEMBNeedleRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @article{zhu2024longembed, - title={LongEmbed: Extending Embedding Models for Long Context Retrieval}, - author={Zhu, Dawei and Wang, Liang and Yang, Nan and Song, Yifan and Wu, Wenhao and Wei, Furu and Li, Sujian}, - journal={arXiv preprint arXiv:2404.12096}, - year={2024} - } - """, + bibtex_citation=r""" +@article{zhu2024longembed, + author = {Zhu, Dawei and Wang, Liang and Yang, Nan and Song, Yifan and Wu, Wenhao and Wei, Furu and Li, Sujian}, + journal = {arXiv preprint arXiv:2404.12096}, + title = {LongEmbed: Extending Embedding Models for Long Context Retrieval}, + year = {2024}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/eng/LEMBPasskeyRetrieval.py b/mteb/tasks/Retrieval/eng/LEMBPasskeyRetrieval.py index f3c9b96485..560c72f562 100644 --- a/mteb/tasks/Retrieval/eng/LEMBPasskeyRetrieval.py +++ b/mteb/tasks/Retrieval/eng/LEMBPasskeyRetrieval.py @@ -41,14 +41,14 @@ class LEMBPasskeyRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @article{zhu2024longembed, - title={LongEmbed: Extending Embedding Models for Long Context Retrieval}, - author={Zhu, Dawei and Wang, Liang and Yang, Nan and Song, Yifan and Wu, Wenhao and Wei, Furu and Li, Sujian}, - journal={arXiv preprint arXiv:2404.12096}, - year={2024} - } - """, + bibtex_citation=r""" +@article{zhu2024longembed, + author = {Zhu, Dawei and Wang, Liang and Yang, Nan and Song, Yifan and Wu, Wenhao and Wei, Furu and Li, Sujian}, + journal = {arXiv preprint arXiv:2404.12096}, + title = {LongEmbed: Extending Embedding Models for Long Context Retrieval}, + year = {2024}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/eng/LEMBQMSumRetrieval.py b/mteb/tasks/Retrieval/eng/LEMBQMSumRetrieval.py index c302e4758a..2aca710367 100644 --- 
a/mteb/tasks/Retrieval/eng/LEMBQMSumRetrieval.py +++ b/mteb/tasks/Retrieval/eng/LEMBQMSumRetrieval.py @@ -32,40 +32,40 @@ class LEMBQMSumRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{zhong-etal-2021-qmsum, - title = "{QMS}um: A New Benchmark for Query-based Multi-domain Meeting Summarization", - author = "Zhong, Ming and - Yin, Da and - Yu, Tao and - Zaidi, Ahmad and - Mutuma, Mutethia and - Jha, Rahul and - Awadallah, Ahmed Hassan and - Celikyilmaz, Asli and - Liu, Yang and - Qiu, Xipeng and - Radev, Dragomir", - editor = "Toutanova, Kristina and - Rumshisky, Anna and - Zettlemoyer, Luke and - Hakkani-Tur, Dilek and - Beltagy, Iz and - Bethard, Steven and - Cotterell, Ryan and - Chakraborty, Tanmoy and - Zhou, Yichao", - booktitle = "Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies", - month = jun, - year = "2021", - address = "Online", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.naacl-main.472", - doi = "10.18653/v1/2021.naacl-main.472", - pages = "5905--5921", - abstract = "", - } - """, + bibtex_citation=r""" +@inproceedings{zhong-etal-2021-qmsum, + abstract = {}, + address = {Online}, + author = {Zhong, Ming and +Yin, Da and +Yu, Tao and +Zaidi, Ahmad and +Mutuma, Mutethia and +Jha, Rahul and +Awadallah, Ahmed Hassan and +Celikyilmaz, Asli and +Liu, Yang and +Qiu, Xipeng and +Radev, Dragomir}, + booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies}, + doi = {10.18653/v1/2021.naacl-main.472}, + editor = {Toutanova, Kristina and +Rumshisky, Anna and +Zettlemoyer, Luke and +Hakkani-Tur, Dilek and +Beltagy, Iz and +Bethard, Steven and +Cotterell, Ryan and +Chakraborty, Tanmoy and +Zhou, Yichao}, + month = jun, + pages = 
{5905--5921}, + publisher = {Association for Computational Linguistics}, + title = {{QMS}um: A New Benchmark for Query-based Multi-domain Meeting Summarization}, + url = {https://aclanthology.org/2021.naacl-main.472}, + year = {2021}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/eng/LEMBSummScreenFDRetrieval.py b/mteb/tasks/Retrieval/eng/LEMBSummScreenFDRetrieval.py index c2c6b6db03..748032c767 100644 --- a/mteb/tasks/Retrieval/eng/LEMBSummScreenFDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/LEMBSummScreenFDRetrieval.py @@ -32,27 +32,27 @@ class LEMBSummScreenFDRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{chen-etal-2022-summscreen, - title = "{S}umm{S}creen: A Dataset for Abstractive Screenplay Summarization", - author = "Chen, Mingda and - Chu, Zewei and - Wiseman, Sam and - Gimpel, Kevin", - editor = "Muresan, Smaranda and - Nakov, Preslav and - Villavicencio, Aline", - booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)", - month = may, - year = "2022", - address = "Dublin, Ireland", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2022.acl-long.589", - doi = "10.18653/v1/2022.acl-long.589", - pages = "8602--8615", - abstract = "", - } - """, + bibtex_citation=r""" +@inproceedings{chen-etal-2022-summscreen, + abstract = {}, + address = {Dublin, Ireland}, + author = {Chen, Mingda and +Chu, Zewei and +Wiseman, Sam and +Gimpel, Kevin}, + booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + doi = {10.18653/v1/2022.acl-long.589}, + editor = {Muresan, Smaranda and +Nakov, Preslav and +Villavicencio, Aline}, + month = may, + pages = {8602--8615}, + publisher = {Association for Computational Linguistics}, + title = {{S}umm{S}creen: A Dataset for Abstractive 
Screenplay Summarization}, + url = {https://aclanthology.org/2022.acl-long.589}, + year = {2022}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/eng/LEMBWikimQARetrieval.py b/mteb/tasks/Retrieval/eng/LEMBWikimQARetrieval.py index 04e8b3bb86..5e03c5caf8 100644 --- a/mteb/tasks/Retrieval/eng/LEMBWikimQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/LEMBWikimQARetrieval.py @@ -32,15 +32,15 @@ class LEMBWikimQARetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{ho2020constructing, - title={Constructing A Multi-hop QA Dataset for Comprehensive Evaluation of Reasoning Steps}, - author={Ho, Xanh and Nguyen, Anh-Khoa Duong and Sugawara, Saku and Aizawa, Akiko}, - booktitle={Proceedings of the 28th International Conference on Computational Linguistics}, - pages={6609--6625}, - year={2020} - } - """, + bibtex_citation=r""" +@inproceedings{ho2020constructing, + author = {Ho, Xanh and Nguyen, Anh-Khoa Duong and Sugawara, Saku and Aizawa, Akiko}, + booktitle = {Proceedings of the 28th International Conference on Computational Linguistics}, + pages = {6609--6625}, + title = {Constructing A Multi-hop QA Dataset for Comprehensive Evaluation of Reasoning Steps}, + year = {2020}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/eng/LegalBenchConsumerContractsQARetrieval.py b/mteb/tasks/Retrieval/eng/LegalBenchConsumerContractsQARetrieval.py index 39923194ec..743c9af0f1 100644 --- a/mteb/tasks/Retrieval/eng/LegalBenchConsumerContractsQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/LegalBenchConsumerContractsQARetrieval.py @@ -27,17 +27,19 @@ class LegalBenchConsumerContractsQA(AbsTaskRetrieval): annotations_creators="derived", dialect=None, sample_creation="found", - bibtex_citation="""@article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and Manning, 
Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} - } + bibtex_citation=r""" +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} - @article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} -}""", +@article{koreeda2021contractnli, + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/LegalBenchCorporateLobbyingRetrieval.py b/mteb/tasks/Retrieval/eng/LegalBenchCorporateLobbyingRetrieval.py index a5003b09be..1138eb1155 100644 --- a/mteb/tasks/Retrieval/eng/LegalBenchCorporateLobbyingRetrieval.py +++ b/mteb/tasks/Retrieval/eng/LegalBenchCorporateLobbyingRetrieval.py @@ -27,74 +27,84 @@ class LegalBenchCorporateLobbying(AbsTaskRetrieval): annotations_creators="derived", dialect=None, sample_creation="found", - bibtex_citation="""@misc{guha2023legalbench, - title={LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, - author={Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. 
Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, - year={2023}, - eprint={2308.11462}, - archivePrefix={arXiv}, - primaryClass={cs.CL} + bibtex_citation=r""" +@misc{guha2023legalbench, + archiveprefix = {arXiv}, + author = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher Ré and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li}, + eprint = {2308.11462}, + primaryclass = {cs.CL}, + title = {LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models}, + year = {2023}, } + +@article{hendrycks2021cuad, + author = {Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, + journal = {arXiv preprint arXiv:2103.06268}, + title = {Cuad: An expert-annotated nlp dataset for legal contract review}, + year = {2021}, +} + +@article{holzenberger2021factoring, + author = {Holzenberger, Nils and Van Durme, Benjamin}, + journal = {arXiv preprint arXiv:2105.07903}, + title = {Factoring statutory reasoning as language understanding challenges}, + year = {2021}, +} + @article{koreeda2021contractnli, - title={ContractNLI: A dataset for document-level natural language inference for contracts}, - author={Koreeda, Yuta and 
Manning, Christopher D}, - journal={arXiv preprint arXiv:2110.01799}, - year={2021} + author = {Koreeda, Yuta and Manning, Christopher D}, + journal = {arXiv preprint arXiv:2110.01799}, + title = {ContractNLI: A dataset for document-level natural language inference for contracts}, + year = {2021}, } -@article{hendrycks2021cuad, - title={Cuad: An expert-annotated nlp dataset for legal contract review}, - author={Hendrycks, Dan and Burns, Collin and Chen, Anya and Ball, Spencer}, - journal={arXiv preprint arXiv:2103.06268}, - year={2021} + +@article{lippi2019claudette, + author = {Lippi, Marco and Pa{\l}ka, Przemys{\l}aw and Contissa, Giuseppe and Lagioia, Francesca and Micklitz, Hans-Wolfgang and Sartor, Giovanni and Torroni, Paolo}, + journal = {Artificial Intelligence and Law}, + pages = {117--139}, + publisher = {Springer}, + title = {CLAUDETTE: an automated detector of potentially unfair clauses in online terms of service}, + volume = {27}, + year = {2019}, } + +@article{ravichander2019question, + author = {Ravichander, Abhilasha and Black, Alan W and Wilson, Shomir and Norton, Thomas and Sadeh, Norman}, + journal = {arXiv preprint arXiv:1911.00841}, + title = {Question answering for privacy policies: Combining computational and legal perspectives}, + year = {2019}, +} + @article{wang2023maud, - title={MAUD: An Expert-Annotated Legal NLP Dataset for Merger Agreement Understanding}, - author={Wang, Steven H and Scardigli, Antoine and Tang, Leonard and Chen, Wei and Levkin, Dimitry and Chen, Anya and Ball, Spencer and Woodside, Thomas and Zhang, Oliver and Hendrycks, Dan}, - journal={arXiv preprint arXiv:2301.00876}, - year={2023} + author = {Wang, Steven H and Scardigli, Antoine and Tang, Leonard and Chen, Wei and Levkin, Dimitry and Chen, Anya and Ball, Spencer and Woodside, Thomas and Zhang, Oliver and Hendrycks, Dan}, + journal = {arXiv preprint arXiv:2301.00876}, + title = {MAUD: An Expert-Annotated Legal NLP Dataset for Merger Agreement Understanding}, + 
year = {2023}, } + @inproceedings{wilson2016creation, - title={The creation and analysis of a website privacy policy corpus}, - author={Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, - booktitle={Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, - pages={1330--1340}, - year={2016} + author = {Wilson, Shomir and Schaub, Florian and Dara, Aswarth Abhilash and Liu, Frederick and Cherivirala, Sushain and Leon, Pedro Giovanni and Andersen, Mads Schaarup and Zimmeck, Sebastian and Sathyendra, Kanthashree Mysore and Russell, N Cameron and others}, + booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + pages = {1330--1340}, + title = {The creation and analysis of a website privacy policy corpus}, + year = {2016}, } + @inproceedings{zheng2021does, - title={When does pretraining help? assessing self-supervised learning for law and the casehold dataset of 53,000+ legal holdings}, - author={Zheng, Lucia and Guha, Neel and Anderson, Brandon R and Henderson, Peter and Ho, Daniel E}, - booktitle={Proceedings of the eighteenth international conference on artificial intelligence and law}, - pages={159--168}, - year={2021} + author = {Zheng, Lucia and Guha, Neel and Anderson, Brandon R and Henderson, Peter and Ho, Daniel E}, + booktitle = {Proceedings of the eighteenth international conference on artificial intelligence and law}, + pages = {159--168}, + title = {When does pretraining help? 
assessing self-supervised learning for law and the casehold dataset of 53,000+ legal holdings}, + year = {2021}, } + @article{zimmeck2019maps, - title={Maps: Scaling privacy compliance analysis to a million apps}, - author={Zimmeck, Sebastian and Story, Peter and Smullen, Daniel and Ravichander, Abhilasha and Wang, Ziqi and Reidenberg, Joel R and Russell, N Cameron and Sadeh, Norman}, - journal={Proc. Priv. Enhancing Tech.}, - volume={2019}, - pages={66}, - year={2019} -} -@article{ravichander2019question, - title={Question answering for privacy policies: Combining computational and legal perspectives}, - author={Ravichander, Abhilasha and Black, Alan W and Wilson, Shomir and Norton, Thomas and Sadeh, Norman}, - journal={arXiv preprint arXiv:1911.00841}, - year={2019} -} -@article{holzenberger2021factoring, - title={Factoring statutory reasoning as language understanding challenges}, - author={Holzenberger, Nils and Van Durme, Benjamin}, - journal={arXiv preprint arXiv:2105.07903}, - year={2021} -} -@article{lippi2019claudette, - title={CLAUDETTE: an automated detector of potentially unfair clauses in online terms of service}, - author={Lippi, Marco and Pa{\l}ka, Przemys{\l}aw and Contissa, Giuseppe and Lagioia, Francesca and Micklitz, Hans-Wolfgang and Sartor, Giovanni and Torroni, Paolo}, - journal={Artificial Intelligence and Law}, - volume={27}, - pages={117--139}, - year={2019}, - publisher={Springer} + author = {Zimmeck, Sebastian and Story, Peter and Smullen, Daniel and Ravichander, Abhilasha and Wang, Ziqi and Reidenberg, Joel R and Russell, N Cameron and Sadeh, Norman}, + journal = {Proc. Priv. 
Enhancing Tech.}, + pages = {66}, + title = {Maps: Scaling privacy compliance analysis to a million apps}, + volume = {2019}, + year = {2019}, } """, ) diff --git a/mteb/tasks/Retrieval/eng/LegalSummarizationRetrieval.py b/mteb/tasks/Retrieval/eng/LegalSummarizationRetrieval.py index 3fc4cf167d..0e0d8ddb80 100644 --- a/mteb/tasks/Retrieval/eng/LegalSummarizationRetrieval.py +++ b/mteb/tasks/Retrieval/eng/LegalSummarizationRetrieval.py @@ -27,16 +27,18 @@ class LegalSummarization(AbsTaskRetrieval): annotations_creators="derived", dialect=None, sample_creation="found", - bibtex_citation="""@inproceedings{manor-li-2019-plain, - title = "Plain {E}nglish Summarization of Contracts", - author = "Manor, Laura and - Li, Junyi Jessy", - booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2019", - month = jun, - year = "2019", - address = "Minneapolis, Minnesota", - publisher = "Association for Computational Linguistics", - url = "https://www.aclweb.org/anthology/W19-2201", - pages = "1--11", -}""", + bibtex_citation=r""" +@inproceedings{manor-li-2019-plain, + address = {Minneapolis, Minnesota}, + author = {Manor, Laura and +Li, Junyi Jessy}, + booktitle = {Proceedings of the Natural Legal Language Processing Workshop 2019}, + month = jun, + pages = {1--11}, + publisher = {Association for Computational Linguistics}, + title = {Plain {E}nglish Summarization of Contracts}, + url = {https://www.aclweb.org/anthology/W19-2201}, + year = {2019}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/LitSearchRetrieval.py b/mteb/tasks/Retrieval/eng/LitSearchRetrieval.py index b9c3683ed4..894f009d87 100644 --- a/mteb/tasks/Retrieval/eng/LitSearchRetrieval.py +++ b/mteb/tasks/Retrieval/eng/LitSearchRetrieval.py @@ -35,11 +35,13 @@ class LitSearchRetrieval(AbsTaskRetrieval): annotations_creators="LM-generated", # generated by GPT-4 dialect=[], sample_creation="found", # queries LLM generated, corpus samples are found (extracted from S2ORC) - 
bibtex_citation="""@article{ajith2024litsearch, - title={LitSearch: A Retrieval Benchmark for Scientific Literature Search}, - author={Ajith, Anirudh and Xia, Mengzhou and Chevalier, Alexis and Goyal, Tanya and Chen, Danqi and Gao, Tianyu}, - year={2024} - }""", + bibtex_citation=r""" +@article{ajith2024litsearch, + author = {Ajith, Anirudh and Xia, Mengzhou and Chevalier, Alexis and Goyal, Tanya and Chen, Danqi and Gao, Tianyu}, + title = {LitSearch: A Retrieval Benchmark for Scientific Literature Search}, + year = {2024}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/eng/MLQuestions.py b/mteb/tasks/Retrieval/eng/MLQuestions.py index 6b594be445..a381abe269 100644 --- a/mteb/tasks/Retrieval/eng/MLQuestions.py +++ b/mteb/tasks/Retrieval/eng/MLQuestions.py @@ -39,23 +39,23 @@ class MLQuestionsRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{kulshreshtha-etal-2021-back, - title = "Back-Training excels Self-Training at Unsupervised Domain Adaptation of Question Generation and Passage Retrieval", - author = "Kulshreshtha, Devang and - Belfer, Robert and - Serban, Iulian Vlad and - Reddy, Siva", - booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing", - month = nov, - year = "2021", - address = "Online and Punta Cana, Dominican Republic", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.emnlp-main.566", - pages = "7064--7078", - abstract = "In this work, we introduce back-training, an alternative to self-training for unsupervised domain adaptation (UDA). While self-training generates synthetic training data where natural inputs are aligned with noisy outputs, back-training results in natural outputs aligned with noisy inputs. 
This significantly reduces the gap between target domain and synthetic data distribution, and reduces model overfitting to source domain. We run UDA experiments on question generation and passage retrieval from the Natural Questions domain to machine learning and biomedical domains. We find that back-training vastly outperforms self-training by a mean improvement of 7.8 BLEU-4 points on generation, and 17.6{\%} top-20 retrieval accuracy across both domains. We further propose consistency filters to remove low-quality synthetic data before training. We also release a new domain-adaptation dataset - MLQuestions containing 35K unaligned questions, 50K unaligned passages, and 3K aligned question-passage pairs.", - } - """, + bibtex_citation=r""" +@inproceedings{kulshreshtha-etal-2021-back, + abstract = {In this work, we introduce back-training, an alternative to self-training for unsupervised domain adaptation (UDA). While self-training generates synthetic training data where natural inputs are aligned with noisy outputs, back-training results in natural outputs aligned with noisy inputs. This significantly reduces the gap between target domain and synthetic data distribution, and reduces model overfitting to source domain. We run UDA experiments on question generation and passage retrieval from the Natural Questions domain to machine learning and biomedical domains. We find that back-training vastly outperforms self-training by a mean improvement of 7.8 BLEU-4 points on generation, and 17.6{\%} top-20 retrieval accuracy across both domains. We further propose consistency filters to remove low-quality synthetic data before training. 
We also release a new domain-adaptation dataset - MLQuestions containing 35K unaligned questions, 50K unaligned passages, and 3K aligned question-passage pairs.}, + address = {Online and Punta Cana, Dominican Republic}, + author = {Kulshreshtha, Devang and +Belfer, Robert and +Serban, Iulian Vlad and +Reddy, Siva}, + booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing}, + month = nov, + pages = {7064--7078}, + publisher = {Association for Computational Linguistics}, + title = {Back-Training excels Self-Training at Unsupervised Domain Adaptation of Question Generation and Passage Retrieval}, + url = {https://aclanthology.org/2021.emnlp-main.566}, + year = {2021}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/eng/MSMARCORetrieval.py b/mteb/tasks/Retrieval/eng/MSMARCORetrieval.py index 252bf075b4..26f78f76e0 100644 --- a/mteb/tasks/Retrieval/eng/MSMARCORetrieval.py +++ b/mteb/tasks/Retrieval/eng/MSMARCORetrieval.py @@ -40,26 +40,27 @@ class MSMARCO(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{DBLP:journals/corr/NguyenRSGTMD16, - author = {Tri Nguyen and - Mir Rosenberg and - Xia Song and - Jianfeng Gao and - Saurabh Tiwary and - Rangan Majumder and - Li Deng}, - title = {{MS} {MARCO:} {A} Human Generated MAchine Reading COmprehension Dataset}, - journal = {CoRR}, - volume = {abs/1611.09268}, - year = {2016}, - url = {http://arxiv.org/abs/1611.09268}, - archivePrefix = {arXiv}, - eprint = {1611.09268}, + bibtex_citation=r""" +@article{DBLP:journals/corr/NguyenRSGTMD16, + archiveprefix = {arXiv}, + author = {Tri Nguyen and +Mir Rosenberg and +Xia Song and +Jianfeng Gao and +Saurabh Tiwary and +Rangan Majumder and +Li Deng}, + bibsource = {dblp computer science bibliography, https://dblp.org}, + biburl = {https://dblp.org/rec/journals/corr/NguyenRSGTMD16.bib}, + eprint = {1611.09268}, + journal = {CoRR}, timestamp = {Mon, 
13 Aug 2018 16:49:03 +0200}, - biburl = {https://dblp.org/rec/journals/corr/NguyenRSGTMD16.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} + title = {{MS} {MARCO:} {A} Human Generated MAchine Reading COmprehension Dataset}, + url = {http://arxiv.org/abs/1611.09268}, + volume = {abs/1611.09268}, + year = {2016}, } -}""", +""", prompt={ "query": "Given a web search query, retrieve relevant passages that answer the query" }, @@ -101,25 +102,26 @@ class MSMARCOHardNegatives(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{DBLP:journals/corr/NguyenRSGTMD16, - author = {Tri Nguyen and - Mir Rosenberg and - Xia Song and - Jianfeng Gao and - Saurabh Tiwary and - Rangan Majumder and - Li Deng}, - title = {{MS} {MARCO:} {A} Human Generated MAchine Reading COmprehension Dataset}, - journal = {CoRR}, - volume = {abs/1611.09268}, - year = {2016}, - url = {http://arxiv.org/abs/1611.09268}, - archivePrefix = {arXiv}, - eprint = {1611.09268}, + bibtex_citation=r""" +@article{DBLP:journals/corr/NguyenRSGTMD16, + archiveprefix = {arXiv}, + author = {Tri Nguyen and +Mir Rosenberg and +Xia Song and +Jianfeng Gao and +Saurabh Tiwary and +Rangan Majumder and +Li Deng}, + bibsource = {dblp computer science bibliography, https://dblp.org}, + biburl = {https://dblp.org/rec/journals/corr/NguyenRSGTMD16.bib}, + eprint = {1611.09268}, + journal = {CoRR}, timestamp = {Mon, 13 Aug 2018 16:49:03 +0200}, - biburl = {https://dblp.org/rec/journals/corr/NguyenRSGTMD16.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} + title = {{MS} {MARCO:} {A} Human Generated MAchine Reading COmprehension Dataset}, + url = {http://arxiv.org/abs/1611.09268}, + volume = {abs/1611.09268}, + year = {2016}, } -}""", +""", adapted_from=["MSMARCO"], ) diff --git a/mteb/tasks/Retrieval/eng/MSMARCOv2Retrieval.py b/mteb/tasks/Retrieval/eng/MSMARCOv2Retrieval.py index 0a784805f9..920d79c855 100644 --- 
a/mteb/tasks/Retrieval/eng/MSMARCOv2Retrieval.py +++ b/mteb/tasks/Retrieval/eng/MSMARCOv2Retrieval.py @@ -38,25 +38,26 @@ class MSMARCOv2(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{DBLP:journals/corr/NguyenRSGTMD16, - author = {Tri Nguyen and - Mir Rosenberg and - Xia Song and - Jianfeng Gao and - Saurabh Tiwary and - Rangan Majumder and - Li Deng}, - title = {{MS} {MARCO:} {A} Human Generated MAchine Reading COmprehension Dataset}, - journal = {CoRR}, - volume = {abs/1611.09268}, - year = {2016}, - url = {http://arxiv.org/abs/1611.09268}, - archivePrefix = {arXiv}, - eprint = {1611.09268}, - timestamp = {Mon, 13 Aug 2018 16:49:03 +0200}, - biburl = {https://dblp.org/rec/journals/corr/NguyenRSGTMD16.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} - } - }""", + bibtex_citation=r""" +@article{DBLP:journals/corr/NguyenRSGTMD16, + archiveprefix = {arXiv}, + author = {Tri Nguyen and +Mir Rosenberg and +Xia Song and +Jianfeng Gao and +Saurabh Tiwary and +Rangan Majumder and +Li Deng}, + bibsource = {dblp computer science bibliography, https://dblp.org}, + biburl = {https://dblp.org/rec/journals/corr/NguyenRSGTMD16.bib}, + eprint = {1611.09268}, + journal = {CoRR}, + timestamp = {Mon, 13 Aug 2018 16:49:03 +0200}, + title = {{MS} {MARCO:} {A} Human Generated MAchine Reading COmprehension Dataset}, + url = {http://arxiv.org/abs/1611.09268}, + volume = {abs/1611.09268}, + year = {2016}, +} +""", adapted_from=["MSMARCO"], ) diff --git a/mteb/tasks/Retrieval/eng/MedicalQARetrieval.py b/mteb/tasks/Retrieval/eng/MedicalQARetrieval.py index 12607572bd..a3017155ee 100644 --- a/mteb/tasks/Retrieval/eng/MedicalQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/MedicalQARetrieval.py @@ -26,14 +26,16 @@ class MedicalQARetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@ARTICLE{BenAbacha-BMC-2019, - author = {Asma, Ben 
Abacha and Dina, Demner{-}Fushman}, - title = {A Question-Entailment Approach to Question Answering}, - journal = {{BMC} Bioinform.}, - volume = {20}, - number = {1}, - pages = {511:1--511:23}, - year = {2019}, - url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-019-3119-4} - } """, + bibtex_citation=r""" +@article{BenAbacha-BMC-2019, + author = {Asma, Ben Abacha and Dina, Demner{-}Fushman}, + journal = {{BMC} Bioinform.}, + number = {1}, + pages = {511:1--511:23}, + title = {A Question-Entailment Approach to Question Answering}, + url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-019-3119-4}, + volume = {20}, + year = {2019}, +} +""", ) diff --git a/mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py b/mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py index 31f4eb60b1..bc69775d72 100644 --- a/mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py @@ -27,16 +27,18 @@ class NFCorpus(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{boteva2016, + bibtex_citation=r""" +@inproceedings{boteva2016, author = {Boteva, Vera and Gholipour, Demian and Sokolov, Artem and Riezler, Stefan}, - title = {A Full-Text Learning to Rank Dataset for Medical Information Retrieval}, + city = {Padova}, + country = {Italy}, journal = {Proceedings of the 38th European Conference on Information Retrieval}, journal-abbrev = {ECIR}, + title = {A Full-Text Learning to Rank Dataset for Medical Information Retrieval}, + url = {http://www.cl.uni-heidelberg.de/~riezler/publications/papers/ECIR2016.pdf}, year = {2016}, - city = {Padova}, - country = {Italy}, - url = {http://www.cl.uni-heidelberg.de/~riezler/publications/papers/ECIR2016.pdf} -}""", +} +""", prompt={ "query": "Given a question, retrieve relevant documents that best answer the question" }, diff --git a/mteb/tasks/Retrieval/eng/NQRetrieval.py b/mteb/tasks/Retrieval/eng/NQRetrieval.py index 
33334493d3..105e46224a 100644 --- a/mteb/tasks/Retrieval/eng/NQRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NQRetrieval.py @@ -27,12 +27,18 @@ class NQ(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{47761,title = {Natural Questions: a Benchmark for Question Answering Research}, - author = {Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh - and Chris Alberti and Danielle Epstein and Illia Polosukhin and Matthew Kelcey and Jacob Devlin and Kenton Lee - and Kristina N. Toutanova and Llion Jones and Ming-Wei Chang and Andrew Dai and Jakob Uszkoreit and Quoc Le - and Slav Petrov},year = {2019},journal = {Transactions of the Association of Computational - Linguistics}}""", + bibtex_citation=r""" +@article{47761, + author = {Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh +and Chris Alberti and Danielle Epstein and Illia Polosukhin and Matthew Kelcey and Jacob Devlin and Kenton Lee +and Kristina N. Toutanova and Llion Jones and Ming-Wei Chang and Andrew Dai and Jakob Uszkoreit and Quoc Le +and Slav Petrov}, + journal = {Transactions of the Association of Computational +Linguistics}, + title = {Natural Questions: a Benchmark for Question Answering Research}, + year = {2019}, +} +""", prompt={ "query": "Given a question, retrieve Wikipedia passages that answer the question" }, @@ -61,12 +67,18 @@ class NQHardNegatives(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@article{47761,title = {Natural Questions: a Benchmark for Question Answering Research}, - author = {Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh - and Chris Alberti and Danielle Epstein and Illia Polosukhin and Matthew Kelcey and Jacob Devlin and Kenton Lee - and Kristina N. 
Toutanova and Llion Jones and Ming-Wei Chang and Andrew Dai and Jakob Uszkoreit and Quoc Le - and Slav Petrov},year = {2019},journal = {Transactions of the Association of Computational - Linguistics}}""", + bibtex_citation=r""" +@article{47761, + author = {Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh +and Chris Alberti and Danielle Epstein and Illia Polosukhin and Matthew Kelcey and Jacob Devlin and Kenton Lee +and Kristina N. Toutanova and Llion Jones and Ming-Wei Chang and Andrew Dai and Jakob Uszkoreit and Quoc Le +and Slav Petrov}, + journal = {Transactions of the Association of Computational +Linguistics}, + title = {Natural Questions: a Benchmark for Question Answering Research}, + year = {2019}, +} +""", prompt={ "query": "Given a question, retrieve Wikipedia passages that answer the question" }, diff --git a/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py b/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py index 9f8ccbd783..190a9810fa 100644 --- a/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py @@ -30,16 +30,18 @@ class NanoArguAnaRetrieval(AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{boteva2016, + bibtex_citation=r""" +@inproceedings{boteva2016, author = {Boteva, Vera and Gholipour, Demian and Sokolov, Artem and Riezler, Stefan}, - title = {A Full-Text Learning to Rank Dataset for Medical Information Retrieval}, + city = {Padova}, + country = {Italy}, journal = {Proceedings of the 38th European Conference on Information Retrieval}, journal-abbrev = {ECIR}, + title = {A Full-Text Learning to Rank Dataset for Medical Information Retrieval}, + url = {http://www.cl.uni-heidelberg.de/~riezler/publications/papers/ECIR2016.pdf}, year = {2016}, - city = {Padova}, - country = {Italy}, - url = {http://www.cl.uni-heidelberg.de/~riezler/publications/papers/ECIR2016.pdf} 
-}""", +} +""", prompt={"query": "Given a claim, find documents that refute the claim"}, adapted_from=["ArguAna"], ) diff --git a/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py b/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py index bde0dc4b32..910dafa0e4 100644 --- a/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py @@ -30,14 +30,16 @@ class NanoClimateFeverRetrieval(AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{diggelmann2021climatefever, - title={CLIMATE-FEVER: A Dataset for Verification of Real-World Climate Claims}, - author={Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold}, - year={2021}, - eprint={2012.00614}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{diggelmann2021climatefever, + archiveprefix = {arXiv}, + author = {Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold}, + eprint = {2012.00614}, + primaryclass = {cs.CL}, + title = {CLIMATE-FEVER: A Dataset for Verification of Real-World Climate Claims}, + year = {2021}, +} +""", prompt={ "query": "Given a claim about climate change, retrieve documents that support or refute the claim" }, diff --git a/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py b/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py index da21922d72..b0cd4fd753 100644 --- a/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py @@ -30,7 +30,14 @@ class NanoDBPediaRetrieval(AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{lehmann2015dbpedia, title={DBpedia: A large-scale, multilingual knowledge base extracted from Wikipedia}, author={Lehmann, Jens and et al.}, journal={Semantic Web}, year={2015}}""", + 
bibtex_citation=r""" +@article{lehmann2015dbpedia, + author = {Lehmann, Jens and et al.}, + journal = {Semantic Web}, + title = {DBpedia: A large-scale, multilingual knowledge base extracted from Wikipedia}, + year = {2015}, +} +""", prompt={ "query": "Given a query, retrieve relevant entity descriptions from DBPedia" }, diff --git a/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py b/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py index 8fbb463ecb..aa560aa40d 100644 --- a/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py @@ -33,25 +33,27 @@ class NanoFEVERRetrieval(AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{thorne-etal-2018-fever, - title = "{FEVER}: a Large-scale Dataset for Fact Extraction and {VER}ification", - author = "Thorne, James and - Vlachos, Andreas and - Christodoulopoulos, Christos and - Mittal, Arpit", - editor = "Walker, Marilyn and - Ji, Heng and - Stent, Amanda", - booktitle = "Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)", - month = jun, - year = "2018", - address = "New Orleans, Louisiana", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/N18-1074", - doi = "10.18653/v1/N18-1074", - pages = "809--819", - abstract = "In this paper we introduce a new publicly available dataset for verification against textual sources, FEVER: Fact Extraction and VERification. It consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. The claims are classified as Supported, Refuted or NotEnoughInfo by annotators achieving 0.6841 in Fleiss kappa. 
For the first two classes, the annotators also recorded the sentence(s) forming the necessary evidence for their judgment. To characterize the challenge of the dataset presented, we develop a pipeline approach and compare it to suitably designed oracles. The best accuracy we achieve on labeling a claim accompanied by the correct evidence is 31.87{\%}, while if we ignore the evidence we achieve 50.91{\%}. Thus we believe that FEVER is a challenging testbed that will help stimulate progress on claim verification against textual sources.", -}""", + bibtex_citation=r""" +@inproceedings{thorne-etal-2018-fever, + abstract = {In this paper we introduce a new publicly available dataset for verification against textual sources, FEVER: Fact Extraction and VERification. It consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. The claims are classified as Supported, Refuted or NotEnoughInfo by annotators achieving 0.6841 in Fleiss kappa. For the first two classes, the annotators also recorded the sentence(s) forming the necessary evidence for their judgment. To characterize the challenge of the dataset presented, we develop a pipeline approach and compare it to suitably designed oracles. The best accuracy we achieve on labeling a claim accompanied by the correct evidence is 31.87{\%}, while if we ignore the evidence we achieve 50.91{\%}. 
Thus we believe that FEVER is a challenging testbed that will help stimulate progress on claim verification against textual sources.}, + address = {New Orleans, Louisiana}, + author = {Thorne, James and +Vlachos, Andreas and +Christodoulopoulos, Christos and +Mittal, Arpit}, + booktitle = {Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)}, + doi = {10.18653/v1/N18-1074}, + editor = {Walker, Marilyn and +Ji, Heng and +Stent, Amanda}, + month = jun, + pages = {809--819}, + publisher = {Association for Computational Linguistics}, + title = {{FEVER}: a Large-scale Dataset for Fact Extraction and {VER}ification}, + url = {https://aclanthology.org/N18-1074}, + year = {2018}, +} +""", prompt={ "query": "Given a claim, retrieve documents that support or refute the claim" }, diff --git a/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py b/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py index acef51ee26..0061fa6c79 100644 --- a/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py @@ -30,14 +30,15 @@ class NanoFiQA2018Retrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{ -thakur2021beir, -title={{BEIR}: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models}, -author={Nandan Thakur and Nils Reimers and Andreas R{\"u}ckl{\'e} and Abhishek Srivastava and Iryna Gurevych}, -booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)}, -year={2021}, -url={https://openreview.net/forum?id=wCu6T5xFjeJ} -}""", + bibtex_citation=r""" +@inproceedings{thakur2021beir, + author = {Nandan Thakur and Nils Reimers and Andreas R{\"u}ckl{\'e} and Abhishek Srivastava and Iryna Gurevych}, + booktitle = {Thirty-fifth Conference on Neural Information 
Processing Systems Datasets and Benchmarks Track (Round 2)}, + title = {{BEIR}: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models}, + url = {https://openreview.net/forum?id=wCu6T5xFjeJ}, + year = {2021}, +} +""", prompt={ "query": "Given a financial question, retrieve user replies that best answer the question" }, diff --git a/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py b/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py index cc033a1a2c..4726f328fe 100644 --- a/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py @@ -32,29 +32,31 @@ class NanoHotpotQARetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{yang-etal-2018-hotpotqa, - title = "{H}otpot{QA}: A Dataset for Diverse, Explainable Multi-hop Question Answering", - author = "Yang, Zhilin and - Qi, Peng and - Zhang, Saizheng and - Bengio, Yoshua and - Cohen, William and - Salakhutdinov, Ruslan and - Manning, Christopher D.", - editor = "Riloff, Ellen and - Chiang, David and - Hockenmaier, Julia and - Tsujii, Jun{'}ichi", - booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing", - month = oct # "-" # nov, - year = "2018", - address = "Brussels, Belgium", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/D18-1259", - doi = "10.18653/v1/D18-1259", - pages = "2369--2380", - abstract = "Existing question answering (QA) datasets fail to train QA systems to perform complex reasoning and provide explanations for answers. 
We introduce HotpotQA, a new dataset with 113k Wikipedia-based question-answer pairs with four key features: (1) the questions require finding and reasoning over multiple supporting documents to answer; (2) the questions are diverse and not constrained to any pre-existing knowledge bases or knowledge schemas; (3) we provide sentence-level supporting facts required for reasoning, allowing QA systems to reason with strong supervision and explain the predictions; (4) we offer a new type of factoid comparison questions to test QA systems{'} ability to extract relevant facts and perform necessary comparison. We show that HotpotQA is challenging for the latest QA systems, and the supporting facts enable models to improve performance and make explainable predictions.", -}""", + bibtex_citation=r""" +@inproceedings{yang-etal-2018-hotpotqa, + abstract = {Existing question answering (QA) datasets fail to train QA systems to perform complex reasoning and provide explanations for answers. We introduce HotpotQA, a new dataset with 113k Wikipedia-based question-answer pairs with four key features: (1) the questions require finding and reasoning over multiple supporting documents to answer; (2) the questions are diverse and not constrained to any pre-existing knowledge bases or knowledge schemas; (3) we provide sentence-level supporting facts required for reasoning, allowing QA systems to reason with strong supervision and explain the predictions; (4) we offer a new type of factoid comparison questions to test QA systems{'} ability to extract relevant facts and perform necessary comparison. 
We show that HotpotQA is challenging for the latest QA systems, and the supporting facts enable models to improve performance and make explainable predictions.}, + address = {Brussels, Belgium}, + author = {Yang, Zhilin and +Qi, Peng and +Zhang, Saizheng and +Bengio, Yoshua and +Cohen, William and +Salakhutdinov, Ruslan and +Manning, Christopher D.}, + booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing}, + doi = {10.18653/v1/D18-1259}, + editor = {Riloff, Ellen and +Chiang, David and +Hockenmaier, Julia and +Tsujii, Jun{'}ichi}, + month = oct # {-} # nov, + pages = {2369--2380}, + publisher = {Association for Computational Linguistics}, + title = {{H}otpot{QA}: A Dataset for Diverse, Explainable Multi-hop Question Answering}, + url = {https://aclanthology.org/D18-1259}, + year = {2018}, +} +""", prompt={ "query": "Given a multi-hop question, retrieve documents that can help answer the question" }, diff --git a/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py b/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py index d30c3c24b4..b2d5241cdc 100644 --- a/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py @@ -30,26 +30,27 @@ class NanoMSMARCORetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{DBLP:journals/corr/NguyenRSGTMD16, - author = {Tri Nguyen and - Mir Rosenberg and - Xia Song and - Jianfeng Gao and - Saurabh Tiwary and - Rangan Majumder and - Li Deng}, - title = {{MS} {MARCO:} {A} Human Generated MAchine Reading COmprehension Dataset}, - journal = {CoRR}, - volume = {abs/1611.09268}, - year = {2016}, - url = {http://arxiv.org/abs/1611.09268}, - archivePrefix = {arXiv}, - eprint = {1611.09268}, + bibtex_citation=r""" +@article{DBLP:journals/corr/NguyenRSGTMD16, + archiveprefix = {arXiv}, + author = {Tri Nguyen and +Mir Rosenberg and +Xia Song and +Jianfeng Gao and +Saurabh Tiwary 
and +Rangan Majumder and +Li Deng}, + bibsource = {dblp computer science bibliography, https://dblp.org}, + biburl = {https://dblp.org/rec/journals/corr/NguyenRSGTMD16.bib}, + eprint = {1611.09268}, + journal = {CoRR}, timestamp = {Mon, 13 Aug 2018 16:49:03 +0200}, - biburl = {https://dblp.org/rec/journals/corr/NguyenRSGTMD16.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} + title = {{MS} {MARCO:} {A} Human Generated MAchine Reading COmprehension Dataset}, + url = {http://arxiv.org/abs/1611.09268}, + volume = {abs/1611.09268}, + year = {2016}, } -}""", +""", prompt={ "query": "Given a web search query, retrieve relevant passages that answer the query" }, diff --git a/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py b/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py index 157491df9b..e72d9647e3 100644 --- a/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py @@ -30,16 +30,18 @@ class NanoNFCorpusRetrieval(AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{boteva2016, + bibtex_citation=r""" +@inproceedings{boteva2016, author = {Boteva, Vera and Gholipour, Demian and Sokolov, Artem and Riezler, Stefan}, - title = {A Full-Text Learning to Rank Dataset for Medical Information Retrieval}, + city = {Padova}, + country = {Italy}, journal = {Proceedings of the 38th European Conference on Information Retrieval}, journal-abbrev = {ECIR}, + title = {A Full-Text Learning to Rank Dataset for Medical Information Retrieval}, + url = {http://www.cl.uni-heidelberg.de/~riezler/publications/papers/ECIR2016.pdf}, year = {2016}, - city = {Padova}, - country = {Italy}, - url = {http://www.cl.uni-heidelberg.de/~riezler/publications/papers/ECIR2016.pdf} -}""", +} +""", prompt={ "query": "Given a question, retrieve relevant documents that best answer the question" }, diff --git a/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py 
b/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py index 0cac9088bc..52c741d988 100644 --- a/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py @@ -30,12 +30,18 @@ class NanoNQRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{47761,title = {Natural Questions: a Benchmark for Question Answering Research}, - author = {Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh - and Chris Alberti and Danielle Epstein and Illia Polosukhin and Matthew Kelcey and Jacob Devlin and Kenton Lee - and Kristina N. Toutanova and Llion Jones and Ming-Wei Chang and Andrew Dai and Jakob Uszkoreit and Quoc Le - and Slav Petrov},year = {2019},journal = {Transactions of the Association of Computational - Linguistics}}""", + bibtex_citation=r""" +@article{47761, + author = {Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh +and Chris Alberti and Danielle Epstein and Illia Polosukhin and Matthew Kelcey and Jacob Devlin and Kenton Lee +and Kristina N. 
Toutanova and Llion Jones and Ming-Wei Chang and Andrew Dai and Jakob Uszkoreit and Quoc Le +and Slav Petrov}, + journal = {Transactions of the Association of Computational +Linguistics}, + title = {Natural Questions: a Benchmark for Question Answering Research}, + year = {2019}, +} +""", prompt={ "query": "Given a question, retrieve Wikipedia passages that answer the question" }, diff --git a/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py b/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py index f22e2c4420..a8cd253cc9 100644 --- a/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py @@ -32,13 +32,15 @@ class NanoQuoraRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{quora-question-pairs, - author = {DataCanary, hilfialkaff, Lili Jiang, Meg Risdal, Nikhil Dandekar, tomtung}, - title = {Quora Question Pairs}, - publisher = {Kaggle}, - year = {2017}, - url = {https://kaggle.com/competitions/quora-question-pairs} -}""", + bibtex_citation=r""" +@misc{quora-question-pairs, + author = {DataCanary, hilfialkaff, Lili Jiang, Meg Risdal, Nikhil Dandekar, tomtung}, + publisher = {Kaggle}, + title = {Quora Question Pairs}, + url = {https://kaggle.com/competitions/quora-question-pairs}, + year = {2017}, +} +""", prompt={ "query": "Given a question, retrieve questions that are semantically equivalent to the given question" }, diff --git a/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py b/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py index 7f376ca30d..ebd97ace44 100644 --- a/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py @@ -32,12 +32,14 @@ class NanoSCIDOCSRetrieval(AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{specter2020cohan, - title={SPECTER: Document-level Representation Learning using Citation-informed 
Transformers}, - author={Arman Cohan and Sergey Feldman and Iz Beltagy and Doug Downey and Daniel S. Weld}, - booktitle={ACL}, - year={2020} -}""", + bibtex_citation=r""" +@inproceedings{specter2020cohan, + author = {Arman Cohan and Sergey Feldman and Iz Beltagy and Doug Downey and Daniel S. Weld}, + booktitle = {ACL}, + title = {SPECTER: Document-level Representation Learning using Citation-informed Transformers}, + year = {2020}, +} +""", prompt={ "query": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper" }, diff --git a/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py b/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py index 63e1827de8..8e2716c401 100644 --- a/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py @@ -30,12 +30,14 @@ class NanoSciFactRetrieval(AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{specter2020cohan, - title={SPECTER: Document-level Representation Learning using Citation-informed Transformers}, - author={Arman Cohan and Sergey Feldman and Iz Beltagy and Doug Downey and Daniel S. Weld}, - booktitle={ACL}, - year={2020} -}""", + bibtex_citation=r""" +@inproceedings{specter2020cohan, + author = {Arman Cohan and Sergey Feldman and Iz Beltagy and Doug Downey and Daniel S. 
Weld}, + booktitle = {ACL}, + title = {SPECTER: Document-level Representation Learning using Citation-informed Transformers}, + year = {2020}, +} +""", prompt={ "query": "Given a scientific claim, retrieve documents that support or refute the claim" }, diff --git a/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py b/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py index cd07fa8453..d48f90ba2b 100644 --- a/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py +++ b/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py @@ -30,23 +30,25 @@ class NanoTouche2020Retrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@dataset{potthast_2022_6862281, - author = {Potthast, Martin and - Gienapp, Lukas and - Wachsmuth, Henning and - Hagen, Matthias and - Fröbe, Maik and - Bondarenko, Alexander and - Ajjour, Yamen and - Stein, Benno}, - title = {{Touché20-Argument-Retrieval-for-Controversial- - Questions}}, - month = jul, - year = 2022, - publisher = {Zenodo}, - doi = {10.5281/zenodo.6862281}, - url = {https://doi.org/10.5281/zenodo.6862281} -}""", + bibtex_citation=r""" +@dataset{potthast_2022_6862281, + author = {Potthast, Martin and +Gienapp, Lukas and +Wachsmuth, Henning and +Hagen, Matthias and +Fröbe, Maik and +Bondarenko, Alexander and +Ajjour, Yamen and +Stein, Benno}, + doi = {10.5281/zenodo.6862281}, + month = jul, + publisher = {Zenodo}, + title = {{Touché20-Argument-Retrieval-for-Controversial- +Questions}}, + url = {https://doi.org/10.5281/zenodo.6862281}, + year = {2022}, +} +""", prompt={ "query": "Given a question, retrieve detailed and persuasive arguments that answer the question" }, diff --git a/mteb/tasks/Retrieval/eng/NarrativeQARetrieval.py b/mteb/tasks/Retrieval/eng/NarrativeQARetrieval.py index 988048bb0b..bd5ce1dc5a 100644 --- a/mteb/tasks/Retrieval/eng/NarrativeQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/NarrativeQARetrieval.py @@ -34,14 +34,16 @@ class 
NarrativeQARetrieval(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{kočiský2017narrativeqa, - title={The NarrativeQA Reading Comprehension Challenge}, - author={Tomáš Kočiský and Jonathan Schwarz and Phil Blunsom and Chris Dyer and Karl Moritz Hermann and Gábor Melis and Edward Grefenstette}, - year={2017}, - eprint={1712.07040}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{kočiský2017narrativeqa, + archiveprefix = {arXiv}, + author = {Tomáš Kočiský and Jonathan Schwarz and Phil Blunsom and Chris Dyer and Karl Moritz Hermann and Gábor Melis and Edward Grefenstette}, + eprint = {1712.07040}, + primaryclass = {cs.CL}, + title = {The NarrativeQA Reading Comprehension Challenge}, + year = {2017}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/eng/PiqaRetrieval.py b/mteb/tasks/Retrieval/eng/PiqaRetrieval.py index 335c252a7e..4b3e7126a9 100644 --- a/mteb/tasks/Retrieval/eng/PiqaRetrieval.py +++ b/mteb/tasks/Retrieval/eng/PiqaRetrieval.py @@ -28,20 +28,22 @@ class PIQA(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} -} + bibtex_citation=r""" @inproceedings{bisk2020piqa, - title={Piqa: Reasoning about physical commonsense in natural language}, - author={Bisk, Yonatan and Zellers, Rowan and Gao, Jianfeng and Choi, Yejin and others}, - booktitle={Proceedings of the AAAI conference on artificial intelligence}, - volume={34}, - number={05}, - pages={7432--7439}, - year={2020} + author = {Bisk, Yonatan and Zellers, Rowan and Gao, Jianfeng and Choi, Yejin and others}, + booktitle = {Proceedings of the AAAI conference on artificial intelligence}, + number = {05}, + pages = {7432--7439}, + 
title = {Piqa: Reasoning about physical commonsense in natural language}, + volume = {34}, + year = {2020}, +} + +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={"query": "Given the following goal, retrieve a possible solution."}, diff --git a/mteb/tasks/Retrieval/eng/QuailRetrieval.py b/mteb/tasks/Retrieval/eng/QuailRetrieval.py index 221e11cc0f..b89eed9792 100644 --- a/mteb/tasks/Retrieval/eng/QuailRetrieval.py +++ b/mteb/tasks/Retrieval/eng/QuailRetrieval.py @@ -28,20 +28,22 @@ class Quail(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} -} + bibtex_citation=r""" @inproceedings{rogers2020getting, - title={Getting closer to AI complete question answering: A set of prerequisite real tasks}, - author={Rogers, Anna and Kovaleva, Olga and Downey, Matthew and Rumshisky, Anna}, - booktitle={Proceedings of the AAAI conference on artificial intelligence}, - volume={34}, - number={05}, - pages={8722--8731}, - year={2020} + author = {Rogers, Anna and Kovaleva, Olga and Downey, Matthew and Rumshisky, Anna}, + booktitle = {Proceedings of the AAAI conference on artificial intelligence}, + number = {05}, + pages = {8722--8731}, + title = {Getting closer to AI complete question answering: A set of prerequisite real tasks}, + volume = {34}, + year = {2020}, +} + +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={ diff --git a/mteb/tasks/Retrieval/eng/QuoraRetrieval.py 
b/mteb/tasks/Retrieval/eng/QuoraRetrieval.py index 9210ab1249..de5b59d953 100644 --- a/mteb/tasks/Retrieval/eng/QuoraRetrieval.py +++ b/mteb/tasks/Retrieval/eng/QuoraRetrieval.py @@ -32,13 +32,15 @@ class QuoraRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{quora-question-pairs, - author = {DataCanary, hilfialkaff, Lili Jiang, Meg Risdal, Nikhil Dandekar, tomtung}, - title = {Quora Question Pairs}, - publisher = {Kaggle}, - year = {2017}, - url = {https://kaggle.com/competitions/quora-question-pairs} -}""", + bibtex_citation=r""" +@misc{quora-question-pairs, + author = {DataCanary, hilfialkaff, Lili Jiang, Meg Risdal, Nikhil Dandekar, tomtung}, + publisher = {Kaggle}, + title = {Quora Question Pairs}, + url = {https://kaggle.com/competitions/quora-question-pairs}, + year = {2017}, +} +""", prompt={ "query": "Given a question, retrieve questions that are semantically equivalent to the given question" }, @@ -72,12 +74,14 @@ class QuoraRetrievalHardNegatives(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{quora-question-pairs, - author = {DataCanary, hilfialkaff, Lili Jiang, Meg Risdal, Nikhil Dandekar, tomtung}, - title = {Quora Question Pairs}, - publisher = {Kaggle}, - year = {2017}, - url = {https://kaggle.com/competitions/quora-question-pairs} -}""", + bibtex_citation=r""" +@misc{quora-question-pairs, + author = {DataCanary, hilfialkaff, Lili Jiang, Meg Risdal, Nikhil Dandekar, tomtung}, + publisher = {Kaggle}, + title = {Quora Question Pairs}, + url = {https://kaggle.com/competitions/quora-question-pairs}, + year = {2017}, +} +""", adapted_from=["QuoraRetrieval"], ) diff --git a/mteb/tasks/Retrieval/eng/RARbCodeRetrieval.py b/mteb/tasks/Retrieval/eng/RARbCodeRetrieval.py index b42cd4bd71..bdc4518fd2 100644 --- a/mteb/tasks/Retrieval/eng/RARbCodeRetrieval.py +++ b/mteb/tasks/Retrieval/eng/RARbCodeRetrieval.py @@ -28,28 
+28,26 @@ class RARbCode(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} + bibtex_citation=r""" +@article{husain2019codesearchnet, + author = {Husain, Hamel and Wu, Ho-Hsiang and Gazit, Tiferet and Allamanis, Miltiadis and Brockschmidt, Marc}, + journal = {arXiv preprint arXiv:1909.09436}, + title = {Codesearchnet challenge: Evaluating the state of semantic code search}, + year = {2019}, } + @article{muennighoff2023octopack, - title={Octopack: Instruction tuning code large language models}, - author={Muennighoff, Niklas and Liu, Qian and Zebaze, Armel and Zheng, Qinkai and Hui, Binyuan and Zhuo, Terry Yue and Singh, Swayam and Tang, Xiangru and Von Werra, Leandro and Longpre, Shayne}, - journal={arXiv preprint arXiv:2308.07124}, - year={2023} + author = {Muennighoff, Niklas and Liu, Qian and Zebaze, Armel and Zheng, Qinkai and Hui, Binyuan and Zhuo, Terry Yue and Singh, Swayam and Tang, Xiangru and Von Werra, Leandro and Longpre, Shayne}, + journal = {arXiv preprint arXiv:2308.07124}, + title = {Octopack: Instruction tuning code large language models}, + year = {2023}, } -@article{austin2021program, - title={Program Synthesis with Large Language Models}, - author={Austin, Jacob and Odena, Augustus and Nye, Maxwell and Bosma, Maarten and Michalewski, Henryk and Dohan, David and Jiang, Ellen and Cai, Carrie and Terry, Michael and Le, Quoc and others}, - journal={arXiv preprint arXiv:2108.07732}, - year={2021} -@article{husain2019codesearchnet, - title={Codesearchnet challenge: Evaluating the state of semantic code search}, - author={Husain, Hamel and Wu, Ho-Hsiang and Gazit, Tiferet and Allamanis, Miltiadis and Brockschmidt, Marc}, - journal={arXiv preprint arXiv:1909.09436}, - year={2019} + 
+@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={"query": "Retrieve the answer for the following coding problem."}, diff --git a/mteb/tasks/Retrieval/eng/RARbMathRetrieval.py b/mteb/tasks/Retrieval/eng/RARbMathRetrieval.py index 88855a8eaf..05e55becd4 100644 --- a/mteb/tasks/Retrieval/eng/RARbMathRetrieval.py +++ b/mteb/tasks/Retrieval/eng/RARbMathRetrieval.py @@ -28,29 +28,33 @@ class RARbMath(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} + bibtex_citation=r""" +@article{cobbe2021training, + author = {Cobbe, Karl and Kosaraju, Vineet and Bavarian, Mohammad and Chen, Mark and Jun, Heewoo and Kaiser, Lukasz and Plappert, Matthias and Tworek, Jerry and Hilton, Jacob and Nakano, Reiichiro and others}, + journal = {arXiv preprint arXiv:2110.14168}, + title = {Training verifiers to solve math word problems}, + year = {2021}, } + @article{hendrycks2021measuring, - title={Measuring mathematical problem solving with the math dataset}, - author={Hendrycks, Dan and Burns, Collin and Kadavath, Saurav and Arora, Akul and Basart, Steven and Tang, Eric and Song, Dawn and Steinhardt, Jacob}, - journal={arXiv preprint arXiv:2103.03874}, - year={2021} + author = {Hendrycks, Dan and Burns, Collin and Kadavath, Saurav and Arora, Akul and Basart, Steven and Tang, Eric and Song, Dawn and Steinhardt, Jacob}, + journal = {arXiv preprint arXiv:2103.03874}, + title = {Measuring mathematical problem solving with the math dataset}, + year = {2021}, } -@article{cobbe2021training, - title={Training verifiers to solve math word problems}, - author={Cobbe, 
Karl and Kosaraju, Vineet and Bavarian, Mohammad and Chen, Mark and Jun, Heewoo and Kaiser, Lukasz and Plappert, Matthias and Tworek, Jerry and Hilton, Jacob and Nakano, Reiichiro and others}, - journal={arXiv preprint arXiv:2110.14168}, - year={2021} + +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } + @article{yu2023metamath, - title={Metamath: Bootstrap your own mathematical questions for large language models}, - author={Yu, Longhui and Jiang, Weisen and Shi, Han and Yu, Jincheng and Liu, Zhengying and Zhang, Yu and Kwok, James T and Li, Zhenguo and Weller, Adrian and Liu, Weiyang}, - journal={arXiv preprint arXiv:2309.12284}, - year={2023} + author = {Yu, Longhui and Jiang, Weisen and Shi, Han and Yu, Jincheng and Liu, Zhengying and Zhang, Yu and Kwok, James T and Li, Zhenguo and Weller, Adrian and Liu, Weiyang}, + journal = {arXiv preprint arXiv:2309.12284}, + title = {Metamath: Bootstrap your own mathematical questions for large language models}, + year = {2023}, } """, prompt={"query": "Retrieve the answer for the following math problem."}, diff --git a/mteb/tasks/Retrieval/eng/SCIDOCSRetrieval.py b/mteb/tasks/Retrieval/eng/SCIDOCSRetrieval.py index 231c695d48..4ab6911e42 100644 --- a/mteb/tasks/Retrieval/eng/SCIDOCSRetrieval.py +++ b/mteb/tasks/Retrieval/eng/SCIDOCSRetrieval.py @@ -30,12 +30,14 @@ class SCIDOCS(AbsTaskRetrieval): annotations_creators=None, dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{specter2020cohan, - title={SPECTER: Document-level Representation Learning using Citation-informed Transformers}, - author={Arman Cohan and Sergey Feldman and Iz Beltagy and Doug Downey and Daniel S. 
Weld}, - booktitle={ACL}, - year={2020} -}""", + bibtex_citation=r""" +@inproceedings{specter2020cohan, + author = {Arman Cohan and Sergey Feldman and Iz Beltagy and Doug Downey and Daniel S. Weld}, + booktitle = {ACL}, + title = {SPECTER: Document-level Representation Learning using Citation-informed Transformers}, + year = {2020}, +} +""", prompt={ "query": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper" }, diff --git a/mteb/tasks/Retrieval/eng/SciFactRetrieval.py b/mteb/tasks/Retrieval/eng/SciFactRetrieval.py index a44eb052bd..f36f7247cf 100644 --- a/mteb/tasks/Retrieval/eng/SciFactRetrieval.py +++ b/mteb/tasks/Retrieval/eng/SciFactRetrieval.py @@ -27,12 +27,14 @@ class SciFact(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{specter2020cohan, - title={SPECTER: Document-level Representation Learning using Citation-informed Transformers}, - author={Arman Cohan and Sergey Feldman and Iz Beltagy and Doug Downey and Daniel S. Weld}, - booktitle={ACL}, - year={2020} -}""", + bibtex_citation=r""" +@inproceedings{specter2020cohan, + author = {Arman Cohan and Sergey Feldman and Iz Beltagy and Doug Downey and Daniel S. 
Weld}, + booktitle = {ACL}, + title = {SPECTER: Document-level Representation Learning using Citation-informed Transformers}, + year = {2020}, +} +""", prompt={ "query": "Given a scientific claim, retrieve documents that support or refute the claim" }, diff --git a/mteb/tasks/Retrieval/eng/SiqaRetrieval.py b/mteb/tasks/Retrieval/eng/SiqaRetrieval.py index b8c42f7675..922e750a6a 100644 --- a/mteb/tasks/Retrieval/eng/SiqaRetrieval.py +++ b/mteb/tasks/Retrieval/eng/SiqaRetrieval.py @@ -28,17 +28,19 @@ class SIQA(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} -} + bibtex_citation=r""" @article{sap2019socialiqa, - title={Socialiqa: Commonsense reasoning about social interactions}, - author={Sap, Maarten and Rashkin, Hannah and Chen, Derek and LeBras, Ronan and Choi, Yejin}, - journal={arXiv preprint arXiv:1904.09728}, - year={2019} + author = {Sap, Maarten and Rashkin, Hannah and Chen, Derek and LeBras, Ronan and Choi, Yejin}, + journal = {arXiv preprint arXiv:1904.09728}, + title = {Socialiqa: Commonsense reasoning about social interactions}, + year = {2019}, +} + +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={ diff --git a/mteb/tasks/Retrieval/eng/SpartQARetrieval.py b/mteb/tasks/Retrieval/eng/SpartQARetrieval.py index c0262f01cd..f4fb275e37 100644 --- a/mteb/tasks/Retrieval/eng/SpartQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/SpartQARetrieval.py @@ -28,17 +28,19 @@ class SpartQA(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: 
Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} -} + bibtex_citation=r""" @article{mirzaee2021spartqa, - title={Spartqa:: A textual question answering benchmark for spatial reasoning}, - author={Mirzaee, Roshanak and Faghihi, Hossein Rajaby and Ning, Qiang and Kordjmashidi, Parisa}, - journal={arXiv preprint arXiv:2104.05832}, - year={2021} + author = {Mirzaee, Roshanak and Faghihi, Hossein Rajaby and Ning, Qiang and Kordjmashidi, Parisa}, + journal = {arXiv preprint arXiv:2104.05832}, + title = {Spartqa:: A textual question answering benchmark for spatial reasoning}, + year = {2021}, +} + +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={ diff --git a/mteb/tasks/Retrieval/eng/TRECCOVIDRetrieval.py b/mteb/tasks/Retrieval/eng/TRECCOVIDRetrieval.py index f48adcb255..3b3764a4fd 100644 --- a/mteb/tasks/Retrieval/eng/TRECCOVIDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/TRECCOVIDRetrieval.py @@ -27,14 +27,16 @@ class TRECCOVID(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{roberts2021searching, - title={Searching for Scientific Evidence in a Pandemic: An Overview of TREC-COVID}, - author={Kirk Roberts and Tasmeer Alam and Steven Bedrick and Dina Demner-Fushman and Kyle Lo and Ian Soboroff and Ellen Voorhees and Lucy Lu Wang and William R Hersh}, - year={2021}, - eprint={2104.09632}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{roberts2021searching, + archiveprefix = {arXiv}, + author = {Kirk Roberts and Tasmeer Alam and Steven Bedrick and Dina Demner-Fushman and Kyle Lo and Ian Soboroff and Ellen Voorhees and Lucy Lu Wang and William R Hersh}, + eprint = {2104.09632}, + 
primaryclass = {cs.IR}, + title = {Searching for Scientific Evidence in a Pandemic: An Overview of TREC-COVID}, + year = {2021}, +} +""", prompt={ "query": "Given a query on COVID-19, retrieve documents that answer the query" }, diff --git a/mteb/tasks/Retrieval/eng/TempReasonL1Retrieval.py b/mteb/tasks/Retrieval/eng/TempReasonL1Retrieval.py index 392dd1c1b7..4a939593b3 100644 --- a/mteb/tasks/Retrieval/eng/TempReasonL1Retrieval.py +++ b/mteb/tasks/Retrieval/eng/TempReasonL1Retrieval.py @@ -28,17 +28,19 @@ class TempReasonL1(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} -} + bibtex_citation=r""" @article{tan2023towards, - title={Towards benchmarking and improving the temporal reasoning capability of large language models}, - author={Tan, Qingyu and Ng, Hwee Tou and Bing, Lidong}, - journal={arXiv preprint arXiv:2306.08952}, - year={2023} + author = {Tan, Qingyu and Ng, Hwee Tou and Bing, Lidong}, + journal = {arXiv preprint arXiv:2306.08952}, + title = {Towards benchmarking and improving the temporal reasoning capability of large language models}, + year = {2023}, +} + +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={ diff --git a/mteb/tasks/Retrieval/eng/TempReasonL2ContextRetrieval.py b/mteb/tasks/Retrieval/eng/TempReasonL2ContextRetrieval.py index 924c1621f5..25e1fa96da 100644 --- a/mteb/tasks/Retrieval/eng/TempReasonL2ContextRetrieval.py +++ b/mteb/tasks/Retrieval/eng/TempReasonL2ContextRetrieval.py @@ -28,17 +28,19 @@ class TempReasonL2Context(AbsTaskRetrieval): annotations_creators="derived", dialect=[], 
sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} -} + bibtex_citation=r""" @article{tan2023towards, - title={Towards benchmarking and improving the temporal reasoning capability of large language models}, - author={Tan, Qingyu and Ng, Hwee Tou and Bing, Lidong}, - journal={arXiv preprint arXiv:2306.08952}, - year={2023} + author = {Tan, Qingyu and Ng, Hwee Tou and Bing, Lidong}, + journal = {arXiv preprint arXiv:2306.08952}, + title = {Towards benchmarking and improving the temporal reasoning capability of large language models}, + year = {2023}, +} + +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={ diff --git a/mteb/tasks/Retrieval/eng/TempReasonL2FactRetrieval.py b/mteb/tasks/Retrieval/eng/TempReasonL2FactRetrieval.py index 4e1fc53a29..4f280a9d65 100644 --- a/mteb/tasks/Retrieval/eng/TempReasonL2FactRetrieval.py +++ b/mteb/tasks/Retrieval/eng/TempReasonL2FactRetrieval.py @@ -28,17 +28,19 @@ class TempReasonL2Fact(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} -} + bibtex_citation=r""" @article{tan2023towards, - title={Towards benchmarking and improving the temporal reasoning capability of large language models}, - author={Tan, Qingyu and Ng, Hwee Tou and Bing, Lidong}, - journal={arXiv preprint arXiv:2306.08952}, - year={2023} + author = {Tan, Qingyu and Ng, Hwee Tou and Bing, Lidong}, + journal = {arXiv preprint 
arXiv:2306.08952}, + title = {Towards benchmarking and improving the temporal reasoning capability of large language models}, + year = {2023}, +} + +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={ diff --git a/mteb/tasks/Retrieval/eng/TempReasonL2PureRetrieval.py b/mteb/tasks/Retrieval/eng/TempReasonL2PureRetrieval.py index b69989af03..4e0899004d 100644 --- a/mteb/tasks/Retrieval/eng/TempReasonL2PureRetrieval.py +++ b/mteb/tasks/Retrieval/eng/TempReasonL2PureRetrieval.py @@ -28,17 +28,19 @@ class TempReasonL2Pure(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} -} + bibtex_citation=r""" @article{tan2023towards, - title={Towards benchmarking and improving the temporal reasoning capability of large language models}, - author={Tan, Qingyu and Ng, Hwee Tou and Bing, Lidong}, - journal={arXiv preprint arXiv:2306.08952}, - year={2023} + author = {Tan, Qingyu and Ng, Hwee Tou and Bing, Lidong}, + journal = {arXiv preprint arXiv:2306.08952}, + title = {Towards benchmarking and improving the temporal reasoning capability of large language models}, + year = {2023}, +} + +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={"query": "Given the following question, retrieve the correct answer."}, diff --git a/mteb/tasks/Retrieval/eng/TempReasonL3ContextRetrieval.py b/mteb/tasks/Retrieval/eng/TempReasonL3ContextRetrieval.py index 65f70ab13a..26e7b44404 100644 --- 
a/mteb/tasks/Retrieval/eng/TempReasonL3ContextRetrieval.py +++ b/mteb/tasks/Retrieval/eng/TempReasonL3ContextRetrieval.py @@ -28,17 +28,19 @@ class TempReasonL3Context(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} -} + bibtex_citation=r""" @article{tan2023towards, - title={Towards benchmarking and improving the temporal reasoning capability of large language models}, - author={Tan, Qingyu and Ng, Hwee Tou and Bing, Lidong}, - journal={arXiv preprint arXiv:2306.08952}, - year={2023} + author = {Tan, Qingyu and Ng, Hwee Tou and Bing, Lidong}, + journal = {arXiv preprint arXiv:2306.08952}, + title = {Towards benchmarking and improving the temporal reasoning capability of large language models}, + year = {2023}, +} + +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={ diff --git a/mteb/tasks/Retrieval/eng/TempReasonL3FactRetrieval.py b/mteb/tasks/Retrieval/eng/TempReasonL3FactRetrieval.py index 65db6a70ba..532469d67c 100644 --- a/mteb/tasks/Retrieval/eng/TempReasonL3FactRetrieval.py +++ b/mteb/tasks/Retrieval/eng/TempReasonL3FactRetrieval.py @@ -28,17 +28,19 @@ class TempReasonL3Fact(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} -} + bibtex_citation=r""" @article{tan2023towards, - title={Towards benchmarking and improving the temporal reasoning capability of large language 
models}, - author={Tan, Qingyu and Ng, Hwee Tou and Bing, Lidong}, - journal={arXiv preprint arXiv:2306.08952}, - year={2023} + author = {Tan, Qingyu and Ng, Hwee Tou and Bing, Lidong}, + journal = {arXiv preprint arXiv:2306.08952}, + title = {Towards benchmarking and improving the temporal reasoning capability of large language models}, + year = {2023}, +} + +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={ diff --git a/mteb/tasks/Retrieval/eng/TempReasonL3PureRetrieval.py b/mteb/tasks/Retrieval/eng/TempReasonL3PureRetrieval.py index 32738f7180..12340dd35b 100644 --- a/mteb/tasks/Retrieval/eng/TempReasonL3PureRetrieval.py +++ b/mteb/tasks/Retrieval/eng/TempReasonL3PureRetrieval.py @@ -28,17 +28,19 @@ class TempReasonL3Pure(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} -} + bibtex_citation=r""" @article{tan2023towards, - title={Towards benchmarking and improving the temporal reasoning capability of large language models}, - author={Tan, Qingyu and Ng, Hwee Tou and Bing, Lidong}, - journal={arXiv preprint arXiv:2306.08952}, - year={2023} + author = {Tan, Qingyu and Ng, Hwee Tou and Bing, Lidong}, + journal = {arXiv preprint arXiv:2306.08952}, + title = {Towards benchmarking and improving the temporal reasoning capability of large language models}, + year = {2023}, +} + +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={"query": "Given the following 
question, retrieve the correct answer."}, diff --git a/mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py b/mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py index 5814fbc648..dddb0e0c89 100644 --- a/mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py @@ -40,16 +40,16 @@ class TopiOCQARetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{adlakha2022topiocqa, - title={TopiOCQA: Open-domain Conversational Question Answering with Topic Switching}, - author={Vaibhav Adlakha and Shehzaad Dhuliawala and Kaheer Suleman and Harm de Vries and Siva Reddy}, - year={2022}, - eprint={2110.00768}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - """, + bibtex_citation=r""" +@misc{adlakha2022topiocqa, + archiveprefix = {arXiv}, + author = {Vaibhav Adlakha and Shehzaad Dhuliawala and Kaheer Suleman and Harm de Vries and Siva Reddy}, + eprint = {2110.00768}, + primaryclass = {cs.CL}, + title = {TopiOCQA: Open-domain Conversational Question Answering with Topic Switching}, + year = {2022}, +} +""", ) # TODO: Will be removed if curated and added to mteb HF @@ -123,15 +123,15 @@ class TopiOCQARetrievalHardNegatives(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @misc{adlakha2022topiocqa, - title={TopiOCQA: Open-domain Conversational Question Answering with Topic Switching}, - author={Vaibhav Adlakha and Shehzaad Dhuliawala and Kaheer Suleman and Harm de Vries and Siva Reddy}, - year={2022}, - eprint={2110.00768}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - """, + bibtex_citation=r""" +@misc{adlakha2022topiocqa, + archiveprefix = {arXiv}, + author = {Vaibhav Adlakha and Shehzaad Dhuliawala and Kaheer Suleman and Harm de Vries and Siva Reddy}, + eprint = {2110.00768}, + primaryclass = {cs.CL}, + title = {TopiOCQA: Open-domain Conversational Question Answering with Topic Switching}, + 
year = {2022}, +} +""", adapted_from=["TopiOCQA"], ) diff --git a/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py b/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py index e97452da60..dd196b7f3e 100644 --- a/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py +++ b/mteb/tasks/Retrieval/eng/Touche2020Retrieval.py @@ -28,23 +28,25 @@ class Touche2020(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@dataset{potthast_2022_6862281, - author = {Potthast, Martin and - Gienapp, Lukas and - Wachsmuth, Henning and - Hagen, Matthias and - Fröbe, Maik and - Bondarenko, Alexander and - Ajjour, Yamen and - Stein, Benno}, - title = {{Touché20-Argument-Retrieval-for-Controversial- - Questions}}, - month = jul, - year = 2022, - publisher = {Zenodo}, - doi = {10.5281/zenodo.6862281}, - url = {https://doi.org/10.5281/zenodo.6862281} -}""", + bibtex_citation=r""" +@dataset{potthast_2022_6862281, + author = {Potthast, Martin and +Gienapp, Lukas and +Wachsmuth, Henning and +Hagen, Matthias and +Fröbe, Maik and +Bondarenko, Alexander and +Ajjour, Yamen and +Stein, Benno}, + doi = {10.5281/zenodo.6862281}, + month = jul, + publisher = {Zenodo}, + title = {{Touché20-Argument-Retrieval-for-Controversial- +Questions}}, + url = {https://doi.org/10.5281/zenodo.6862281}, + year = {2022}, +} +""", prompt={ "query": "Given a question, retrieve detailed and persuasive arguments that answer the question" }, @@ -74,12 +76,14 @@ class Touche2020v3Retrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@INPROCEEDINGS{Thakur_etal_SIGIR2024, - author = "Nandan Thakur and Luiz Bonifacio and Maik {Fr\"{o}be} and Alexander Bondarenko and Ehsan Kamalloo and Martin Potthast and Matthias Hagen and Jimmy Lin", - title = "Systematic Evaluation of Neural Retrieval Models on the {Touch\'{e}} 2020 Argument Retrieval Subset of {BEIR}", - booktitle = "Proceedings of the 47th 
International ACM SIGIR Conference on Research and Development in Information Retrieval", - year = 2024, - address_ = "Washington, D.C." -}""", + bibtex_citation=r""" +@inproceedings{Thakur_etal_SIGIR2024, + address_ = {Washington, D.C.}, + author = {Nandan Thakur and Luiz Bonifacio and Maik {Fr\"{o}be} and Alexander Bondarenko and Ehsan Kamalloo and Martin Potthast and Matthias Hagen and Jimmy Lin}, + booktitle = {Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval}, + title = {Systematic Evaluation of Neural Retrieval Models on the {Touch\'{e}} 2020 Argument Retrieval Subset of {BEIR}}, + year = {2024}, +} +""", adapted_from=["Touche2020"], ) diff --git a/mteb/tasks/Retrieval/eng/WinoGrandeRetrieval.py b/mteb/tasks/Retrieval/eng/WinoGrandeRetrieval.py index 01b5f2d1cc..e308f2afd5 100644 --- a/mteb/tasks/Retrieval/eng/WinoGrandeRetrieval.py +++ b/mteb/tasks/Retrieval/eng/WinoGrandeRetrieval.py @@ -28,21 +28,23 @@ class WinoGrande(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} -} + bibtex_citation=r""" @article{sakaguchi2021winogrande, - title={Winogrande: An adversarial winograd schema challenge at scale}, - author={Sakaguchi, Keisuke and Bras, Ronan Le and Bhagavatula, Chandra and Choi, Yejin}, - journal={Communications of the ACM}, - volume={64}, - number={9}, - pages={99--106}, - year={2021}, - publisher={ACM New York, NY, USA} + author = {Sakaguchi, Keisuke and Bras, Ronan Le and Bhagavatula, Chandra and Choi, Yejin}, + journal = {Communications of the ACM}, + number = {9}, + pages = {99--106}, + publisher = {ACM New York, NY, USA}, + title = {Winogrande: An adversarial winograd schema challenge at scale}, + volume = {64}, + year = {2021}, 
+} + +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, } """, prompt={ diff --git a/mteb/tasks/Retrieval/est/estqa.py b/mteb/tasks/Retrieval/est/estqa.py index b8eebb61d9..71dbaa7aa4 100644 --- a/mteb/tasks/Retrieval/est/estqa.py +++ b/mteb/tasks/Retrieval/est/estqa.py @@ -32,12 +32,12 @@ class EstQA(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @mastersthesis{mastersthesis, - author = {Anu Käver}, - title = {Extractive Question Answering for Estonian Language}, - school = {Tallinn University of Technology (TalTech)}, - year = 2021 + author = {Anu Käver}, + school = {Tallinn University of Technology (TalTech)}, + title = {Extractive Question Answering for Estonian Language}, + year = {2021}, } """, ) diff --git a/mteb/tasks/Retrieval/fra/AlloprofRetrieval.py b/mteb/tasks/Retrieval/fra/AlloprofRetrieval.py index ada02b511b..ccc8e7c581 100644 --- a/mteb/tasks/Retrieval/fra/AlloprofRetrieval.py +++ b/mteb/tasks/Retrieval/fra/AlloprofRetrieval.py @@ -30,16 +30,18 @@ class AlloprofRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{lef23, - doi = {10.48550/ARXIV.2302.07738}, - url = {https://arxiv.org/abs/2302.07738}, + bibtex_citation=r""" +@misc{lef23, author = {Lefebvre-Brossard, Antoine and Gazaille, Stephane and Desmarais, Michel C.}, + copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International}, + doi = {10.48550/ARXIV.2302.07738}, keywords = {Computation and Language (cs.CL), Information Retrieval (cs.IR), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences}, - title = {Alloprof: a new French question-answer education dataset and its use in an 
information retrieval case study}, publisher = {arXiv}, + title = {Alloprof: a new French question-answer education dataset and its use in an information retrieval case study}, + url = {https://arxiv.org/abs/2302.07738}, year = {2023}, - copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International} -}""", +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/fra/BSARDRetrieval.py b/mteb/tasks/Retrieval/fra/BSARDRetrieval.py index 93509c51fc..9052a40a30 100644 --- a/mteb/tasks/Retrieval/fra/BSARDRetrieval.py +++ b/mteb/tasks/Retrieval/fra/BSARDRetrieval.py @@ -32,18 +32,20 @@ class BSARDRetrieval(AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{louis2022statutory, - title = {A Statutory Article Retrieval Dataset in French}, + bibtex_citation=r""" +@inproceedings{louis2022statutory, + address = {Dublin, Ireland}, author = {Louis, Antoine and Spanakis, Gerasimos}, booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics}, + doi = {10.18653/v1/2022.acl-long.468}, month = may, - year = {2022}, - address = {Dublin, Ireland}, + pages = {6789–6803}, publisher = {Association for Computational Linguistics}, + title = {A Statutory Article Retrieval Dataset in French}, url = {https://aclanthology.org/2022.acl-long.468/}, - doi = {10.18653/v1/2022.acl-long.468}, - pages = {6789–6803}, -}""", + year = {2022}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/fra/FQuADRetrieval.py b/mteb/tasks/Retrieval/fra/FQuADRetrieval.py index 20a54b8232..c54a760599 100644 --- a/mteb/tasks/Retrieval/fra/FQuADRetrieval.py +++ b/mteb/tasks/Retrieval/fra/FQuADRetrieval.py @@ -31,25 +31,27 @@ class FQuADRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{dhoffschmidt-etal-2020-fquad, - title = "{FQ}u{AD}: 
{F}rench Question Answering Dataset", - author = "d{'}Hoffschmidt, Martin and - Belblidia, Wacim and - Heinrich, Quentin and - Brendl{\'e}, Tom and - Vidal, Maxime", - editor = "Cohn, Trevor and - He, Yulan and - Liu, Yang", - booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020", - month = nov, - year = "2020", - address = "Online", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2020.findings-emnlp.107", - doi = "10.18653/v1/2020.findings-emnlp.107", - pages = "1193--1208", -}""", + bibtex_citation=r""" +@inproceedings{dhoffschmidt-etal-2020-fquad, + address = {Online}, + author = {d{'}Hoffschmidt, Martin and +Belblidia, Wacim and +Heinrich, Quentin and +Brendl{\'e}, Tom and +Vidal, Maxime}, + booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2020}, + doi = {10.18653/v1/2020.findings-emnlp.107}, + editor = {Cohn, Trevor and +He, Yulan and +Liu, Yang}, + month = nov, + pages = {1193--1208}, + publisher = {Association for Computational Linguistics}, + title = {{FQ}u{AD}: {F}rench Question Answering Dataset}, + url = {https://aclanthology.org/2020.findings-emnlp.107}, + year = {2020}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/fra/SyntecRetrieval.py b/mteb/tasks/Retrieval/fra/SyntecRetrieval.py index fb17776268..5a161c5b15 100644 --- a/mteb/tasks/Retrieval/fra/SyntecRetrieval.py +++ b/mteb/tasks/Retrieval/fra/SyntecRetrieval.py @@ -31,14 +31,16 @@ class SyntecRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@misc{ciancone2024extending, - title={Extending the Massive Text Embedding Benchmark to French}, - author={Mathieu Ciancone and Imene Kerboua and Marion Schaeffer and Wissam Siblini}, - year={2024}, - eprint={2405.20468}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{ciancone2024extending, + archiveprefix = 
{arXiv}, + author = {Mathieu Ciancone and Imene Kerboua and Marion Schaeffer and Wissam Siblini}, + eprint = {2405.20468}, + primaryclass = {cs.CL}, + title = {Extending the Massive Text Embedding Benchmark to French}, + year = {2024}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/hun/HunSum2.py b/mteb/tasks/Retrieval/hun/HunSum2.py index 2a82834450..a8399d554b 100644 --- a/mteb/tasks/Retrieval/hun/HunSum2.py +++ b/mteb/tasks/Retrieval/hun/HunSum2.py @@ -34,14 +34,14 @@ class HunSum2AbstractiveRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @misc{barta2024news, - title={From News to Summaries: Building a Hungarian Corpus for Extractive and Abstractive Summarization}, - author={Botond Barta and Dorina Lakatos and Attila Nagy and Milán Konor Nyist and Judit Ács}, - year={2024}, - eprint={2404.03555}, - archivePrefix={arXiv}, - primaryClass={cs.CL} + archiveprefix = {arXiv}, + author = {Botond Barta and Dorina Lakatos and Attila Nagy and Milán Konor Nyist and Judit Ács}, + eprint = {2404.03555}, + primaryclass = {cs.CL}, + title = {From News to Summaries: Building a Hungarian Corpus for Extractive and Abstractive Summarization}, + year = {2024}, } """, ) diff --git a/mteb/tasks/Retrieval/jpn/JaQuADRetrieval.py b/mteb/tasks/Retrieval/jpn/JaQuADRetrieval.py index 07fb165632..3f16441fee 100644 --- a/mteb/tasks/Retrieval/jpn/JaQuADRetrieval.py +++ b/mteb/tasks/Retrieval/jpn/JaQuADRetrieval.py @@ -29,14 +29,16 @@ class JaQuADRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=None, sample_creation="found", - bibtex_citation="""@misc{so2022jaquad, - title={{JaQuAD: Japanese Question Answering Dataset for Machine Reading Comprehension}}, - author={ByungHoon So and Kyuhong Byun and Kyungwon Kang and Seongjin Cho}, - year={2022}, - eprint={2202.01764}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" 
+@misc{so2022jaquad, + archiveprefix = {arXiv}, + author = {ByungHoon So and Kyuhong Byun and Kyungwon Kang and Seongjin Cho}, + eprint = {2202.01764}, + primaryclass = {cs.CL}, + title = {{JaQuAD: Japanese Question Answering Dataset for Machine Reading Comprehension}}, + year = {2022}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py b/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py index c84b8fb72a..19c42cb474 100644 --- a/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py +++ b/mteb/tasks/Retrieval/jpn/JaqketRetrieval.py @@ -26,12 +26,14 @@ class JaqketRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@InProceedings{Kurihara_nlp2020, -author = "鈴木正敏 and 鈴木潤 and 松田耕史 and ⻄田京介 and 井之上直也", -title = "JAQKET: クイズを題材にした日本語 QA データセットの構築", -booktitle = "言語処理学会第26回年次大会", -year = "2020", -url = "https://www.anlp.jp/proceedings/annual_meeting/2020/pdf_dir/P2-24.pdf", -note= "in Japanese" -}""", + bibtex_citation=r""" +@inproceedings{Kurihara_nlp2020, + author = {鈴木正敏 and 鈴木潤 and 松田耕史 and ⻄田京介 and 井之上直也}, + booktitle = {言語処理学会第26回年次大会}, + note = {in Japanese}, + title = {JAQKET: クイズを題材にした日本語 QA データセットの構築}, + url = {https://www.anlp.jp/proceedings/annual_meeting/2020/pdf_dir/P2-24.pdf}, + year = {2020}, +} +""", ) diff --git a/mteb/tasks/Retrieval/kor/AutoRAGRetrieval.py b/mteb/tasks/Retrieval/kor/AutoRAGRetrieval.py index 10102d73ac..18ae3e45c3 100644 --- a/mteb/tasks/Retrieval/kor/AutoRAGRetrieval.py +++ b/mteb/tasks/Retrieval/kor/AutoRAGRetrieval.py @@ -28,13 +28,15 @@ class AutoRAGRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@misc{kim2024autoragautomatedframeworkoptimization, - title={AutoRAG: Automated Framework for optimization of Retrieval Augmented Generation Pipeline}, - author={Dongkyu Kim and Byoungwook Kim and Donggeon Han and Matouš Eibich}, - year={2024}, - 
eprint={2410.20878}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2410.20878}, -}""", + bibtex_citation=r""" +@misc{kim2024autoragautomatedframeworkoptimization, + archiveprefix = {arXiv}, + author = {Dongkyu Kim and Byoungwook Kim and Donggeon Han and Matouš Eibich}, + eprint = {2410.20878}, + primaryclass = {cs.CL}, + title = {AutoRAG: Automated Framework for optimization of Retrieval Augmented Generation Pipeline}, + url = {https://arxiv.org/abs/2410.20878}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/Retrieval/kor/KoStrategyQA.py b/mteb/tasks/Retrieval/kor/KoStrategyQA.py index ce64da5432..4c5b7ec906 100644 --- a/mteb/tasks/Retrieval/kor/KoStrategyQA.py +++ b/mteb/tasks/Retrieval/kor/KoStrategyQA.py @@ -27,10 +27,12 @@ class KoStrategyQA(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@article{geva2021strategyqa, - title = {{Did Aristotle Use a Laptop? A Question Answering Benchmark with Implicit Reasoning Strategies}}, + bibtex_citation=r""" +@article{geva2021strategyqa, author = {Geva, Mor and Khashabi, Daniel and Segal, Elad and Khot, Tushar and Roth, Dan and Berant, Jonathan}, journal = {Transactions of the Association for Computational Linguistics (TACL)}, + title = {{Did Aristotle Use a Laptop? 
A Question Answering Benchmark with Implicit Reasoning Strategies}}, year = {2021}, -}""", +} +""", ) diff --git a/mteb/tasks/Retrieval/multilingual/BelebeleRetrieval.py b/mteb/tasks/Retrieval/multilingual/BelebeleRetrieval.py index 8e134751b0..9284c4cc45 100644 --- a/mteb/tasks/Retrieval/multilingual/BelebeleRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/BelebeleRetrieval.py @@ -202,12 +202,14 @@ class BelebeleRetrieval(MultilingualTask, AbsTaskRetrieval): task_subtypes=["Question answering"], annotations_creators="expert-annotated", dialect=[], - bibtex_citation="""@article{bandarkar2023belebele, - title={The Belebele Benchmark: a Parallel Reading Comprehension Dataset in 122 Language Variants}, - author={Lucas Bandarkar and Davis Liang and Benjamin Muller and Mikel Artetxe and Satya Narayan Shukla and Donald Husa and Naman Goyal and Abhinandan Krishnan and Luke Zettlemoyer and Madian Khabsa}, - year={2023}, - journal={arXiv preprint arXiv:2308.16884} -}""", + bibtex_citation=r""" +@article{bandarkar2023belebele, + author = {Lucas Bandarkar and Davis Liang and Benjamin Muller and Mikel Artetxe and Satya Narayan Shukla and Donald Husa and Naman Goyal and Abhinandan Krishnan and Luke Zettlemoyer and Madian Khabsa}, + journal = {arXiv preprint arXiv:2308.16884}, + title = {The Belebele Benchmark: a Parallel Reading Comprehension Dataset in 122 Language Variants}, + year = {2023}, +} +""", ) def load_data(self, **kwargs) -> None: diff --git a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py index 2d207c74bf..6528faba3e 100644 --- a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py +++ b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py @@ -52,7 +52,7 @@ class CrossLingualSemanticDiscriminationWMT19(AbsTaskRetrieval, MultilingualTask annotations_creators="derived", dialect=[], 
sample_creation="LM-generated and verified", - bibtex_citation="", # preprint_coming + bibtex_citation="", # preprint_coming ) def __init__(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py index 9235c04d04..c569bab909 100644 --- a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py +++ b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py @@ -52,7 +52,7 @@ class CrossLingualSemanticDiscriminationWMT21(AbsTaskRetrieval, MultilingualTask annotations_creators="derived", dialect=[], sample_creation="LM-generated and verified", - bibtex_citation="", # preprint_coming + bibtex_citation="", # preprint_coming ) def __init__(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/IndicQARetrieval.py b/mteb/tasks/Retrieval/multilingual/IndicQARetrieval.py index 62a166f89c..e489142d15 100644 --- a/mteb/tasks/Retrieval/multilingual/IndicQARetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/IndicQARetrieval.py @@ -47,13 +47,15 @@ class IndicQARetrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="machine-translated and verified", - bibtex_citation="""@article{doddapaneni2022towards, - title = {Towards Leaving No Indic Language Behind: Building Monolingual Corpora, Benchmark and Models for Indic Languages}, - author = {Sumanth Doddapaneni and Rahul Aralikatte and Gowtham Ramesh and Shreyansh Goyal and Mitesh M. Khapra and Anoop Kunchukuttan and Pratyush Kumar}, - journal = {Annual Meeting of the Association for Computational Linguistics}, - year = {2022}, - doi = {10.18653/v1/2023.acl-long.693} -}""", + bibtex_citation=r""" +@article{doddapaneni2022towards, + author = {Sumanth Doddapaneni and Rahul Aralikatte and Gowtham Ramesh and Shreyansh Goyal and Mitesh M. 
Khapra and Anoop Kunchukuttan and Pratyush Kumar}, + doi = {10.18653/v1/2023.acl-long.693}, + journal = {Annual Meeting of the Association for Computational Linguistics}, + title = {Towards Leaving No Indic Language Behind: Building Monolingual Corpora, Benchmark and Models for Indic Languages}, + year = {2022}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py b/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py index a4101abbdb..d32d4d4cbd 100644 --- a/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py @@ -125,20 +125,22 @@ class MIRACLRetrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@article{10.1162/tacl_a_00595, - author = {Zhang, Xinyu and Thakur, Nandan and Ogundepo, Odunayo and Kamalloo, Ehsan and Alfonso-Hermelo, David and Li, Xiaoguang and Liu, Qun and Rezagholizadeh, Mehdi and Lin, Jimmy}, - title = "{MIRACL: A Multilingual Retrieval Dataset Covering 18 Diverse Languages}", - journal = {Transactions of the Association for Computational Linguistics}, - volume = {11}, - pages = {1114-1131}, - year = {2023}, - month = {09}, - abstract = "{MIRACL is a multilingual dataset for ad hoc retrieval across 18 languages that collectively encompass over three billion native speakers around the world. This resource is designed to support monolingual retrieval tasks, where the queries and the corpora are in the same language. In total, we have gathered over 726k high-quality relevance judgments for 78k queries over Wikipedia in these languages, where all annotations have been performed by native speakers hired by our team. MIRACL covers languages that are both typologically close as well as distant from 10 language families and 13 sub-families, associated with varying amounts of publicly available resources. 
Extensive automatic heuristic verification and manual assessments were performed during the annotation process to control data quality. In total, MIRACL represents an investment of around five person-years of human annotator effort. Our goal is to spur research on improving retrieval across a continuum of languages, thus enhancing information access capabilities for diverse populations around the world, particularly those that have traditionally been underserved. MIRACL is available at http://miracl.ai/.}", - issn = {2307-387X}, - doi = {10.1162/tacl_a_00595}, - url = {https://doi.org/10.1162/tacl\_a\_00595}, - eprint = {https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl\_a\_00595/2157340/tacl\_a\_00595.pdf}, -}""", + bibtex_citation=r""" +@article{10.1162/tacl_a_00595, + abstract = {{MIRACL is a multilingual dataset for ad hoc retrieval across 18 languages that collectively encompass over three billion native speakers around the world. This resource is designed to support monolingual retrieval tasks, where the queries and the corpora are in the same language. In total, we have gathered over 726k high-quality relevance judgments for 78k queries over Wikipedia in these languages, where all annotations have been performed by native speakers hired by our team. MIRACL covers languages that are both typologically close as well as distant from 10 language families and 13 sub-families, associated with varying amounts of publicly available resources. Extensive automatic heuristic verification and manual assessments were performed during the annotation process to control data quality. In total, MIRACL represents an investment of around five person-years of human annotator effort. Our goal is to spur research on improving retrieval across a continuum of languages, thus enhancing information access capabilities for diverse populations around the world, particularly those that have traditionally been underserved. 
MIRACL is available at http://miracl.ai/.}}, + author = {Zhang, Xinyu and Thakur, Nandan and Ogundepo, Odunayo and Kamalloo, Ehsan and Alfonso-Hermelo, David and Li, Xiaoguang and Liu, Qun and Rezagholizadeh, Mehdi and Lin, Jimmy}, + doi = {10.1162/tacl_a_00595}, + eprint = {https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl\_a\_00595/2157340/tacl\_a\_00595.pdf}, + issn = {2307-387X}, + journal = {Transactions of the Association for Computational Linguistics}, + month = {09}, + pages = {1114-1131}, + title = {{MIRACL: A Multilingual Retrieval Dataset Covering 18 Diverse Languages}}, + url = {https://doi.org/10.1162/tacl\_a\_00595}, + volume = {11}, + year = {2023}, +} +""", prompt={ "query": "Given a question, retrieve Wikipedia passages that answer the question" }, @@ -320,20 +322,22 @@ class MIRACLRetrievalHardNegatives(MultilingualTask, AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@article{10.1162/tacl_a_00595, - author = {Zhang, Xinyu and Thakur, Nandan and Ogundepo, Odunayo and Kamalloo, Ehsan and Alfonso-Hermelo, David and Li, Xiaoguang and Liu, Qun and Rezagholizadeh, Mehdi and Lin, Jimmy}, - title = "{MIRACL: A Multilingual Retrieval Dataset Covering 18 Diverse Languages}", - journal = {Transactions of the Association for Computational Linguistics}, - volume = {11}, - pages = {1114-1131}, - year = {2023}, - month = {09}, - abstract = "{MIRACL is a multilingual dataset for ad hoc retrieval across 18 languages that collectively encompass over three billion native speakers around the world. This resource is designed to support monolingual retrieval tasks, where the queries and the corpora are in the same language. In total, we have gathered over 726k high-quality relevance judgments for 78k queries over Wikipedia in these languages, where all annotations have been performed by native speakers hired by our team. 
MIRACL covers languages that are both typologically close as well as distant from 10 language families and 13 sub-families, associated with varying amounts of publicly available resources. Extensive automatic heuristic verification and manual assessments were performed during the annotation process to control data quality. In total, MIRACL represents an investment of around five person-years of human annotator effort. Our goal is to spur research on improving retrieval across a continuum of languages, thus enhancing information access capabilities for diverse populations around the world, particularly those that have traditionally been underserved. MIRACL is available at http://miracl.ai/.}", - issn = {2307-387X}, - doi = {10.1162/tacl_a_00595}, - url = {https://doi.org/10.1162/tacl\_a\_00595}, - eprint = {https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl\_a\_00595/2157340/tacl\_a\_00595.pdf}, -}""", + bibtex_citation=r""" +@article{10.1162/tacl_a_00595, + abstract = {{MIRACL is a multilingual dataset for ad hoc retrieval across 18 languages that collectively encompass over three billion native speakers around the world. This resource is designed to support monolingual retrieval tasks, where the queries and the corpora are in the same language. In total, we have gathered over 726k high-quality relevance judgments for 78k queries over Wikipedia in these languages, where all annotations have been performed by native speakers hired by our team. MIRACL covers languages that are both typologically close as well as distant from 10 language families and 13 sub-families, associated with varying amounts of publicly available resources. Extensive automatic heuristic verification and manual assessments were performed during the annotation process to control data quality. In total, MIRACL represents an investment of around five person-years of human annotator effort. 
Our goal is to spur research on improving retrieval across a continuum of languages, thus enhancing information access capabilities for diverse populations around the world, particularly those that have traditionally been underserved. MIRACL is available at http://miracl.ai/.}}, + author = {Zhang, Xinyu and Thakur, Nandan and Ogundepo, Odunayo and Kamalloo, Ehsan and Alfonso-Hermelo, David and Li, Xiaoguang and Liu, Qun and Rezagholizadeh, Mehdi and Lin, Jimmy}, + doi = {10.1162/tacl_a_00595}, + eprint = {https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl\_a\_00595/2157340/tacl\_a\_00595.pdf}, + issn = {2307-387X}, + journal = {Transactions of the Association for Computational Linguistics}, + month = {09}, + pages = {1114-1131}, + title = {{MIRACL: A Multilingual Retrieval Dataset Covering 18 Diverse Languages}}, + url = {https://doi.org/10.1162/tacl\_a\_00595}, + volume = {11}, + year = {2023}, +} +""", adapted_from=["MIRACLRetrieval"], ) diff --git a/mteb/tasks/Retrieval/multilingual/MLQARetrieval.py b/mteb/tasks/Retrieval/multilingual/MLQARetrieval.py index c03f280b22..8cc7125fb7 100644 --- a/mteb/tasks/Retrieval/multilingual/MLQARetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MLQARetrieval.py @@ -103,13 +103,15 @@ class MLQARetrieval(AbsTaskRetrieval, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{lewis2019mlqa, - title = {MLQA: Evaluating Cross-lingual Extractive Question Answering}, - author = {Lewis, Patrick and Oguz, Barlas and Rinott, Ruty and Riedel, Sebastian and Schwenk, Holger}, - journal = {arXiv preprint arXiv:1910.07475}, - year = 2019, - eid = {arXiv: 1910.07475} - }""", + bibtex_citation=r""" +@article{lewis2019mlqa, + author = {Lewis, Patrick and Oguz, Barlas and Rinott, Ruty and Riedel, Sebastian and Schwenk, Holger}, + eid = {arXiv: 1910.07475}, + journal = {arXiv preprint arXiv:1910.07475}, + title = {MLQA: Evaluating Cross-lingual Extractive Question 
Answering}, + year = {2019}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py b/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py index 3a44ba4e09..50e0d197f4 100644 --- a/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py @@ -87,19 +87,21 @@ class MintakaRetrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="derived", # best guess dialect=[], sample_creation="human-translated", - bibtex_citation="""@inproceedings{sen-etal-2022-mintaka, - title = "Mintaka: A Complex, Natural, and Multilingual Dataset for End-to-End Question Answering", - author = "Sen, Priyanka and - Aji, Alham Fikri and - Saffari, Amir", - booktitle = "Proceedings of the 29th International Conference on Computational Linguistics", - month = oct, - year = "2022", - address = "Gyeongju, Republic of Korea", - publisher = "International Committee on Computational Linguistics", - url = "https://aclanthology.org/2022.coling-1.138", - pages = "1604--1619" -}""", + bibtex_citation=r""" +@inproceedings{sen-etal-2022-mintaka, + address = {Gyeongju, Republic of Korea}, + author = {Sen, Priyanka and +Aji, Alham Fikri and +Saffari, Amir}, + booktitle = {Proceedings of the 29th International Conference on Computational Linguistics}, + month = oct, + pages = {1604--1619}, + publisher = {International Committee on Computational Linguistics}, + title = {Mintaka: A Complex, Natural, and Multilingual Dataset for End-to-End Question Answering}, + url = {https://aclanthology.org/2022.coling-1.138}, + year = {2022}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py b/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py index 5fcd725ac5..a11ef0df92 100644 --- a/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py @@ -107,12 +107,14 @@ class 
MrTidyRetrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{mrtydi, - title={{Mr. TyDi}: A Multi-lingual Benchmark for Dense Retrieval}, - author={Xinyu Zhang and Xueguang Ma and Peng Shi and Jimmy Lin}, - year={2021}, - journal={arXiv:2108.08787}, - }""", + bibtex_citation=r""" +@article{mrtydi, + author = {Xinyu Zhang and Xueguang Ma and Peng Shi and Jimmy Lin}, + journal = {arXiv:2108.08787}, + title = {{Mr. TyDi}: A Multi-lingual Benchmark for Dense Retrieval}, + year = {2021}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py b/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py index 65cd3c6467..e143dac611 100644 --- a/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py @@ -92,13 +92,14 @@ class MultiLongDocRetrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="LM-generated", # gpt-3.5 dialect=[], sample_creation="found", - bibtex_citation="""@misc{bge-m3, - title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation}, - author={Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu}, - year={2024}, - eprint={2402.03216}, - archivePrefix={arXiv}, - primaryClass={cs.CL} + bibtex_citation=r""" +@misc{bge-m3, + archiveprefix = {arXiv}, + author = {Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu}, + eprint = {2402.03216}, + primaryclass = {cs.CL}, + title = {BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation}, + year = {2024}, } """, ) diff --git a/mteb/tasks/Retrieval/multilingual/NeuCLIR2022Retrieval.py b/mteb/tasks/Retrieval/multilingual/NeuCLIR2022Retrieval.py index 2872a4d396..865473c0dc 100644 
--- a/mteb/tasks/Retrieval/multilingual/NeuCLIR2022Retrieval.py +++ b/mteb/tasks/Retrieval/multilingual/NeuCLIR2022Retrieval.py @@ -80,12 +80,14 @@ class NeuCLIR2022Retrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{lawrie2023overview, - title={Overview of the TREC 2022 NeuCLIR track}, - author={Lawrie, Dawn and MacAvaney, Sean and Mayfield, James and McNamee, Paul and Oard, Douglas W and Soldaini, Luca and Yang, Eugene}, - journal={arXiv preprint arXiv:2304.12367}, - year={2023} -}""", + bibtex_citation=r""" +@article{lawrie2023overview, + author = {Lawrie, Dawn and MacAvaney, Sean and Mayfield, James and McNamee, Paul and Oard, Douglas W and Soldaini, Luca and Yang, Eugene}, + journal = {arXiv preprint arXiv:2304.12367}, + title = {Overview of the TREC 2022 NeuCLIR track}, + year = {2023}, +} +""", ) def load_data(self, **kwargs): @@ -193,12 +195,14 @@ class NeuCLIR2022RetrievalHardNegatives(MultilingualTask, AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@article{lawrie2023overview, - title={Overview of the TREC 2022 NeuCLIR track}, - author={Lawrie, Dawn and MacAvaney, Sean and Mayfield, James and McNamee, Paul and Oard, Douglas W and Soldaini, Luca and Yang, Eugene}, - journal={arXiv preprint arXiv:2304.12367}, - year={2023} -}""", + bibtex_citation=r""" +@article{lawrie2023overview, + author = {Lawrie, Dawn and MacAvaney, Sean and Mayfield, James and McNamee, Paul and Oard, Douglas W and Soldaini, Luca and Yang, Eugene}, + journal = {arXiv preprint arXiv:2304.12367}, + title = {Overview of the TREC 2022 NeuCLIR track}, + year = {2023}, +} +""", adapted_from=["NeuCLIR2022Retrieval"], ) diff --git a/mteb/tasks/Retrieval/multilingual/NeuCLIR2023Retrieval.py b/mteb/tasks/Retrieval/multilingual/NeuCLIR2023Retrieval.py index 675505df85..f28198b474 100644 --- 
a/mteb/tasks/Retrieval/multilingual/NeuCLIR2023Retrieval.py +++ b/mteb/tasks/Retrieval/multilingual/NeuCLIR2023Retrieval.py @@ -79,14 +79,16 @@ class NeuCLIR2023Retrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{lawrie2024overview, - title={Overview of the TREC 2023 NeuCLIR Track}, - author={Dawn Lawrie and Sean MacAvaney and James Mayfield and Paul McNamee and Douglas W. Oard and Luca Soldaini and Eugene Yang}, - year={2024}, - eprint={2404.08071}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{lawrie2024overview, + archiveprefix = {arXiv}, + author = {Dawn Lawrie and Sean MacAvaney and James Mayfield and Paul McNamee and Douglas W. Oard and Luca Soldaini and Eugene Yang}, + eprint = {2404.08071}, + primaryclass = {cs.IR}, + title = {Overview of the TREC 2023 NeuCLIR Track}, + year = {2024}, +} +""", ) def load_data(self, **kwargs): @@ -194,14 +196,16 @@ class NeuCLIR2023RetrievalHardNegatives(MultilingualTask, AbsTaskRetrieval): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{lawrie2024overview, - title={Overview of the TREC 2023 NeuCLIR Track}, - author={Dawn Lawrie and Sean MacAvaney and James Mayfield and Paul McNamee and Douglas W. Oard and Luca Soldaini and Eugene Yang}, - year={2024}, - eprint={2404.08071}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{lawrie2024overview, + archiveprefix = {arXiv}, + author = {Dawn Lawrie and Sean MacAvaney and James Mayfield and Paul McNamee and Douglas W. 
Oard and Luca Soldaini and Eugene Yang}, + eprint = {2404.08071}, + primaryclass = {cs.IR}, + title = {Overview of the TREC 2023 NeuCLIR Track}, + year = {2024}, +} +""", adapted_from=["NeuCLIR2022Retrieval"], ) diff --git a/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py b/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py index 6f7d188b7b..974173c82b 100644 --- a/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py @@ -85,14 +85,14 @@ class PublicHealthQARetrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" -@misc {xing_han_lu_2024, - author = { {Xing Han Lu} }, - title = { publichealth-qa (Revision 3b67b6b) }, - year = 2024, - url = { https://huggingface.co/datasets/xhluca/publichealth-qa }, - doi = { 10.57967/hf/2247 }, - publisher = { Hugging Face } + bibtex_citation=r""" +@misc{xing_han_lu_2024, + author = { {Xing Han Lu} }, + doi = { 10.57967/hf/2247 }, + publisher = { Hugging Face }, + title = { publichealth-qa (Revision 3b67b6b) }, + url = { https://huggingface.co/datasets/xhluca/publichealth-qa }, + year = {2024}, } """, ) diff --git a/mteb/tasks/Retrieval/multilingual/StatcanDialogueDatasetRetrieval.py b/mteb/tasks/Retrieval/multilingual/StatcanDialogueDatasetRetrieval.py index ab7e178c82..c607c7c408 100644 --- a/mteb/tasks/Retrieval/multilingual/StatcanDialogueDatasetRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/StatcanDialogueDatasetRetrieval.py @@ -85,19 +85,19 @@ class StatcanDialogueDatasetRetrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{lu-etal-2023-statcan, - title = "The {S}tat{C}an Dialogue Dataset: Retrieving Data Tables through Conversations with Genuine Intents", - author = "Lu, Xing Han and - Reddy, Siva and - de Vries, Harm", - 
booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics", - month = may, - year = "2023", - address = "Dubrovnik, Croatia", - publisher = "Association for Computational Linguistics", - url = "https://arxiv.org/abs/2304.01412", - pages = "2799--2829", + address = {Dubrovnik, Croatia}, + author = {Lu, Xing Han and +Reddy, Siva and +de Vries, Harm}, + booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics}, + month = may, + pages = {2799--2829}, + publisher = {Association for Computational Linguistics}, + title = {The {S}tat{C}an Dialogue Dataset: Retrieving Data Tables through Conversations with Genuine Intents}, + url = {https://arxiv.org/abs/2304.01412}, + year = {2023}, } """, ) diff --git a/mteb/tasks/Retrieval/multilingual/WebFAQRetrieval.py b/mteb/tasks/Retrieval/multilingual/WebFAQRetrieval.py index 703103a782..64e5646396 100644 --- a/mteb/tasks/Retrieval/multilingual/WebFAQRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/WebFAQRetrieval.py @@ -147,15 +147,17 @@ class WebFAQRetrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@misc{dinzinger2025webfaq, - title={WebFAQ: A Multilingual Collection of Natural Q&A Datasets for Dense Retrieval}, - author={Michael Dinzinger and Laura Caspari and Kanishka Ghosh Dastidar and Jelena Mitrović and Michael Granitzer}, - year={2025}, - eprint={2502.20936}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2502.20936}, -}""", + bibtex_citation=r""" +@misc{dinzinger2025webfaq, + archiveprefix = {arXiv}, + author = {Michael Dinzinger and Laura Caspari and Kanishka Ghosh Dastidar and Jelena Mitrović and Michael Granitzer}, + eprint = {2502.20936}, + primaryclass = {cs.CL}, + title = {WebFAQ: A Multilingual Collection of Natural Q&A Datasets for Dense Retrieval}, + url = 
{https://arxiv.org/abs/2502.20936}, + year = {2025}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py b/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py index bf3dfe8218..f630009419 100644 --- a/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py @@ -86,16 +86,19 @@ class XMarket(MultilingualTask, AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation=""" -@inproceedings{Bonab_2021, series={CIKM ’21}, - title={Cross-Market Product Recommendation}, - url={http://dx.doi.org/10.1145/3459637.3482493}, - DOI={10.1145/3459637.3482493}, - booktitle={Proceedings of the 30th ACM International Conference on Information & Knowledge Management}, - publisher={ACM}, - author={Bonab, Hamed and Aliannejadi, Mohammad and Vardasbi, Ali and Kanoulas, Evangelos and Allan, James}, - year={2021}, - month=oct, collection={CIKM ’21} } + bibtex_citation=r""" +@inproceedings{Bonab_2021, + author = {Bonab, Hamed and Aliannejadi, Mohammad and Vardasbi, Ali and Kanoulas, Evangelos and Allan, James}, + booktitle = {Proceedings of the 30th ACM International Conference on Information & Knowledge Management}, + collection = {CIKM ’21}, + doi = {10.1145/3459637.3482493}, + month = oct, + publisher = {ACM}, + series = {CIKM ’21}, + title = {Cross-Market Product Recommendation}, + url = {http://dx.doi.org/10.1145/3459637.3482493}, + year = {2021}, +} """, ) diff --git a/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py b/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py index 72cbbd6dab..942481138f 100644 --- a/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py @@ -85,13 +85,15 @@ class XPQARetrieval(AbsTaskRetrieval, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{shen2023xpqa, - 
title={xPQA: Cross-Lingual Product Question Answering in 12 Languages}, - author={Shen, Xiaoyu and Asai, Akari and Byrne, Bill and De Gispert, Adria}, - booktitle={Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)}, - pages={103--115}, - year={2023} - }""", + bibtex_citation=r""" +@inproceedings{shen2023xpqa, + author = {Shen, Xiaoyu and Asai, Akari and Byrne, Bill and De Gispert, Adria}, + booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)}, + pages = {103--115}, + title = {xPQA: Cross-Lingual Product Question Answering in 12 Languages}, + year = {2023}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/XQuADRetrieval.py b/mteb/tasks/Retrieval/multilingual/XQuADRetrieval.py index 4d952896e3..2886eeffe4 100644 --- a/mteb/tasks/Retrieval/multilingual/XQuADRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/XQuADRetrieval.py @@ -47,23 +47,25 @@ class XQuADRetrieval(MultilingualTask, AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@article{Artetxe:etal:2019, - author = {Mikel Artetxe and Sebastian Ruder and Dani Yogatama}, - title = {On the cross-lingual transferability of monolingual representations}, - journal = {CoRR}, - volume = {abs/1910.11856}, - year = {2019}, - archivePrefix = {arXiv}, - eprint = {1910.11856} + bibtex_citation=r""" +@article{Artetxe:etal:2019, + archiveprefix = {arXiv}, + author = {Mikel Artetxe and Sebastian Ruder and Dani Yogatama}, + eprint = {1910.11856}, + journal = {CoRR}, + title = {On the cross-lingual transferability of monolingual representations}, + volume = {abs/1910.11856}, + year = {2019}, } -@inproceedings{ - dumitrescu2021liro, - title={LiRo: Benchmark and leaderboard for Romanian language tasks}, - author={Stefan Daniel Dumitrescu and Petru Rebeja and Beata Lorincz and Mihaela 
Gaman and Andrei Avram and Mihai Ilie and Andrei Pruteanu and Adriana Stan and Lorena Rosia and Cristina Iacobescu and Luciana Morogan and George Dima and Gabriel Marchidan and Traian Rebedea and Madalina Chitez and Dani Yogatama and Sebastian Ruder and Radu Tudor Ionescu and Razvan Pascanu and Viorica Patraucean}, - booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)}, - year={2021}, - url={https://openreview.net/forum?id=JH61CD7afTv} -}""", + +@inproceedings{dumitrescu2021liro, + author = {Stefan Daniel Dumitrescu and Petru Rebeja and Beata Lorincz and Mihaela Gaman and Andrei Avram and Mihai Ilie and Andrei Pruteanu and Adriana Stan and Lorena Rosia and Cristina Iacobescu and Luciana Morogan and George Dima and Gabriel Marchidan and Traian Rebedea and Madalina Chitez and Dani Yogatama and Sebastian Ruder and Radu Tudor Ionescu and Razvan Pascanu and Viorica Patraucean}, + booktitle = {Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)}, + title = {LiRo: Benchmark and leaderboard for Romanian language tasks}, + url = {https://openreview.net/forum?id=JH61CD7afTv}, + year = {2021}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/nld/ArguAnaNLRetrieval.py b/mteb/tasks/Retrieval/nld/ArguAnaNLRetrieval.py index 2094bf8081..d38c2d500b 100644 --- a/mteb/tasks/Retrieval/nld/ArguAnaNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/ArguAnaNLRetrieval.py @@ -30,14 +30,16 @@ class ArguAnaNL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - 
primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["ArguAna"], ) diff --git a/mteb/tasks/Retrieval/nld/CQADupstackAndroidNLRetrieval.py b/mteb/tasks/Retrieval/nld/CQADupstackAndroidNLRetrieval.py index 86a40620a5..98dc5c6848 100644 --- a/mteb/tasks/Retrieval/nld/CQADupstackAndroidNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/CQADupstackAndroidNLRetrieval.py @@ -31,15 +31,17 @@ class CQADupstackAndroidNLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["CQADupstackAndroid"], ) diff --git a/mteb/tasks/Retrieval/nld/CQADupstackEnglishNLRetrieval.py b/mteb/tasks/Retrieval/nld/CQADupstackEnglishNLRetrieval.py index e47cf3d139..de9f34a85a 100644 --- a/mteb/tasks/Retrieval/nld/CQADupstackEnglishNLRetrieval.py +++ 
b/mteb/tasks/Retrieval/nld/CQADupstackEnglishNLRetrieval.py @@ -31,15 +31,17 @@ class CQADupstackEnglishNLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["CQADupstackEnglish"], ) diff --git a/mteb/tasks/Retrieval/nld/CQADupstackGamingNLRetrieval.py b/mteb/tasks/Retrieval/nld/CQADupstackGamingNLRetrieval.py index 6c787dc23d..d808774507 100644 --- a/mteb/tasks/Retrieval/nld/CQADupstackGamingNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/CQADupstackGamingNLRetrieval.py @@ -31,15 +31,17 @@ class CQADupstackGamingNLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = 
{Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["CQADupstackGamingRetrieval"], ) diff --git a/mteb/tasks/Retrieval/nld/CQADupstackGisNLRetrieval.py b/mteb/tasks/Retrieval/nld/CQADupstackGisNLRetrieval.py index 08e57961f9..62664ddf69 100644 --- a/mteb/tasks/Retrieval/nld/CQADupstackGisNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/CQADupstackGisNLRetrieval.py @@ -31,15 +31,17 @@ class CQADupstackGisNLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["CQADupstackGisRetrieval"], ) diff --git a/mteb/tasks/Retrieval/nld/CQADupstackMathematicaNLRetrieval.py b/mteb/tasks/Retrieval/nld/CQADupstackMathematicaNLRetrieval.py index 44909f261b..0ca7f3dfbc 100644 --- a/mteb/tasks/Retrieval/nld/CQADupstackMathematicaNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/CQADupstackMathematicaNLRetrieval.py @@ -31,15 +31,17 @@ class CQADupstackMathematicaNLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], 
sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["CQADupstackMathematicaRetrieval"], ) diff --git a/mteb/tasks/Retrieval/nld/CQADupstackPhysicsNLRetrieval.py b/mteb/tasks/Retrieval/nld/CQADupstackPhysicsNLRetrieval.py index 96f65f61d2..ccbe0ce1fe 100644 --- a/mteb/tasks/Retrieval/nld/CQADupstackPhysicsNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/CQADupstackPhysicsNLRetrieval.py @@ -31,15 +31,17 @@ class CQADupstackPhysicsNLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval 
Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["CQADupstackPhysicsRetrieval"], ) diff --git a/mteb/tasks/Retrieval/nld/CQADupstackProgrammersNLRetrieval.py b/mteb/tasks/Retrieval/nld/CQADupstackProgrammersNLRetrieval.py index f9ee41a024..09a158bfac 100644 --- a/mteb/tasks/Retrieval/nld/CQADupstackProgrammersNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/CQADupstackProgrammersNLRetrieval.py @@ -31,15 +31,17 @@ class CQADupstackProgrammersNLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["CQADupstackProgrammersRetrieval"], ) diff --git a/mteb/tasks/Retrieval/nld/CQADupstackStatsNLRetrieval.py b/mteb/tasks/Retrieval/nld/CQADupstackStatsNLRetrieval.py index 97642a45cd..31949a24d3 100644 --- a/mteb/tasks/Retrieval/nld/CQADupstackStatsNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/CQADupstackStatsNLRetrieval.py @@ -31,15 +31,17 @@ class CQADupstackStatsNLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - 
bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["CQADupstackStatsRetrieval"], ) diff --git a/mteb/tasks/Retrieval/nld/CQADupstackTexNLRetrieval.py b/mteb/tasks/Retrieval/nld/CQADupstackTexNLRetrieval.py index f09f7df03c..eff835af20 100644 --- a/mteb/tasks/Retrieval/nld/CQADupstackTexNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/CQADupstackTexNLRetrieval.py @@ -31,15 +31,17 @@ class CQADupstackTexNLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", 
adapted_from=["CQADupstackTexRetrieval"], ) diff --git a/mteb/tasks/Retrieval/nld/CQADupstackUnixNLRetrieval.py b/mteb/tasks/Retrieval/nld/CQADupstackUnixNLRetrieval.py index f67b576d4d..3ce79d2048 100644 --- a/mteb/tasks/Retrieval/nld/CQADupstackUnixNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/CQADupstackUnixNLRetrieval.py @@ -31,15 +31,17 @@ class CQADupstackUnixNLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["CQADupstackUnixRetrieval"], ) diff --git a/mteb/tasks/Retrieval/nld/CQADupstackWebmastersNLRetrieval.py b/mteb/tasks/Retrieval/nld/CQADupstackWebmastersNLRetrieval.py index e834480e98..ecd7e570b8 100644 --- a/mteb/tasks/Retrieval/nld/CQADupstackWebmastersNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/CQADupstackWebmastersNLRetrieval.py @@ -31,15 +31,17 @@ class CQADupstackWebmastersNLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar 
and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["CQADupstackWebmasters"], ) diff --git a/mteb/tasks/Retrieval/nld/CQADupstackWordpressNLRetrieval.py b/mteb/tasks/Retrieval/nld/CQADupstackWordpressNLRetrieval.py index b9faf5a841..4735d57eab 100644 --- a/mteb/tasks/Retrieval/nld/CQADupstackWordpressNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/CQADupstackWordpressNLRetrieval.py @@ -31,15 +31,17 @@ class CQADupstackWordpressNLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["CQADupstackWordpressRetrieval"], ) diff --git a/mteb/tasks/Retrieval/nld/ClimateFEVERNLRetrieval.py 
b/mteb/tasks/Retrieval/nld/ClimateFEVERNLRetrieval.py index b43c39bdbb..222c46d871 100644 --- a/mteb/tasks/Retrieval/nld/ClimateFEVERNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/ClimateFEVERNLRetrieval.py @@ -28,14 +28,16 @@ class ClimateFEVERNL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["ClimateFEVER"], ) diff --git a/mteb/tasks/Retrieval/nld/DBPediaNLRetrieval.py b/mteb/tasks/Retrieval/nld/DBPediaNLRetrieval.py index 90a3675106..501b64a4da 100644 --- a/mteb/tasks/Retrieval/nld/DBPediaNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/DBPediaNLRetrieval.py @@ -27,15 +27,17 @@ class DBPediaNL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" 
+@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", prompt={ "query": "Given a query, retrieve relevant entity descriptions from DBPedia" }, diff --git a/mteb/tasks/Retrieval/nld/FEVERNLRetrieval.py b/mteb/tasks/Retrieval/nld/FEVERNLRetrieval.py index 3c00d578b2..60b995c8e6 100644 --- a/mteb/tasks/Retrieval/nld/FEVERNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/FEVERNLRetrieval.py @@ -33,14 +33,16 @@ class FEVERNL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["FEVER"], ) diff --git a/mteb/tasks/Retrieval/nld/FiQA2018NLRetrieval.py b/mteb/tasks/Retrieval/nld/FiQA2018NLRetrieval.py index 9be7798bfc..b3ef57e362 100644 --- a/mteb/tasks/Retrieval/nld/FiQA2018NLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/FiQA2018NLRetrieval.py @@ -29,14 +29,16 @@ class FiQA2018NL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], 
sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["FiQA2018"], ) diff --git a/mteb/tasks/Retrieval/nld/HotpotQANLRetrieval.py b/mteb/tasks/Retrieval/nld/HotpotQANLRetrieval.py index 642ee6920a..73ff4d625e 100644 --- a/mteb/tasks/Retrieval/nld/HotpotQANLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/HotpotQANLRetrieval.py @@ -31,14 +31,16 @@ class HotpotQANL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = 
{2024}, +} +""", adapted_from=["HotpotQA"], ) diff --git a/mteb/tasks/Retrieval/nld/MMARCONLRetrieval.py b/mteb/tasks/Retrieval/nld/MMARCONLRetrieval.py index c8582b0cb4..951971f148 100644 --- a/mteb/tasks/Retrieval/nld/MMARCONLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/MMARCONLRetrieval.py @@ -29,20 +29,22 @@ class MMMARCONL(AbsTaskRetrieval): annotations_creators="derived", # manually checked a small subset dialect=[], sample_creation="machine-translated and verified", - bibtex_citation="""@article{DBLP:journals/corr/abs-2108-13897, - author = {Luiz Bonifacio and - Israel Campiotti and - Roberto de Alencar Lotufo and - Rodrigo Frassetto Nogueira}, - title = {mMARCO: {A} Multilingual Version of {MS} {MARCO} Passage Ranking Dataset}, - journal = {CoRR}, - volume = {abs/2108.13897}, - year = {2021}, - url = {https://arxiv.org/abs/2108.13897}, - eprinttype = {arXiv}, - eprint = {2108.13897}, - timestamp = {Mon, 20 Mar 2023 15:35:34 +0100}, - biburl = {https://dblp.org/rec/journals/corr/abs-2108-13897.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} -}""", + bibtex_citation=r""" +@article{DBLP:journals/corr/abs-2108-13897, + author = {Luiz Bonifacio and +Israel Campiotti and +Roberto de Alencar Lotufo and +Rodrigo Frassetto Nogueira}, + bibsource = {dblp computer science bibliography, https://dblp.org}, + biburl = {https://dblp.org/rec/journals/corr/abs-2108-13897.bib}, + eprint = {2108.13897}, + eprinttype = {arXiv}, + journal = {CoRR}, + timestamp = {Mon, 20 Mar 2023 15:35:34 +0100}, + title = {mMARCO: {A} Multilingual Version of {MS} {MARCO} Passage Ranking Dataset}, + url = {https://arxiv.org/abs/2108.13897}, + volume = {abs/2108.13897}, + year = {2021}, +} +""", ) diff --git a/mteb/tasks/Retrieval/nld/NFCorpusNLRetrieval.py b/mteb/tasks/Retrieval/nld/NFCorpusNLRetrieval.py index bd0ab7c162..92edcdd4c2 100644 --- a/mteb/tasks/Retrieval/nld/NFCorpusNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/NFCorpusNLRetrieval.py @@ -28,14 +28,16 @@ 
class NFCorpusNL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["NFCorpus"], ) diff --git a/mteb/tasks/Retrieval/nld/NQNLRetrieval.py b/mteb/tasks/Retrieval/nld/NQNLRetrieval.py index f88f95061d..f6e8bf0007 100644 --- a/mteb/tasks/Retrieval/nld/NQNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/NQNLRetrieval.py @@ -27,14 +27,16 @@ class NQNL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + 
url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["NQ"], ) diff --git a/mteb/tasks/Retrieval/nld/QuoraNLRetrieval.py b/mteb/tasks/Retrieval/nld/QuoraNLRetrieval.py index 7e97fc6a88..a3452f71c9 100644 --- a/mteb/tasks/Retrieval/nld/QuoraNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/QuoraNLRetrieval.py @@ -32,14 +32,16 @@ class QuoraNLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["QuoraRetrieval"], ) diff --git a/mteb/tasks/Retrieval/nld/SCIDOCSNLRetrieval.py b/mteb/tasks/Retrieval/nld/SCIDOCSNLRetrieval.py index 46fac07a12..de2c76cc8a 100644 --- a/mteb/tasks/Retrieval/nld/SCIDOCSNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/SCIDOCSNLRetrieval.py @@ -30,14 +30,16 @@ class SCIDOCSNL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - 
archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["SCIDOCS"], ) diff --git a/mteb/tasks/Retrieval/nld/SciFactNLRetrieval.py b/mteb/tasks/Retrieval/nld/SciFactNLRetrieval.py index a04d559d48..ae9e7f9ae2 100644 --- a/mteb/tasks/Retrieval/nld/SciFactNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/SciFactNLRetrieval.py @@ -27,14 +27,16 @@ class SciFactNL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["SciFact"], ) diff --git a/mteb/tasks/Retrieval/nld/TRECCOVIDNLRetrieval.py b/mteb/tasks/Retrieval/nld/TRECCOVIDNLRetrieval.py index 0efaa4f536..8b2d7699f0 100644 --- a/mteb/tasks/Retrieval/nld/TRECCOVIDNLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/TRECCOVIDNLRetrieval.py @@ -31,14 +31,16 @@ class 
TRECCOVIDNL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["TRECCOVID"], ) diff --git a/mteb/tasks/Retrieval/nld/Touche2020NLRetrieval.py b/mteb/tasks/Retrieval/nld/Touche2020NLRetrieval.py index 61181b694b..ecc40186c4 100644 --- a/mteb/tasks/Retrieval/nld/Touche2020NLRetrieval.py +++ b/mteb/tasks/Retrieval/nld/Touche2020NLRetrieval.py @@ -26,14 +26,16 @@ class Touche2020NL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval 
Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["Touche2020"], ) diff --git a/mteb/tasks/Retrieval/nob/norquad.py b/mteb/tasks/Retrieval/nob/norquad.py index f578cefec8..73a85e5ece 100644 --- a/mteb/tasks/Retrieval/nob/norquad.py +++ b/mteb/tasks/Retrieval/nob/norquad.py @@ -28,24 +28,26 @@ class NorQuadRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{ivanova-etal-2023-norquad, - title = "{N}or{Q}u{AD}: {N}orwegian Question Answering Dataset", - author = "Ivanova, Sardana and - Andreassen, Fredrik and - Jentoft, Matias and - Wold, Sondre and - {\O}vrelid, Lilja", - editor = {Alum{\"a}e, Tanel and - Fishel, Mark}, - booktitle = "Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)", - month = may, - year = "2023", - address = "T{\'o}rshavn, Faroe Islands", - publisher = "University of Tartu Library", - url = "https://aclanthology.org/2023.nodalida-1.17", - pages = "159--168", - abstract = "In this paper we present NorQuAD: the first Norwegian question answering dataset for machine reading comprehension. The dataset consists of 4,752 manually created question-answer pairs. We here detail the data collection procedure and present statistics of the dataset. We also benchmark several multilingual and Norwegian monolingual language models on the dataset and compare them against human performance. The dataset will be made freely available.", -}""", + bibtex_citation=r""" +@inproceedings{ivanova-etal-2023-norquad, + abstract = {In this paper we present NorQuAD: the first Norwegian question answering dataset for machine reading comprehension. The dataset consists of 4,752 manually created question-answer pairs. We here detail the data collection procedure and present statistics of the dataset. 
We also benchmark several multilingual and Norwegian monolingual language models on the dataset and compare them against human performance. The dataset will be made freely available.}, + address = {T{\'o}rshavn, Faroe Islands}, + author = {Ivanova, Sardana and +Andreassen, Fredrik and +Jentoft, Matias and +Wold, Sondre and +{\O}vrelid, Lilja}, + booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)}, + editor = {Alum{\"a}e, Tanel and +Fishel, Mark}, + month = may, + pages = {159--168}, + publisher = {University of Tartu Library}, + title = {{N}or{Q}u{AD}: {N}orwegian Question Answering Dataset}, + url = {https://aclanthology.org/2023.nodalida-1.17}, + year = {2023}, +} +""", prompt={ "query": "Given a question in Norwegian, retrieve the answer from Wikipedia articles" }, diff --git a/mteb/tasks/Retrieval/nob/snl_retrieval.py b/mteb/tasks/Retrieval/nob/snl_retrieval.py index cf64834329..9d2016a7c5 100644 --- a/mteb/tasks/Retrieval/nob/snl_retrieval.py +++ b/mteb/tasks/Retrieval/nob/snl_retrieval.py @@ -27,12 +27,14 @@ class SNLRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@mastersthesis{navjord2023beyond, - title={Beyond extractive: advancing abstractive automatic text summarization in Norwegian with transformers}, - author={Navjord, J{\o}rgen Johnsen and Korsvik, Jon-Mikkel Ryen}, - year={2023}, - school={Norwegian University of Life Sciences, {\AA}s} -}""", + bibtex_citation=r""" +@mastersthesis{navjord2023beyond, + author = {Navjord, J{\o}rgen Johnsen and Korsvik, Jon-Mikkel Ryen}, + school = {Norwegian University of Life Sciences, {\AA}s}, + title = {Beyond extractive: advancing abstractive automatic text summarization in Norwegian with transformers}, + year = {2023}, +} +""", prompt={"query": "Given a lexicon headline in Norwegian, retrieve its article"}, task_subtypes=["Article retrieval"], ) diff --git 
a/mteb/tasks/Retrieval/pol/ArguAnaPLRetrieval.py b/mteb/tasks/Retrieval/pol/ArguAnaPLRetrieval.py index 4251863c75..980dfffc9f 100644 --- a/mteb/tasks/Retrieval/pol/ArguAnaPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/ArguAnaPLRetrieval.py @@ -30,13 +30,15 @@ class ArguAnaPL(AbsTaskRetrieval): annotations_creators=None, dialect=[], sample_creation=None, - bibtex_citation="""@misc{wojtasik2024beirpl, - title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, - author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, - year={2024}, - eprint={2305.19840}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{wojtasik2024beirpl, + archiveprefix = {arXiv}, + author = {Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, + eprint = {2305.19840}, + primaryclass = {cs.IR}, + title = {BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, + year = {2024}, +} +""", adapted_from=["ArguAna"], ) diff --git a/mteb/tasks/Retrieval/pol/DBPediaPLRetrieval.py b/mteb/tasks/Retrieval/pol/DBPediaPLRetrieval.py index 23d44fcb59..64959a5c50 100644 --- a/mteb/tasks/Retrieval/pol/DBPediaPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/DBPediaPLRetrieval.py @@ -28,16 +28,18 @@ class DBPediaPL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated", - bibtex_citation="""@inproceedings{Hasibi:2017:DVT, - author = {Hasibi, Faegheh and Nikolaev, Fedor and Xiong, Chenyan and Balog, Krisztian and Bratsberg, Svein Erik and Kotov, Alexander and Callan, Jamie}, - title = {DBpedia-Entity V2: A Test Collection for Entity Search}, - booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval}, - series = {SIGIR '17}, - year = {2017}, - pages = {1265--1268}, - doi = {10.1145/3077136.3080751}, - publisher = {ACM} -}""", + 
bibtex_citation=r""" +@inproceedings{Hasibi:2017:DVT, + author = {Hasibi, Faegheh and Nikolaev, Fedor and Xiong, Chenyan and Balog, Krisztian and Bratsberg, Svein Erik and Kotov, Alexander and Callan, Jamie}, + booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval}, + doi = {10.1145/3077136.3080751}, + pages = {1265--1268}, + publisher = {ACM}, + series = {SIGIR '17}, + title = {DBpedia-Entity V2: A Test Collection for Entity Search}, + year = {2017}, +} +""", adapted_from=["DBPedia"], ) @@ -65,15 +67,17 @@ class DBPediaPLHardNegatives(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated", - bibtex_citation="""@inproceedings{Hasibi:2017:DVT, - author = {Hasibi, Faegheh and Nikolaev, Fedor and Xiong, Chenyan and Balog, Krisztian and Bratsberg, Svein Erik and Kotov, Alexander and Callan, Jamie}, - title = {DBpedia-Entity V2: A Test Collection for Entity Search}, - booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval}, - series = {SIGIR '17}, - year = {2017}, - pages = {1265--1268}, - doi = {10.1145/3077136.3080751}, - publisher = {ACM} -}""", + bibtex_citation=r""" +@inproceedings{Hasibi:2017:DVT, + author = {Hasibi, Faegheh and Nikolaev, Fedor and Xiong, Chenyan and Balog, Krisztian and Bratsberg, Svein Erik and Kotov, Alexander and Callan, Jamie}, + booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval}, + doi = {10.1145/3077136.3080751}, + pages = {1265--1268}, + publisher = {ACM}, + series = {SIGIR '17}, + title = {DBpedia-Entity V2: A Test Collection for Entity Search}, + year = {2017}, +} +""", adapted_from=["DBPedia"], ) diff --git a/mteb/tasks/Retrieval/pol/FiQAPLRetrieval.py b/mteb/tasks/Retrieval/pol/FiQAPLRetrieval.py index b54f4ae4ed..dad7ea81e1 100644 --- 
a/mteb/tasks/Retrieval/pol/FiQAPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/FiQAPLRetrieval.py @@ -30,12 +30,13 @@ class FiQAPLRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{ -thakur2021beir, -title={{BEIR}: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models}, -author={Nandan Thakur and Nils Reimers and Andreas R{\"u}ckl{\'e} and Abhishek Srivastava and Iryna Gurevych}, -booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)}, -year={2021}, -url={https://openreview.net/forum?id=wCu6T5xFjeJ} -}""", + bibtex_citation=r""" +@inproceedings{thakur2021beir, + author = {Nandan Thakur and Nils Reimers and Andreas R{\"u}ckl{\'e} and Abhishek Srivastava and Iryna Gurevych}, + booktitle = {Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)}, + title = {{BEIR}: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models}, + url = {https://openreview.net/forum?id=wCu6T5xFjeJ}, + year = {2021}, +} +""", ) diff --git a/mteb/tasks/Retrieval/pol/HotpotQAPLRetrieval.py b/mteb/tasks/Retrieval/pol/HotpotQAPLRetrieval.py index 609aa1bbf3..8f083c4e7f 100644 --- a/mteb/tasks/Retrieval/pol/HotpotQAPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/HotpotQAPLRetrieval.py @@ -28,14 +28,16 @@ class HotpotQAPL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated", - bibtex_citation="""@misc{wojtasik2024beirpl, - title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, - author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, - year={2024}, - eprint={2305.19840}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{wojtasik2024beirpl, + archiveprefix = {arXiv}, + author = {Konrad 
Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, + eprint = {2305.19840}, + primaryclass = {cs.IR}, + title = {BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, + year = {2024}, +} +""", adapted_from=["HotpotQA"], ) @@ -63,13 +65,15 @@ class HotpotQAPLHardNegatives(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated", - bibtex_citation="""@misc{wojtasik2024beirpl, - title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, - author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, - year={2024}, - eprint={2305.19840}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{wojtasik2024beirpl, + archiveprefix = {arXiv}, + author = {Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, + eprint = {2305.19840}, + primaryclass = {cs.IR}, + title = {BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, + year = {2024}, +} +""", adapted_from=["HotpotQA"], ) diff --git a/mteb/tasks/Retrieval/pol/MSMARCOPLRetrieval.py b/mteb/tasks/Retrieval/pol/MSMARCOPLRetrieval.py index 9a4780612c..d53cb7cbc9 100644 --- a/mteb/tasks/Retrieval/pol/MSMARCOPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/MSMARCOPLRetrieval.py @@ -30,14 +30,16 @@ class MSMARCOPL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated", - bibtex_citation="""@misc{wojtasik2024beirpl, - title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, - author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, - year={2024}, - eprint={2305.19840}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{wojtasik2024beirpl, + archiveprefix = {arXiv}, + author = {Konrad Wojtasik and Vadim Shishkin and Kacper 
Wołowiec and Arkadiusz Janz and Maciej Piasecki}, + eprint = {2305.19840}, + primaryclass = {cs.IR}, + title = {BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, + year = {2024}, +} +""", adapted_from=["MSMARCO"], ) @@ -67,13 +69,15 @@ class MSMARCOPLHardNegatives(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated", - bibtex_citation="""@misc{wojtasik2024beirpl, - title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, - author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, - year={2024}, - eprint={2305.19840}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{wojtasik2024beirpl, + archiveprefix = {arXiv}, + author = {Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, + eprint = {2305.19840}, + primaryclass = {cs.IR}, + title = {BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, + year = {2024}, +} +""", adapted_from=["MSMARCO"], ) diff --git a/mteb/tasks/Retrieval/pol/NFCorpusPLRetrieval.py b/mteb/tasks/Retrieval/pol/NFCorpusPLRetrieval.py index ad6e41dc07..2c8558c08c 100644 --- a/mteb/tasks/Retrieval/pol/NFCorpusPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/NFCorpusPLRetrieval.py @@ -28,13 +28,15 @@ class NFCorpusPL(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{wojtasik2024beirpl, - title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, - author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, - year={2024}, - eprint={2305.19840}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{wojtasik2024beirpl, + archiveprefix = {arXiv}, + author = {Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, + 
eprint = {2305.19840}, + primaryclass = {cs.IR}, + title = {BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, + year = {2024}, +} +""", adapted_from=["NFCorpus"], ) diff --git a/mteb/tasks/Retrieval/pol/NQPLRetrieval.py b/mteb/tasks/Retrieval/pol/NQPLRetrieval.py index bbe96a3ce4..a3cab40691 100644 --- a/mteb/tasks/Retrieval/pol/NQPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/NQPLRetrieval.py @@ -28,14 +28,16 @@ class NQPL(AbsTaskRetrieval): annotations_creators=None, dialect=[], sample_creation="machine-translated", - bibtex_citation="""@misc{wojtasik2024beirpl, - title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, - author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, - year={2024}, - eprint={2305.19840}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{wojtasik2024beirpl, + archiveprefix = {arXiv}, + author = {Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, + eprint = {2305.19840}, + primaryclass = {cs.IR}, + title = {BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, + year = {2024}, +} +""", adapted_from=["NQ"], ) @@ -63,13 +65,15 @@ class NQPLHardNegatives(AbsTaskRetrieval): annotations_creators=None, dialect=[], sample_creation="machine-translated", - bibtex_citation="""@misc{wojtasik2024beirpl, - title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, - author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, - year={2024}, - eprint={2305.19840}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{wojtasik2024beirpl, + archiveprefix = {arXiv}, + author = {Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, + eprint = {2305.19840}, + primaryclass = {cs.IR}, + title = {BEIR-PL: Zero Shot 
Information Retrieval Benchmark for the Polish Language}, + year = {2024}, +} +""", adapted_from=["NQ"], ) diff --git a/mteb/tasks/Retrieval/pol/QuoraPLRetrieval.py b/mteb/tasks/Retrieval/pol/QuoraPLRetrieval.py index 9666c835cd..7ab63fe481 100644 --- a/mteb/tasks/Retrieval/pol/QuoraPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/QuoraPLRetrieval.py @@ -28,14 +28,16 @@ class QuoraPLRetrieval(AbsTaskRetrieval): annotations_creators=None, dialect=[], sample_creation="machine-translated", - bibtex_citation="""@misc{wojtasik2024beirpl, - title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, - author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, - year={2024}, - eprint={2305.19840}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{wojtasik2024beirpl, + archiveprefix = {arXiv}, + author = {Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, + eprint = {2305.19840}, + primaryclass = {cs.IR}, + title = {BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, + year = {2024}, +} +""", adapted_from=["QuoraRetrieval"], ) @@ -63,13 +65,15 @@ class QuoraPLRetrievalHardNegatives(AbsTaskRetrieval): annotations_creators=None, dialect=[], sample_creation="machine-translated", - bibtex_citation="""@misc{wojtasik2024beirpl, - title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, - author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, - year={2024}, - eprint={2305.19840}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{wojtasik2024beirpl, + archiveprefix = {arXiv}, + author = {Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, + eprint = {2305.19840}, + primaryclass = {cs.IR}, + title = {BEIR-PL: Zero Shot Information Retrieval Benchmark for the 
Polish Language}, + year = {2024}, +} +""", adapted_from=["QuoraRetrieval"], ) diff --git a/mteb/tasks/Retrieval/pol/SCIDOCSPLRetrieval.py b/mteb/tasks/Retrieval/pol/SCIDOCSPLRetrieval.py index c7cd4958f8..5ebdf9eb0f 100644 --- a/mteb/tasks/Retrieval/pol/SCIDOCSPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/SCIDOCSPLRetrieval.py @@ -28,13 +28,15 @@ class SCIDOCSPL(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{wojtasik2024beirpl, - title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, - author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, - year={2024}, - eprint={2305.19840}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{wojtasik2024beirpl, + archiveprefix = {arXiv}, + author = {Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, + eprint = {2305.19840}, + primaryclass = {cs.IR}, + title = {BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, + year = {2024}, +} +""", adapted_from=["SCIDOCS"], ) diff --git a/mteb/tasks/Retrieval/pol/SciFactPLRetrieval.py b/mteb/tasks/Retrieval/pol/SciFactPLRetrieval.py index 1199df50a9..d73b55eb2f 100644 --- a/mteb/tasks/Retrieval/pol/SciFactPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/SciFactPLRetrieval.py @@ -28,13 +28,15 @@ class SciFactPL(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{wojtasik2024beirpl, - title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, - author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, - year={2024}, - eprint={2305.19840}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{wojtasik2024beirpl, + archiveprefix = {arXiv}, + author = {Konrad Wojtasik and Vadim Shishkin and Kacper 
Wołowiec and Arkadiusz Janz and Maciej Piasecki}, + eprint = {2305.19840}, + primaryclass = {cs.IR}, + title = {BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, + year = {2024}, +} +""", adapted_from=["SciFact"], ) diff --git a/mteb/tasks/Retrieval/pol/TRECCOVIDPLRetrieval.py b/mteb/tasks/Retrieval/pol/TRECCOVIDPLRetrieval.py index f3bfd2ad13..bb9da781b0 100644 --- a/mteb/tasks/Retrieval/pol/TRECCOVIDPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/TRECCOVIDPLRetrieval.py @@ -31,13 +31,15 @@ class TRECCOVIDPL(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="machine-translated", - bibtex_citation="""@misc{wojtasik2024beirpl, - title={BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, - author={Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, - year={2024}, - eprint={2305.19840}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{wojtasik2024beirpl, + archiveprefix = {arXiv}, + author = {Konrad Wojtasik and Vadim Shishkin and Kacper Wołowiec and Arkadiusz Janz and Maciej Piasecki}, + eprint = {2305.19840}, + primaryclass = {cs.IR}, + title = {BEIR-PL: Zero Shot Information Retrieval Benchmark for the Polish Language}, + year = {2024}, +} +""", adapted_from=["TRECCOVID"], ) diff --git a/mteb/tasks/Retrieval/rus/RiaNewsRetrieval.py b/mteb/tasks/Retrieval/rus/RiaNewsRetrieval.py index 049a3c5edf..0399374200 100644 --- a/mteb/tasks/Retrieval/rus/RiaNewsRetrieval.py +++ b/mteb/tasks/Retrieval/rus/RiaNewsRetrieval.py @@ -29,12 +29,14 @@ class RiaNewsRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{gavrilov2018self, - title={Self-Attentive Model for Headline Generation}, - author={Gavrilov, Daniil and Kalaidin, Pavel and Malykh, Valentin}, - booktitle={Proceedings of the 41st European Conference on Information Retrieval}, - 
year={2019} - }""", + bibtex_citation=r""" +@inproceedings{gavrilov2018self, + author = {Gavrilov, Daniil and Kalaidin, Pavel and Malykh, Valentin}, + booktitle = {Proceedings of the 41st European Conference on Information Retrieval}, + title = {Self-Attentive Model for Headline Generation}, + year = {2019}, +} +""", prompt={"query": "Given a news title, retrieve relevant news article"}, ) @@ -63,11 +65,13 @@ class RiaNewsRetrievalHardNegatives(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{gavrilov2018self, - title={Self-Attentive Model for Headline Generation}, - author={Gavrilov, Daniil and Kalaidin, Pavel and Malykh, Valentin}, - booktitle={Proceedings of the 41st European Conference on Information Retrieval}, - year={2019} - }""", + bibtex_citation=r""" +@inproceedings{gavrilov2018self, + author = {Gavrilov, Daniil and Kalaidin, Pavel and Malykh, Valentin}, + booktitle = {Proceedings of the 41st European Conference on Information Retrieval}, + title = {Self-Attentive Model for Headline Generation}, + year = {2019}, +} +""", adapted_from=["RiaNewsRetrieval"], ) diff --git a/mteb/tasks/Retrieval/rus/RuBQRetrieval.py b/mteb/tasks/Retrieval/rus/RuBQRetrieval.py index 3bb1bb35e9..1c294682c6 100644 --- a/mteb/tasks/Retrieval/rus/RuBQRetrieval.py +++ b/mteb/tasks/Retrieval/rus/RuBQRetrieval.py @@ -29,13 +29,15 @@ class RuBQRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{RuBQ2021, - title={RuBQ 2.0: An Innovated Russian Question Answering Dataset}, - author={Ivan Rybin and Vladislav Korablinov and Pavel Efimov and Pavel Braslavski}, - booktitle={ESWC}, - year={2021}, - pages={532--547} - }""", + bibtex_citation=r""" +@inproceedings{RuBQ2021, + author = {Ivan Rybin and Vladislav Korablinov and Pavel Efimov and Pavel Braslavski}, + booktitle = {ESWC}, + pages = {532--547}, + title = {RuBQ 2.0: An 
Innovated Russian Question Answering Dataset}, + year = {2021}, +} +""", prompt={ "query": "Given a question, retrieve Wikipedia passages that answer the question" }, diff --git a/mteb/tasks/Retrieval/slk/SlovakSumRetrieval.py b/mteb/tasks/Retrieval/slk/SlovakSumRetrieval.py index ea08c03225..fbba98487e 100644 --- a/mteb/tasks/Retrieval/slk/SlovakSumRetrieval.py +++ b/mteb/tasks/Retrieval/slk/SlovakSumRetrieval.py @@ -34,14 +34,14 @@ class SlovakSumRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{OndrejowaSlovakSum24, - title = {SlovakSum: A Large Scale Slovak Summarization Dataset}, - booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation}, - author = {Ondrejová, Viktória and Šuppa, Marek}, - date = {2024}, - } - """, + bibtex_citation=r""" +@inproceedings{OndrejowaSlovakSum24, + author = {Ondrejová, Viktória and Šuppa, Marek}, + booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation}, + date = {2024}, + title = {SlovakSum: A Large Scale Slovak Summarization Dataset}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2P.py b/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2P.py index 8ef0681dcd..79e0f59d01 100644 --- a/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2P.py +++ b/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2P.py @@ -30,27 +30,28 @@ class SpanishPassageRetrievalS2P(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@InProceedings{10.1007/978-3-030-15719-7_19, -author="Kamateri, Eleni + bibtex_citation=r""" +@inproceedings{10.1007/978-3-030-15719-7_19, + abstract = {This paper describes a new test collection for passage retrieval from health-related Web resources in Spanish. 
The test collection contains 10,037 health-related documents in Spanish, 37 topics representing complex information needs formulated in a total of 167 natural language questions, and manual relevance assessments of text passages, pooled from multiple systems. This test collection is the first to combine search in a language beyond English, passage retrieval, and health-related resources and topics targeting the general public.}, + address = {Cham}, + author = {Kamateri, Eleni and Tsikrika, Theodora and Symeonidis, Spyridon and Vrochidis, Stefanos and Minker, Wolfgang -and Kompatsiaris, Yiannis", -editor="Azzopardi, Leif +and Kompatsiaris, Yiannis}, + booktitle = {Advances in Information Retrieval}, + editor = {Azzopardi, Leif and Stein, Benno and Fuhr, Norbert and Mayr, Philipp and Hauff, Claudia -and Hiemstra, Djoerd", -title="A Test Collection for Passage Retrieval Evaluation of Spanish Health-Related Resources", -booktitle="Advances in Information Retrieval", -year="2019", -publisher="Springer International Publishing", -address="Cham", -pages="148--154", -abstract="This paper describes a new test collection for passage retrieval from health-related Web resources in Spanish. The test collection contains 10,037 health-related documents in Spanish, 37 topics representing complex information needs formulated in a total of 167 natural language questions, and manual relevance assessments of text passages, pooled from multiple systems. 
This test collection is the first to combine search in a language beyond English, passage retrieval, and health-related resources and topics targeting the general public.", -isbn="978-3-030-15719-7" +and Hiemstra, Djoerd}, + isbn = {978-3-030-15719-7}, + pages = {148--154}, + publisher = {Springer International Publishing}, + title = {A Test Collection for Passage Retrieval Evaluation of Spanish Health-Related Resources}, + year = {2019}, } """, ) diff --git a/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2S.py b/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2S.py index 86b45f1f4c..f22739e2a1 100644 --- a/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2S.py +++ b/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2S.py @@ -30,27 +30,28 @@ class SpanishPassageRetrievalS2S(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@InProceedings{10.1007/978-3-030-15719-7_19, -author="Kamateri, Eleni + bibtex_citation=r""" +@inproceedings{10.1007/978-3-030-15719-7_19, + abstract = {This paper describes a new test collection for passage retrieval from health-related Web resources in Spanish. The test collection contains 10,037 health-related documents in Spanish, 37 topics representing complex information needs formulated in a total of 167 natural language questions, and manual relevance assessments of text passages, pooled from multiple systems. 
This test collection is the first to combine search in a language beyond English, passage retrieval, and health-related resources and topics targeting the general public.}, + address = {Cham}, + author = {Kamateri, Eleni and Tsikrika, Theodora and Symeonidis, Spyridon and Vrochidis, Stefanos and Minker, Wolfgang -and Kompatsiaris, Yiannis", -editor="Azzopardi, Leif +and Kompatsiaris, Yiannis}, + booktitle = {Advances in Information Retrieval}, + editor = {Azzopardi, Leif and Stein, Benno and Fuhr, Norbert and Mayr, Philipp and Hauff, Claudia -and Hiemstra, Djoerd", -title="A Test Collection for Passage Retrieval Evaluation of Spanish Health-Related Resources", -booktitle="Advances in Information Retrieval", -year="2019", -publisher="Springer International Publishing", -address="Cham", -pages="148--154", -abstract="This paper describes a new test collection for passage retrieval from health-related Web resources in Spanish. The test collection contains 10,037 health-related documents in Spanish, 37 topics representing complex information needs formulated in a total of 167 natural language questions, and manual relevance assessments of text passages, pooled from multiple systems. 
This test collection is the first to combine search in a language beyond English, passage retrieval, and health-related resources and topics targeting the general public.", -isbn="978-3-030-15719-7" +and Hiemstra, Djoerd}, + isbn = {978-3-030-15719-7}, + pages = {148--154}, + publisher = {Springer International Publishing}, + title = {A Test Collection for Passage Retrieval Evaluation of Spanish Health-Related Resources}, + year = {2019}, } """, ) diff --git a/mteb/tasks/Retrieval/swe/SweFaqRetrieval.py b/mteb/tasks/Retrieval/swe/SweFaqRetrieval.py index eccc7d9ab7..14637b5eb5 100644 --- a/mteb/tasks/Retrieval/swe/SweFaqRetrieval.py +++ b/mteb/tasks/Retrieval/swe/SweFaqRetrieval.py @@ -30,13 +30,15 @@ class SweFaqRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{berdivcevskis2023superlim, - title={Superlim: A Swedish language understanding evaluation benchmark}, - author={Berdi{\v{c}}evskis, Aleksandrs and Bouma, Gerlof and Kurtz, Robin and Morger, Felix and {\"O}hman, Joey and Adesam, Yvonne and Borin, Lars and Dann{\'e}lls, Dana and Forsberg, Markus and Isbister, Tim and others}, - booktitle={Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing}, - pages={8137--8153}, - year={2023} -}""", # for the benchmark in which this dataset is used + bibtex_citation=r""" +@inproceedings{berdivcevskis2023superlim, + author = {Berdi{\v{c}}evskis, Aleksandrs and Bouma, Gerlof and Kurtz, Robin and Morger, Felix and {\"O}hman, Joey and Adesam, Yvonne and Borin, Lars and Dann{\'e}lls, Dana and Forsberg, Markus and Isbister, Tim and others}, + booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing}, + pages = {8137--8153}, + title = {Superlim: A Swedish language understanding evaluation benchmark}, + year = {2023}, +} +""", # for the benchmark in which this dataset is used prompt={"query": "Retrieve answers given 
questions in Swedish"}, ) diff --git a/mteb/tasks/Retrieval/swe/SwednRetrieval.py b/mteb/tasks/Retrieval/swe/SwednRetrieval.py index acd7b65de7..4867813af0 100644 --- a/mteb/tasks/Retrieval/swe/SwednRetrieval.py +++ b/mteb/tasks/Retrieval/swe/SwednRetrieval.py @@ -30,12 +30,14 @@ class SwednRetrieval(AbsTaskRetrieval): dialect=[], task_subtypes=["Article retrieval"], sample_creation="found", - bibtex_citation="""@inproceedings{monsen2021method, - title={A method for building non-english corpora for abstractive text summarization}, - author={Monsen, Julius and J{\"o}nsson, Arne}, - booktitle={Proceedings of CLARIN Annual Conference}, - year={2021} -}""", + bibtex_citation=r""" +@inproceedings{monsen2021method, + author = {Monsen, Julius and J{\"o}nsson, Arne}, + booktitle = {Proceedings of CLARIN Annual Conference}, + title = {A method for building non-english corpora for abstractive text summarization}, + year = {2021}, +} +""", prompt={ "query": "Given a Swedish news headline retrieve summaries or news articles" }, diff --git a/mteb/tasks/Retrieval/tur/TurHistQuad.py b/mteb/tasks/Retrieval/tur/TurHistQuad.py index cac7b0d8fb..a8bc0912bb 100644 --- a/mteb/tasks/Retrieval/tur/TurHistQuad.py +++ b/mteb/tasks/Retrieval/tur/TurHistQuad.py @@ -28,19 +28,19 @@ class TurHistQuadRetrieval(AbsTaskRetrieval): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation=""" - @INPROCEEDINGS{9559013, - author={Soygazi, Fatih and Çiftçi, Okan and Kök, Uğurcan and Cengiz, Soner}, - booktitle={2021 6th International Conference on Computer Science and Engineering (UBMK)}, - title={THQuAD: Turkish Historic Question Answering Dataset for Reading Comprehension}, - year={2021}, - volume={}, - number={}, - pages={215-220}, - keywords={Computer science;Computational modeling;Neural networks;Knowledge discovery;Information retrieval;Natural language processing;History;question answering;information retrieval;natural language understanding;deep 
learning;contextualized word embeddings}, - doi={10.1109/UBMK52708.2021.9559013}} - - """, + bibtex_citation=r""" +@inproceedings{9559013, + author = {Soygazi, Fatih and Çiftçi, Okan and Kök, Uğurcan and Cengiz, Soner}, + booktitle = {2021 6th International Conference on Computer Science and Engineering (UBMK)}, + doi = {10.1109/UBMK52708.2021.9559013}, + keywords = {Computer science;Computational modeling;Neural networks;Knowledge discovery;Information retrieval;Natural language processing;History;question answering;information retrieval;natural language understanding;deep learning;contextualized word embeddings}, + number = {}, + pages = {215-220}, + title = {THQuAD: Turkish Historic Question Answering Dataset for Reading Comprehension}, + volume = {}, + year = {2021}, +} +""", ) def load_data(self, **kwargs) -> None: diff --git a/mteb/tasks/Retrieval/vie/GreenNodeTableMarkdownRetrieval.py b/mteb/tasks/Retrieval/vie/GreenNodeTableMarkdownRetrieval.py index 57d36d4312..75a7928621 100644 --- a/mteb/tasks/Retrieval/vie/GreenNodeTableMarkdownRetrieval.py +++ b/mteb/tasks/Retrieval/vie/GreenNodeTableMarkdownRetrieval.py @@ -29,5 +29,5 @@ class GreenNodeTableMarkdownRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="", # TODO: Add bibtex citation when the paper is published + bibtex_citation="", # TODO: Add bibtex citation when the paper is published ) diff --git a/mteb/tasks/Retrieval/vie/VieQuADRetrieval.py b/mteb/tasks/Retrieval/vie/VieQuADRetrieval.py index 07ec5aba8b..8391fb0adb 100644 --- a/mteb/tasks/Retrieval/vie/VieQuADRetrieval.py +++ b/mteb/tasks/Retrieval/vie/VieQuADRetrieval.py @@ -33,23 +33,26 @@ class VieQuADRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{nguyen-etal-2020-vietnamese, -title = "A Vietnamese Dataset for Evaluating Machine Reading Comprehension", -author = "Nguyen, Kiet and - 
Nguyen, Vu and - Nguyen, Anh and - Nguyen, Ngan", -editor = "Scott, Donia and - Bel, Nuria and - Zong, Chengqing", -booktitle = "Proceedings of the 28th International Conference on Computational Linguistics", -month = dec, -year = "2020", -address = "Barcelona, Spain (Online)", -publisher = "International Committee on Computational Linguistics", -url = "https://aclanthology.org/2020.coling-main.233", -doi = "10.18653/v1/2020.coling-main.233", -pages = "2595--2605"}""", + bibtex_citation=r""" +@inproceedings{nguyen-etal-2020-vietnamese, + address = {Barcelona, Spain (Online)}, + author = {Nguyen, Kiet and +Nguyen, Vu and +Nguyen, Anh and +Nguyen, Ngan}, + booktitle = {Proceedings of the 28th International Conference on Computational Linguistics}, + doi = {10.18653/v1/2020.coling-main.233}, + editor = {Scott, Donia and +Bel, Nuria and +Zong, Chengqing}, + month = dec, + pages = {2595--2605}, + publisher = {International Committee on Computational Linguistics}, + title = {A Vietnamese Dataset for Evaluating Machine Reading Comprehension}, + url = {https://aclanthology.org/2020.coling-main.233}, + year = {2020}, +} +""", ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/vie/ZacLegalTextRetrieval.py b/mteb/tasks/Retrieval/vie/ZacLegalTextRetrieval.py index 379bc02235..3a97e80afe 100644 --- a/mteb/tasks/Retrieval/vie/ZacLegalTextRetrieval.py +++ b/mteb/tasks/Retrieval/vie/ZacLegalTextRetrieval.py @@ -27,5 +27,5 @@ class ZacLegalTextRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="", # TODO: Add bibtex citation when the paper is published + bibtex_citation="", # TODO: Add bibtex citation when the paper is published ) diff --git a/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py b/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py index 643a414ada..dd158e2858 100644 --- a/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py +++ b/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py @@ -57,14 +57,16 @@ class 
T2Retrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=None, sample_creation=None, - bibtex_citation="""@misc{xie2023t2ranking, - title={T2Ranking: A large-scale Chinese Benchmark for Passage Ranking}, - author={Xiaohui Xie and Qian Dong and Bingning Wang and Feiyang Lv and Ting Yao and Weinan Gan and Zhijing Wu and Xiangsheng Li and Haitao Li and Yiqun Liu and Jin Ma}, - year={2023}, - eprint={2304.03679}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + bibtex_citation=r""" +@misc{xie2023t2ranking, + archiveprefix = {arXiv}, + author = {Xiaohui Xie and Qian Dong and Bingning Wang and Feiyang Lv and Ting Yao and Weinan Gan and Zhijing Wu and Xiangsheng Li and Haitao Li and Yiqun Liu and Jin Ma}, + eprint = {2304.03679}, + primaryclass = {cs.IR}, + title = {T2Ranking: A large-scale Chinese Benchmark for Passage Ranking}, + year = {2023}, +} +""", prompt={ "query": "Given a Chinese search query, retrieve web passages that answer the question" }, @@ -108,14 +110,16 @@ class MMarcoRetrieval(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{xiao2024cpack, - title={C-Pack: Packaged Resources To Advance General Chinese Embedding}, - author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff and Defu Lian and Jian-Yun Nie}, - year={2024}, - eprint={2309.07597}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{xiao2024cpack, + archiveprefix = {arXiv}, + author = {Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff and Defu Lian and Jian-Yun Nie}, + eprint = {2309.07597}, + primaryclass = {cs.CL}, + title = {C-Pack: Packaged Resources To Advance General Chinese Embedding}, + year = {2024}, +} +""", prompt={ "query": "Given a web search query, retrieve relevant passages that answer the query" }, @@ -157,14 +161,16 @@ class DuRetrieval(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - 
bibtex_citation="""@misc{qiu2022dureaderretrieval, - title={DuReader_retrieval: A Large-scale Chinese Benchmark for Passage Retrieval from Web Search Engine}, - author={Yifu Qiu and Hongyu Li and Yingqi Qu and Ying Chen and Qiaoqiao She and Jing Liu and Hua Wu and Haifeng Wang}, - year={2022}, - eprint={2203.10232}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{qiu2022dureaderretrieval, + archiveprefix = {arXiv}, + author = {Yifu Qiu and Hongyu Li and Yingqi Qu and Ying Chen and Qiaoqiao She and Jing Liu and Hua Wu and Haifeng Wang}, + eprint = {2203.10232}, + primaryclass = {cs.CL}, + title = {DuReader_retrieval: A Large-scale Chinese Benchmark for Passage Retrieval from Web Search Engine}, + year = {2022}, +} +""", prompt={ "query": "Given a Chinese search query, retrieve web passages that answer the question" }, @@ -206,15 +212,17 @@ class CovidRetrieval(AbsTaskRetrieval): annotations_creators="human-annotated", dialect=[], sample_creation=None, - bibtex_citation="""@misc{long2022multicprmultidomainchinese, - title={Multi-CPR: A Multi Domain Chinese Dataset for Passage Retrieval}, - author={Dingkun Long and Qiong Gao and Kuan Zou and Guangwei Xu and Pengjun Xie and Ruijie Guo and Jian Xu and Guanjun Jiang and Luxi Xing and Ping Yang}, - year={2022}, - eprint={2203.03367}, - archivePrefix={arXiv}, - primaryClass={cs.IR}, - url={https://arxiv.org/abs/2203.03367}, -}""", + bibtex_citation=r""" +@misc{long2022multicprmultidomainchinese, + archiveprefix = {arXiv}, + author = {Dingkun Long and Qiong Gao and Kuan Zou and Guangwei Xu and Pengjun Xie and Ruijie Guo and Jian Xu and Guanjun Jiang and Luxi Xing and Ping Yang}, + eprint = {2203.03367}, + primaryclass = {cs.IR}, + title = {Multi-CPR: A Multi Domain Chinese Dataset for Passage Retrieval}, + url = {https://arxiv.org/abs/2203.03367}, + year = {2022}, +} +""", prompt={ "query": "Given a question on COVID-19, retrieve news articles that answer the question" }, @@ -256,15 
+264,17 @@ class CmedqaRetrieval(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{qiu2022dureaderretrievallargescalechinesebenchmark, - title={DuReader_retrieval: A Large-scale Chinese Benchmark for Passage Retrieval from Web Search Engine}, - author={Yifu Qiu and Hongyu Li and Yingqi Qu and Ying Chen and Qiaoqiao She and Jing Liu and Hua Wu and Haifeng Wang}, - year={2022}, - eprint={2203.10232}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2203.10232}, -}""", + bibtex_citation=r""" +@misc{qiu2022dureaderretrievallargescalechinesebenchmark, + archiveprefix = {arXiv}, + author = {Yifu Qiu and Hongyu Li and Yingqi Qu and Ying Chen and Qiaoqiao She and Jing Liu and Hua Wu and Haifeng Wang}, + eprint = {2203.10232}, + primaryclass = {cs.CL}, + title = {DuReader_retrieval: A Large-scale Chinese Benchmark for Passage Retrieval from Web Search Engine}, + url = {https://arxiv.org/abs/2203.10232}, + year = {2022}, +} +""", prompt={ "query": "Given a Chinese community medical question, retrieve replies that best answer the question" }, @@ -308,15 +318,17 @@ class EcomRetrieval(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{long2022multicprmultidomainchinese, - title={Multi-CPR: A Multi Domain Chinese Dataset for Passage Retrieval}, - author={Dingkun Long and Qiong Gao and Kuan Zou and Guangwei Xu and Pengjun Xie and Ruijie Guo and Jian Xu and Guanjun Jiang and Luxi Xing and Ping Yang}, - year={2022}, - eprint={2203.03367}, - archivePrefix={arXiv}, - primaryClass={cs.IR}, - url={https://arxiv.org/abs/2203.03367}, -}""", + bibtex_citation=r""" +@misc{long2022multicprmultidomainchinese, + archiveprefix = {arXiv}, + author = {Dingkun Long and Qiong Gao and Kuan Zou and Guangwei Xu and Pengjun Xie and Ruijie Guo and Jian Xu and Guanjun Jiang and Luxi Xing and Ping Yang}, + eprint = {2203.03367}, + primaryclass = {cs.IR}, + 
title = {Multi-CPR: A Multi Domain Chinese Dataset for Passage Retrieval}, + url = {https://arxiv.org/abs/2203.03367}, + year = {2022}, +} +""", prompt={ "query": "Given a user query from an e-commerce website, retrieve description sentences of relevant products" }, @@ -360,15 +372,17 @@ class MedicalRetrieval(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{long2022multicprmultidomainchinese, - title={Multi-CPR: A Multi Domain Chinese Dataset for Passage Retrieval}, - author={Dingkun Long and Qiong Gao and Kuan Zou and Guangwei Xu and Pengjun Xie and Ruijie Guo and Jian Xu and Guanjun Jiang and Luxi Xing and Ping Yang}, - year={2022}, - eprint={2203.03367}, - archivePrefix={arXiv}, - primaryClass={cs.IR}, - url={https://arxiv.org/abs/2203.03367}, -}""", + bibtex_citation=r""" +@misc{long2022multicprmultidomainchinese, + archiveprefix = {arXiv}, + author = {Dingkun Long and Qiong Gao and Kuan Zou and Guangwei Xu and Pengjun Xie and Ruijie Guo and Jian Xu and Guanjun Jiang and Luxi Xing and Ping Yang}, + eprint = {2203.03367}, + primaryclass = {cs.IR}, + title = {Multi-CPR: A Multi Domain Chinese Dataset for Passage Retrieval}, + url = {https://arxiv.org/abs/2203.03367}, + year = {2022}, +} +""", prompt={ "query": "Given a medical question, retrieve user replies that best answer the question" }, @@ -412,15 +426,17 @@ class VideoRetrieval(AbsTaskRetrieval): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{long2022multicprmultidomainchinese, - title={Multi-CPR: A Multi Domain Chinese Dataset for Passage Retrieval}, - author={Dingkun Long and Qiong Gao and Kuan Zou and Guangwei Xu and Pengjun Xie and Ruijie Guo and Jian Xu and Guanjun Jiang and Luxi Xing and Ping Yang}, - year={2022}, - eprint={2203.03367}, - archivePrefix={arXiv}, - primaryClass={cs.IR}, - url={https://arxiv.org/abs/2203.03367}, -}""", + bibtex_citation=r""" +@misc{long2022multicprmultidomainchinese, + 
archiveprefix = {arXiv}, + author = {Dingkun Long and Qiong Gao and Kuan Zou and Guangwei Xu and Pengjun Xie and Ruijie Guo and Jian Xu and Guanjun Jiang and Luxi Xing and Ping Yang}, + eprint = {2203.03367}, + primaryclass = {cs.IR}, + title = {Multi-CPR: A Multi Domain Chinese Dataset for Passage Retrieval}, + url = {https://arxiv.org/abs/2203.03367}, + year = {2022}, +} +""", prompt={ "query": "Given a video search query, retrieve the titles of relevant videos" }, diff --git a/mteb/tasks/Retrieval/zho/LeCaRDv2Retrieval.py b/mteb/tasks/Retrieval/zho/LeCaRDv2Retrieval.py index 9d3480ff79..e733755fd8 100644 --- a/mteb/tasks/Retrieval/zho/LeCaRDv2Retrieval.py +++ b/mteb/tasks/Retrieval/zho/LeCaRDv2Retrieval.py @@ -27,12 +27,14 @@ class LeCaRDv2(AbsTaskRetrieval): annotations_creators="derived", dialect=None, sample_creation="found", - bibtex_citation="""@misc{li2023lecardv2, - title={LeCaRDv2: A Large-Scale Chinese Legal Case Retrieval Dataset}, - author={Haitao Li and Yunqiu Shao and Yueyue Wu and Qingyao Ai and Yixiao Ma and Yiqun Liu}, - year={2023}, - eprint={2310.17609}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{li2023lecardv2, + archiveprefix = {arXiv}, + author = {Haitao Li and Yunqiu Shao and Yueyue Wu and Qingyao Ai and Yixiao Ma and Yiqun Liu}, + eprint = {2310.17609}, + primaryclass = {cs.CL}, + title = {LeCaRDv2: A Large-Scale Chinese Legal Case Retrieval Dataset}, + year = {2023}, +} +""", ) diff --git a/mteb/tasks/STS/deu/GermanSTSBenchmarkSTS.py b/mteb/tasks/STS/deu/GermanSTSBenchmarkSTS.py index a0552eb4df..862fb18b98 100644 --- a/mteb/tasks/STS/deu/GermanSTSBenchmarkSTS.py +++ b/mteb/tasks/STS/deu/GermanSTSBenchmarkSTS.py @@ -28,12 +28,14 @@ class GermanSTSBenchmarkSTS(AbsTaskSTS): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@InProceedings{huggingface:dataset:stsb_multi_mt, -title = {Machine translated multilingual STS benchmark dataset.}, -author={Philip May}, 
-year={2021}, -url={https://github.com/PhilipMay/stsb-multi-mt} -}""", + bibtex_citation=r""" +@inproceedings{huggingface:dataset:stsb_multi_mt, + author = {Philip May}, + title = {Machine translated multilingual STS benchmark dataset.}, + url = {https://github.com/PhilipMay/stsb-multi-mt}, + year = {2021}, +} +""", ) @property diff --git a/mteb/tasks/STS/eng/BiossesSTS.py b/mteb/tasks/STS/eng/BiossesSTS.py index 1fc1d5a1d0..9fc424d043 100644 --- a/mteb/tasks/STS/eng/BiossesSTS.py +++ b/mteb/tasks/STS/eng/BiossesSTS.py @@ -27,21 +27,23 @@ class BiossesSTS(AbsTaskSTS): annotations_creators="derived", dialect=[], sample_creation="found", - bibtex_citation="""@article{10.1093/bioinformatics/btx238, - author = {Soğancıoğlu, Gizem and Öztürk, Hakime and Özgür, Arzucan}, - title = "{BIOSSES: a semantic sentence similarity estimation system for the biomedical domain}", - journal = {Bioinformatics}, - volume = {33}, - number = {14}, - pages = {i49-i58}, - year = {2017}, - month = {07}, - abstract = "{The amount of information available in textual format is rapidly increasing in the biomedical domain. Therefore, natural language processing (NLP) applications are becoming increasingly important to facilitate the retrieval and analysis of these data. Computing the semantic similarity between sentences is an important component in many NLP tasks including text retrieval and summarization. A number of approaches have been proposed for semantic sentence similarity estimation for generic English. However, our experiments showed that such approaches do not effectively cover biomedical knowledge and produce poor results for biomedical text.We propose several approaches for sentence-level semantic similarity computation in the biomedical domain, including string similarity measures and measures based on the distributed vector representations of sentences learned in an unsupervised manner from a large biomedical corpus. 
In addition, ontology-based approaches are presented that utilize general and domain-specific ontologies. Finally, a supervised regression based model is developed that effectively combines the different similarity computation metrics. A benchmark data set consisting of 100 sentence pairs from the biomedical literature is manually annotated by five human experts and used for evaluating the proposed methods.The experiments showed that the supervised semantic sentence similarity computation approach obtained the best performance (0.836 correlation with gold standard human annotations) and improved over the state-of-the-art domain-independent systems up to 42.6\\% in terms of the Pearson correlation metric.A web-based system for biomedical semantic sentence similarity computation, the source code, and the annotated benchmark data set are available at: http://tabilab.cmpe.boun.edu.tr/BIOSSES/.}", - issn = {1367-4803}, - doi = {10.1093/bioinformatics/btx238}, - url = {https://doi.org/10.1093/bioinformatics/btx238}, - eprint = {https://academic.oup.com/bioinformatics/article-pdf/33/14/i49/50315066/bioinformatics\_33\_14\_i49.pdf}, -}""", + bibtex_citation=r""" +@article{10.1093/bioinformatics/btx238, + abstract = {{The amount of information available in textual format is rapidly increasing in the biomedical domain. Therefore, natural language processing (NLP) applications are becoming increasingly important to facilitate the retrieval and analysis of these data. Computing the semantic similarity between sentences is an important component in many NLP tasks including text retrieval and summarization. A number of approaches have been proposed for semantic sentence similarity estimation for generic English. 
However, our experiments showed that such approaches do not effectively cover biomedical knowledge and produce poor results for biomedical text.We propose several approaches for sentence-level semantic similarity computation in the biomedical domain, including string similarity measures and measures based on the distributed vector representations of sentences learned in an unsupervised manner from a large biomedical corpus. In addition, ontology-based approaches are presented that utilize general and domain-specific ontologies. Finally, a supervised regression based model is developed that effectively combines the different similarity computation metrics. A benchmark data set consisting of 100 sentence pairs from the biomedical literature is manually annotated by five human experts and used for evaluating the proposed methods.The experiments showed that the supervised semantic sentence similarity computation approach obtained the best performance (0.836 correlation with gold standard human annotations) and improved over the state-of-the-art domain-independent systems up to 42.6\% in terms of the Pearson correlation metric.A web-based system for biomedical semantic sentence similarity computation, the source code, and the annotated benchmark data set are available at: http://tabilab.cmpe.boun.edu.tr/BIOSSES/.}}, + author = {Soğancıoğlu, Gizem and Öztürk, Hakime and Özgür, Arzucan}, + doi = {10.1093/bioinformatics/btx238}, + eprint = {https://academic.oup.com/bioinformatics/article-pdf/33/14/i49/50315066/bioinformatics\_33\_14\_i49.pdf}, + issn = {1367-4803}, + journal = {Bioinformatics}, + month = {07}, + number = {14}, + pages = {i49-i58}, + title = {{BIOSSES: a semantic sentence similarity estimation system for the biomedical domain}}, + url = {https://doi.org/10.1093/bioinformatics/btx238}, + volume = {33}, + year = {2017}, +} +""", ) @property diff --git a/mteb/tasks/STS/eng/STS12STS.py b/mteb/tasks/STS/eng/STS12STS.py index b222b42c66..a36608a0e9 100644 ---
a/mteb/tasks/STS/eng/STS12STS.py +++ b/mteb/tasks/STS/eng/STS12STS.py @@ -27,19 +27,21 @@ class STS12STS(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{10.5555/2387636.2387697, -author = {Agirre, Eneko and Diab, Mona and Cer, Daniel and Gonzalez-Agirre, Aitor}, -title = {SemEval-2012 task 6: a pilot on semantic textual similarity}, -year = {2012}, -publisher = {Association for Computational Linguistics}, -address = {USA}, -abstract = {Semantic Textual Similarity (STS) measures the degree of semantic equivalence between two texts. This paper presents the results of the STS pilot task in Semeval. The training data contained 2000 sentence pairs from previously existing paraphrase datasets and machine translation evaluation resources. The test data also comprised 2000 sentences pairs for those datasets, plus two surprise datasets with 400 pairs from a different machine translation evaluation corpus and 750 pairs from a lexical resource mapping exercise. The similarity of pairs of sentences was rated on a 0-5 scale (low to high similarity) by human judges using Amazon Mechanical Turk, with high Pearson correlation scores, around 90\%. 35 teams participated in the task, submitting 88 runs. The best results scored a Pearson correlation >80\%, well above a simple lexical baseline that only scored a 31\% correlation. 
This pilot task opens an exciting way ahead, although there are still open issues, specially the evaluation metric.}, -booktitle = {Proceedings of the First Joint Conference on Lexical and Computational Semantics - Volume 1: Proceedings of the Main Conference and the Shared Task, and Volume 2: Proceedings of the Sixth International Workshop on Semantic Evaluation}, -pages = {385–393}, -numpages = {9}, -location = {Montr\'{e}al, Canada}, -series = {SemEval '12} -}""", + bibtex_citation=r""" +@inproceedings{10.5555/2387636.2387697, + abstract = {Semantic Textual Similarity (STS) measures the degree of semantic equivalence between two texts. This paper presents the results of the STS pilot task in Semeval. The training data contained 2000 sentence pairs from previously existing paraphrase datasets and machine translation evaluation resources. The test data also comprised 2000 sentences pairs for those datasets, plus two surprise datasets with 400 pairs from a different machine translation evaluation corpus and 750 pairs from a lexical resource mapping exercise. The similarity of pairs of sentences was rated on a 0-5 scale (low to high similarity) by human judges using Amazon Mechanical Turk, with high Pearson correlation scores, around 90\%. 35 teams participated in the task, submitting 88 runs. The best results scored a Pearson correlation >80\%, well above a simple lexical baseline that only scored a 31\% correlation. 
This pilot task opens an exciting way ahead, although there are still open issues, specially the evaluation metric.}, + address = {USA}, + author = {Agirre, Eneko and Diab, Mona and Cer, Daniel and Gonzalez-Agirre, Aitor}, + booktitle = {Proceedings of the First Joint Conference on Lexical and Computational Semantics - Volume 1: Proceedings of the Main Conference and the Shared Task, and Volume 2: Proceedings of the Sixth International Workshop on Semantic Evaluation}, + location = {Montr\'{e}al, Canada}, + numpages = {9}, + pages = {385–393}, + publisher = {Association for Computational Linguistics}, + series = {SemEval '12}, + title = {SemEval-2012 task 6: a pilot on semantic textual similarity}, + year = {2012}, +} +""", ) @property diff --git a/mteb/tasks/STS/eng/STS13STS.py b/mteb/tasks/STS/eng/STS13STS.py index 415eafbc23..e345618fa8 100644 --- a/mteb/tasks/STS/eng/STS13STS.py +++ b/mteb/tasks/STS/eng/STS13STS.py @@ -27,13 +27,15 @@ class STS13STS(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{Agirre2013SEM2S, - title={*SEM 2013 shared task: Semantic Textual Similarity}, - author={Eneko Agirre and Daniel Matthew Cer and Mona T. Diab and Aitor Gonzalez-Agirre and Weiwei Guo}, - booktitle={International Workshop on Semantic Evaluation}, - year={2013}, - url={https://api.semanticscholar.org/CorpusID:10241043} -}""", + bibtex_citation=r""" +@inproceedings{Agirre2013SEM2S, + author = {Eneko Agirre and Daniel Matthew Cer and Mona T. 
Diab and Aitor Gonzalez-Agirre and Weiwei Guo}, + booktitle = {International Workshop on Semantic Evaluation}, + title = {*SEM 2013 shared task: Semantic Textual Similarity}, + url = {https://api.semanticscholar.org/CorpusID:10241043}, + year = {2013}, +} +""", ) @property diff --git a/mteb/tasks/STS/eng/STS14STS.py b/mteb/tasks/STS/eng/STS14STS.py index 933cc124da..e2ab4f5d1e 100644 --- a/mteb/tasks/STS/eng/STS14STS.py +++ b/mteb/tasks/STS/eng/STS14STS.py @@ -27,24 +27,26 @@ class STS14STS(AbsTaskSTS): annotations_creators="derived", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{bandhakavi-etal-2014-generating, - title = "Generating a Word-Emotion Lexicon from {\#}Emotional Tweets", - author = "Bandhakavi, Anil and - Wiratunga, Nirmalie and - P, Deepak and - Massie, Stewart", - editor = "Bos, Johan and - Frank, Anette and - Navigli, Roberto", - booktitle = "Proceedings of the Third Joint Conference on Lexical and Computational Semantics (*{SEM} 2014)", - month = aug, - year = "2014", - address = "Dublin, Ireland", - publisher = "Association for Computational Linguistics and Dublin City University", - url = "https://aclanthology.org/S14-1002", - doi = "10.3115/v1/S14-1002", - pages = "12--21", -}""", + bibtex_citation=r""" +@inproceedings{bandhakavi-etal-2014-generating, + address = {Dublin, Ireland}, + author = {Bandhakavi, Anil and +Wiratunga, Nirmalie and +P, Deepak and +Massie, Stewart}, + booktitle = {Proceedings of the Third Joint Conference on Lexical and Computational Semantics (*{SEM} 2014)}, + doi = {10.3115/v1/S14-1002}, + editor = {Bos, Johan and +Frank, Anette and +Navigli, Roberto}, + month = aug, + pages = {12--21}, + publisher = {Association for Computational Linguistics and Dublin City University}, + title = {Generating a Word-Emotion Lexicon from {\#}Emotional Tweets}, + url = {https://aclanthology.org/S14-1002}, + year = {2014}, +} +""", ) @property diff --git a/mteb/tasks/STS/eng/STS15STS.py 
b/mteb/tasks/STS/eng/STS15STS.py index 99e81aa90f..4ffad2e282 100644 --- a/mteb/tasks/STS/eng/STS15STS.py +++ b/mteb/tasks/STS/eng/STS15STS.py @@ -27,22 +27,24 @@ class STS15STS(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{bicici-2015-rtm, - title = "{RTM}-{DCU}: Predicting Semantic Similarity with Referential Translation Machines", - author = "Bi{\c{c}}ici, Ergun", - editor = "Nakov, Preslav and - Zesch, Torsten and - Cer, Daniel and - Jurgens, David", - booktitle = "Proceedings of the 9th International Workshop on Semantic Evaluation ({S}em{E}val 2015)", - month = jun, - year = "2015", - address = "Denver, Colorado", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/S15-2010", - doi = "10.18653/v1/S15-2010", - pages = "56--63", -}""", + bibtex_citation=r""" +@inproceedings{bicici-2015-rtm, + address = {Denver, Colorado}, + author = {Bi{\c{c}}ici, Ergun}, + booktitle = {Proceedings of the 9th International Workshop on Semantic Evaluation ({S}em{E}val 2015)}, + doi = {10.18653/v1/S15-2010}, + editor = {Nakov, Preslav and +Zesch, Torsten and +Cer, Daniel and +Jurgens, David}, + month = jun, + pages = {56--63}, + publisher = {Association for Computational Linguistics}, + title = {{RTM}-{DCU}: Predicting Semantic Similarity with Referential Translation Machines}, + url = {https://aclanthology.org/S15-2010}, + year = {2015}, +} +""", ) @property diff --git a/mteb/tasks/STS/eng/STS16STS.py b/mteb/tasks/STS/eng/STS16STS.py index 94c978d4fc..4e489ffc2d 100644 --- a/mteb/tasks/STS/eng/STS16STS.py +++ b/mteb/tasks/STS/eng/STS16STS.py @@ -27,28 +27,30 @@ class STS16STS(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{nakov-etal-2016-semeval, - title = "{S}em{E}val-2016 Task 4: Sentiment Analysis in {T}witter", - author = "Nakov, Preslav and - Ritter, Alan and - Rosenthal, 
Sara and - Sebastiani, Fabrizio and - Stoyanov, Veselin", - editor = "Bethard, Steven and - Carpuat, Marine and - Cer, Daniel and - Jurgens, David and - Nakov, Preslav and - Zesch, Torsten", - booktitle = "Proceedings of the 10th International Workshop on Semantic Evaluation ({S}em{E}val-2016)", - month = jun, - year = "2016", - address = "San Diego, California", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/S16-1001", - doi = "10.18653/v1/S16-1001", - pages = "1--18", -}""", + bibtex_citation=r""" +@inproceedings{nakov-etal-2016-semeval, + address = {San Diego, California}, + author = {Nakov, Preslav and +Ritter, Alan and +Rosenthal, Sara and +Sebastiani, Fabrizio and +Stoyanov, Veselin}, + booktitle = {Proceedings of the 10th International Workshop on Semantic Evaluation ({S}em{E}val-2016)}, + doi = {10.18653/v1/S16-1001}, + editor = {Bethard, Steven and +Carpuat, Marine and +Cer, Daniel and +Jurgens, David and +Nakov, Preslav and +Zesch, Torsten}, + month = jun, + pages = {1--18}, + publisher = {Association for Computational Linguistics}, + title = {{S}em{E}val-2016 Task 4: Sentiment Analysis in {T}witter}, + url = {https://aclanthology.org/S16-1001}, + year = {2016}, +} +""", ) @property diff --git a/mteb/tasks/STS/eng/STSBenchmarkSTS.py b/mteb/tasks/STS/eng/STSBenchmarkSTS.py index e600711d34..c1363128d4 100644 --- a/mteb/tasks/STS/eng/STSBenchmarkSTS.py +++ b/mteb/tasks/STS/eng/STSBenchmarkSTS.py @@ -27,12 +27,14 @@ class STSBenchmarkSTS(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], sample_creation="machine-translated and verified", - bibtex_citation="""@InProceedings{huggingface:dataset:stsb_multi_mt, -title = {Machine translated multilingual STS benchmark dataset.}, -author={Philip May}, -year={2021}, -url={https://github.com/PhilipMay/stsb-multi-mt} -}""", + bibtex_citation=r""" +@inproceedings{huggingface:dataset:stsb_multi_mt, + author = {Philip May}, + title = {Machine translated 
multilingual STS benchmark dataset.}, + url = {https://github.com/PhilipMay/stsb-multi-mt}, + year = {2021}, +} +""", ) @property diff --git a/mteb/tasks/STS/eng/SickrSTS.py b/mteb/tasks/STS/eng/SickrSTS.py index 1c93fff578..b0b7a580d2 100644 --- a/mteb/tasks/STS/eng/SickrSTS.py +++ b/mteb/tasks/STS/eng/SickrSTS.py @@ -27,32 +27,34 @@ class SickrSTS(AbsTaskSTS): annotations_creators="human-annotated", dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{marelli-etal-2014-sick, - title = "A {SICK} cure for the evaluation of compositional distributional semantic models", - author = "Marelli, Marco and - Menini, Stefano and - Baroni, Marco and - Bentivogli, Luisa and - Bernardi, Raffaella and - Zamparelli, Roberto", - editor = "Calzolari, Nicoletta and - Choukri, Khalid and - Declerck, Thierry and - Loftsson, Hrafn and - Maegaard, Bente and - Mariani, Joseph and - Moreno, Asuncion and - Odijk, Jan and - Piperidis, Stelios", - booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)", - month = may, - year = "2014", - address = "Reykjavik, Iceland", - publisher = "European Language Resources Association (ELRA)", - url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/363_Paper.pdf", - pages = "216--223", - abstract = "Shared and internationally recognized benchmarks are fundamental for the development of any computational system. We aim to help the research community working on compositional distributional semantic models (CDSMs) by providing SICK (Sentences Involving Compositional Knowldedge), a large size English benchmark tailored for them. 
SICK consists of about 10,000 English sentence pairs that include many examples of the lexical, syntactic and semantic phenomena that CDSMs are expected to account for, but do not require dealing with other aspects of existing sentential data sets (idiomatic multiword expressions, named entities, telegraphic language) that are not within the scope of CDSMs. By means of crowdsourcing techniques, each pair was annotated for two crucial semantic tasks: relatedness in meaning (with a 5-point rating scale as gold score) and entailment relation between the two elements (with three possible gold labels: entailment, contradiction, and neutral). The SICK data set was used in SemEval-2014 Task 1, and it freely available for research purposes.", -}""", + bibtex_citation=r""" +@inproceedings{marelli-etal-2014-sick, + abstract = {Shared and internationally recognized benchmarks are fundamental for the development of any computational system. We aim to help the research community working on compositional distributional semantic models (CDSMs) by providing SICK (Sentences Involving Compositional Knowldedge), a large size English benchmark tailored for them. SICK consists of about 10,000 English sentence pairs that include many examples of the lexical, syntactic and semantic phenomena that CDSMs are expected to account for, but do not require dealing with other aspects of existing sentential data sets (idiomatic multiword expressions, named entities, telegraphic language) that are not within the scope of CDSMs. By means of crowdsourcing techniques, each pair was annotated for two crucial semantic tasks: relatedness in meaning (with a 5-point rating scale as gold score) and entailment relation between the two elements (with three possible gold labels: entailment, contradiction, and neutral). 
The SICK data set was used in SemEval-2014 Task 1, and it freely available for research purposes.}, + address = {Reykjavik, Iceland}, + author = {Marelli, Marco and +Menini, Stefano and +Baroni, Marco and +Bentivogli, Luisa and +Bernardi, Raffaella and +Zamparelli, Roberto}, + booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)}, + editor = {Calzolari, Nicoletta and +Choukri, Khalid and +Declerck, Thierry and +Loftsson, Hrafn and +Maegaard, Bente and +Mariani, Joseph and +Moreno, Asuncion and +Odijk, Jan and +Piperidis, Stelios}, + month = may, + pages = {216--223}, + publisher = {European Language Resources Association (ELRA)}, + title = {A {SICK} cure for the evaluation of compositional distributional semantic models}, + url = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/363_Paper.pdf}, + year = {2014}, +} +""", ) @property diff --git a/mteb/tasks/STS/fao/FaroeseSTS.py b/mteb/tasks/STS/fao/FaroeseSTS.py index 156485321a..2fa91ddf11 100644 --- a/mteb/tasks/STS/fao/FaroeseSTS.py +++ b/mteb/tasks/STS/fao/FaroeseSTS.py @@ -27,20 +27,20 @@ class FaroeseSTS(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{snaebjarnarson-etal-2023-transfer, - title = "{T}ransfer to a Low-Resource Language via Close Relatives: The Case Study on Faroese", - author = "Snæbjarnarson, Vésteinn and - Simonsen, Annika and - Glavaš, Goran and - Vulić, Ivan", - booktitle = "Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)", - month = "may 22--24", - year = "2023", - address = "Tórshavn, Faroe Islands", - publisher = {Link{\"o}ping University Electronic Press, Sweden}, - } - """, + bibtex_citation=r""" +@inproceedings{snaebjarnarson-etal-2023-transfer, + address = {Tórshavn, Faroe Islands}, + author = {Snæbjarnarson, Vésteinn and +Simonsen, Annika and +Glavaš, Goran and +Vulić, Ivan}, + booktitle = {Proceedings of the 
24th Nordic Conference on Computational Linguistics (NoDaLiDa)}, + month = {may 22--24}, + publisher = {Link{\"o}ping University Electronic Press, Sweden}, + title = {{T}ransfer to a Low-Resource Language via Close Relatives: The Case Study on Faroese}, + year = {2023}, +} +""", ) @property diff --git a/mteb/tasks/STS/fin/FinParaSTS.py b/mteb/tasks/STS/fin/FinParaSTS.py index 8d647112e8..63331e7c8e 100644 --- a/mteb/tasks/STS/fin/FinParaSTS.py +++ b/mteb/tasks/STS/fin/FinParaSTS.py @@ -31,30 +31,30 @@ class FinParaSTS(AbsTaskSTS): annotations_creators="expert-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{kanerva-etal-2021-finnish, - title = "{F}innish Paraphrase Corpus", - author = {Kanerva, Jenna and - Ginter, Filip and - Chang, Li-Hsin and - Rastas, Iiro and - Skantsi, Valtteri and - Kilpel{\"a}inen, Jemina and - Kupari, Hanna-Mari and - Saarni, Jenna and - Sev{\'o}n, Maija and - Tarkka, Otto}, - editor = "Dobnik, Simon and - {\\O}vrelid, Lilja", - booktitle = "Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)", - month = may # " 31--2 " # jun, - year = "2021", - address = "Reykjavik, Iceland (Online)", - publisher = {Link{\"o}ping University Electronic Press, Sweden}, - url = "https://aclanthology.org/2021.nodalida-main.29", - pages = "288--298", - } - """, + bibtex_citation=r""" +@inproceedings{kanerva-etal-2021-finnish, + address = {Reykjavik, Iceland (Online)}, + author = {Kanerva, Jenna and +Ginter, Filip and +Chang, Li-Hsin and +Rastas, Iiro and +Skantsi, Valtteri and +Kilpel{\"a}inen, Jemina and +Kupari, Hanna-Mari and +Saarni, Jenna and +Sev{\'o}n, Maija and +Tarkka, Otto}, + booktitle = {Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)}, + editor = {Dobnik, Simon and +{\O}vrelid, Lilja}, + month = may # { 31--2 } # jun, + pages = {288--298}, + publisher = {Link{\"o}ping University Electronic Press, Sweden}, + title = {{F}innish Paraphrase Corpus},
+ url = {https://aclanthology.org/2021.nodalida-main.29}, + year = {2021}, +} +""", ) @property diff --git a/mteb/tasks/STS/jpn/JSICK.py b/mteb/tasks/STS/jpn/JSICK.py index 554a3abf1d..cb0ea0920e 100644 --- a/mteb/tasks/STS/jpn/JSICK.py +++ b/mteb/tasks/STS/jpn/JSICK.py @@ -28,17 +28,17 @@ class JSICK(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation=""" - @article{yanaka2022compositional, - title={Compositional Evaluation on Japanese Textual Entailment and Similarity}, - author={Yanaka, Hitomi and Mineshima, Koji}, - journal={Transactions of the Association for Computational Linguistics}, - volume={10}, - pages={1266--1284}, - year={2022}, - publisher={MIT Press One Broadway, 12th Floor, Cambridge, Massachusetts 02142, USA~…} - } - """, + bibtex_citation=r""" +@article{yanaka2022compositional, + author = {Yanaka, Hitomi and Mineshima, Koji}, + journal = {Transactions of the Association for Computational Linguistics}, + pages = {1266--1284}, + publisher = {MIT Press One Broadway, 12th Floor, Cambridge, Massachusetts 02142, USA~…}, + title = {Compositional Evaluation on Japanese Textual Entailment and Similarity}, + volume = {10}, + year = {2022}, +} +""", ) @property diff --git a/mteb/tasks/STS/jpn/JSTS.py b/mteb/tasks/STS/jpn/JSTS.py index 4993359190..1b426bded6 100644 --- a/mteb/tasks/STS/jpn/JSTS.py +++ b/mteb/tasks/STS/jpn/JSTS.py @@ -30,33 +30,35 @@ class JSTS(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{kurihara-etal-2022-jglue, - title = "{JGLUE}: {J}apanese General Language Understanding Evaluation", - author = "Kurihara, Kentaro and - Kawahara, Daisuke and - Shibata, Tomohide", - editor = "Calzolari, Nicoletta and - B{\'e}chet, Fr{\'e}d{\'e}ric and - Blache, Philippe and - Choukri, Khalid and - Cieri, Christopher and - Declerck, Thierry and - Goggi, Sara and - Isahara, Hitoshi and - Maegaard, Bente and - Mariani, Joseph 
and - Mazo, H{\'e}l{\`e}ne and - Odijk, Jan and - Piperidis, Stelios", - booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference", - month = jun, - year = "2022", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://aclanthology.org/2022.lrec-1.317", - pages = "2957--2966", - abstract = "To develop high-performance natural language understanding (NLU) models, it is necessary to have a benchmark to evaluate and analyze NLU ability from various perspectives. While the English NLU benchmark, GLUE, has been the forerunner, benchmarks are now being released for languages other than English, such as CLUE for Chinese and FLUE for French; but there is no such benchmark for Japanese. We build a Japanese NLU benchmark, JGLUE, from scratch without translation to measure the general NLU ability in Japanese. We hope that JGLUE will facilitate NLU research in Japanese.", -}""", + bibtex_citation=r""" +@inproceedings{kurihara-etal-2022-jglue, + abstract = {To develop high-performance natural language understanding (NLU) models, it is necessary to have a benchmark to evaluate and analyze NLU ability from various perspectives. While the English NLU benchmark, GLUE, has been the forerunner, benchmarks are now being released for languages other than English, such as CLUE for Chinese and FLUE for French; but there is no such benchmark for Japanese. We build a Japanese NLU benchmark, JGLUE, from scratch without translation to measure the general NLU ability in Japanese. 
We hope that JGLUE will facilitate NLU research in Japanese.}, + address = {Marseille, France}, + author = {Kurihara, Kentaro and +Kawahara, Daisuke and +Shibata, Tomohide}, + booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference}, + editor = {Calzolari, Nicoletta and +B{\'e}chet, Fr{\'e}d{\'e}ric and +Blache, Philippe and +Choukri, Khalid and +Cieri, Christopher and +Declerck, Thierry and +Goggi, Sara and +Isahara, Hitoshi and +Maegaard, Bente and +Mariani, Joseph and +Mazo, H{\'e}l{\`e}ne and +Odijk, Jan and +Piperidis, Stelios}, + month = jun, + pages = {2957--2966}, + publisher = {European Language Resources Association}, + title = {{JGLUE}: {J}apanese General Language Understanding Evaluation}, + url = {https://aclanthology.org/2022.lrec-1.317}, + year = {2022}, +} +""", ) @property diff --git a/mteb/tasks/STS/kor/KlueSTS.py b/mteb/tasks/STS/kor/KlueSTS.py index 0ff8a724bb..2934133fed 100644 --- a/mteb/tasks/STS/kor/KlueSTS.py +++ b/mteb/tasks/STS/kor/KlueSTS.py @@ -28,14 +28,16 @@ class KlueSTS(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@misc{park2021klue, - title={KLUE: Korean Language Understanding Evaluation}, - author={Sungjoon Park and Jihyung Moon and Sungdong Kim and Won Ik Cho and Jiyoon Han and Jangwon Park and Chisung Song and Junseong Kim and Yongsook Song and Taehwan Oh and Joohong Lee and Juhyun Oh and Sungwon Lyu and Younghoon Jeong and Inkwon Lee and Sangwoo Seo and Dongjun Lee and Hyunwoo Kim and Myeonghwa Lee and Seongbo Jang and Seungwon Do and Sunkyoung Kim and Kyungtae Lim and Jongwon Lee and Kyumin Park and Jamin Shin and Seonghyun Kim and Lucy Park and Alice Oh and Jungwoo Ha and Kyunghyun Cho}, - year={2021}, - eprint={2105.09680}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + bibtex_citation=r""" +@misc{park2021klue, + archiveprefix = {arXiv}, + author = {Sungjoon Park and Jihyung Moon and Sungdong Kim and Won Ik Cho and 
Jiyoon Han and Jangwon Park and Chisung Song and Junseong Kim and Yongsook Song and Taehwan Oh and Joohong Lee and Juhyun Oh and Sungwon Lyu and Younghoon Jeong and Inkwon Lee and Sangwoo Seo and Dongjun Lee and Hyunwoo Kim and Myeonghwa Lee and Seongbo Jang and Seungwon Do and Sunkyoung Kim and Kyungtae Lim and Jongwon Lee and Kyumin Park and Jamin Shin and Seonghyun Kim and Lucy Park and Alice Oh and Jungwoo Ha and Kyunghyun Cho}, + eprint = {2105.09680}, + primaryclass = {cs.CL}, + title = {KLUE: Korean Language Understanding Evaluation}, + year = {2021}, +} +""", ) @property diff --git a/mteb/tasks/STS/kor/KorSTS.py b/mteb/tasks/STS/kor/KorSTS.py index 6ab1437bb1..bc2ce9258e 100644 --- a/mteb/tasks/STS/kor/KorSTS.py +++ b/mteb/tasks/STS/kor/KorSTS.py @@ -27,12 +27,14 @@ class KorSTS(AbsTaskSTS): annotations_creators=None, dialect=[], sample_creation="machine-translated and localized", - bibtex_citation="""@article{ham2020kornli, - title={KorNLI and KorSTS: New Benchmark Datasets for Korean Natural Language Understanding}, - author={Ham, Jiyeon and Choe, Yo Joong and Park, Kyubyong and Choi, Ilji and Soh, Hyungjoon}, - journal={arXiv preprint arXiv:2004.03289}, - year={2020} -}""", + bibtex_citation=r""" +@article{ham2020kornli, + author = {Ham, Jiyeon and Choe, Yo Joong and Park, Kyubyong and Choi, Ilji and Soh, Hyungjoon}, + journal = {arXiv preprint arXiv:2004.03289}, + title = {KorNLI and KorSTS: New Benchmark Datasets for Korean Natural Language Understanding}, + year = {2020}, +} +""", ) @property diff --git a/mteb/tasks/STS/multilingual/IndicCrosslingualSTS.py b/mteb/tasks/STS/multilingual/IndicCrosslingualSTS.py index 2d5b653ec2..e22223979e 100644 --- a/mteb/tasks/STS/multilingual/IndicCrosslingualSTS.py +++ b/mteb/tasks/STS/multilingual/IndicCrosslingualSTS.py @@ -60,19 +60,21 @@ class IndicCrosslingualSTS(AbsTaskSTS, MultilingualTask): annotations_creators="expert-annotated", dialect=[], sample_creation="created", - 
bibtex_citation="""@article{10.1162/tacl_a_00452, - author = {Ramesh, Gowtham and Doddapaneni, Sumanth and Bheemaraj, Aravinth and Jobanputra, Mayank and AK, Raghavan and Sharma, Ajitesh and Sahoo, Sujit and Diddee, Harshita and J, Mahalakshmi and Kakwani, Divyanshu and Kumar, Navneet and Pradeep, Aswin and Nagaraj, Srihari and Deepak, Kumar and Raghavan, Vivek and Kunchukuttan, Anoop and Kumar, Pratyush and Khapra, Mitesh Shantadevi}, - title = "{Samanantar: The Largest Publicly Available Parallel Corpora Collection for 11 Indic Languages}", - journal = {Transactions of the Association for Computational Linguistics}, - volume = {10}, - pages = {145-162}, - year = {2022}, - month = {02}, - issn = {2307-387X}, - doi = {10.1162/tacl_a_00452}, - url = {https://doi.org/10.1162/tacl\\_a\\_00452}, - eprint = {https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl\\_a\\_00452/1987010/tacl\\_a\\_00452.pdf}, -}""", + bibtex_citation=r""" +@article{10.1162/tacl_a_00452, + author = {Ramesh, Gowtham and Doddapaneni, Sumanth and Bheemaraj, Aravinth and Jobanputra, Mayank and AK, Raghavan and Sharma, Ajitesh and Sahoo, Sujit and Diddee, Harshita and J, Mahalakshmi and Kakwani, Divyanshu and Kumar, Navneet and Pradeep, Aswin and Nagaraj, Srihari and Deepak, Kumar and Raghavan, Vivek and Kunchukuttan, Anoop and Kumar, Pratyush and Khapra, Mitesh Shantadevi}, + doi = {10.1162/tacl_a_00452}, + eprint = {https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl\\_a\\_00452/1987010/tacl\\_a\\_00452.pdf}, + issn = {2307-387X}, + journal = {Transactions of the Association for Computational Linguistics}, + month = {02}, + pages = {145-162}, + title = {{Samanantar: The Largest Publicly Available Parallel Corpora Collection for 11 Indic Languages}}, + url = {https://doi.org/10.1162/tacl\\_a\\_00452}, + volume = {10}, + year = {2022}, +} +""", ) @property diff --git a/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py b/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py index 
478f2fbd68..47789d9648 100644 --- a/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py +++ b/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py @@ -42,29 +42,31 @@ class STS17Crosslingual(AbsTaskSTS, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@inproceedings{cer-etal-2017-semeval, - title = "{S}em{E}val-2017 Task 1: Semantic Textual Similarity Multilingual and Crosslingual Focused Evaluation", - author = "Cer, Daniel and - Diab, Mona and - Agirre, Eneko and - Lopez-Gazpio, I{\\~n}igo and - Specia, Lucia", - editor = "Bethard, Steven and - Carpuat, Marine and - Apidianaki, Marianna and - Mohammad, Saif M. and - Cer, Daniel and - Jurgens, David", - booktitle = "Proceedings of the 11th International Workshop on Semantic Evaluation ({S}em{E}val-2017)", - month = aug, - year = "2017", - address = "Vancouver, Canada", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/S17-2001", - doi = "10.18653/v1/S17-2001", - pages = "1--14", - abstract = "Semantic Textual Similarity (STS) measures the meaning similarity of sentences. Applications include machine translation (MT), summarization, generation, question answering (QA), short answer grading, semantic search, dialog and conversational systems. The STS shared task is a venue for assessing the current state-of-the-art. The 2017 task focuses on multilingual and cross-lingual pairs with one sub-track exploring MT quality estimation (MTQE) data. The task obtained strong participation from 31 teams, with 17 participating in \textit{all language tracks}. We summarize performance and review a selection of well performing methods. Analysis highlights common errors, providing insight into the limitations of existing models. 
To support ongoing work on semantic representations, the \textit{STS Benchmark} is introduced as a new shared training and evaluation set carefully selected from the corpus of English STS shared task data (2012-2017).", -}""", + bibtex_citation=r""" +@inproceedings{cer-etal-2017-semeval, + abstract = {Semantic Textual Similarity (STS) measures the meaning similarity of sentences. Applications include machine translation (MT), summarization, generation, question answering (QA), short answer grading, semantic search, dialog and conversational systems. The STS shared task is a venue for assessing the current state-of-the-art. The 2017 task focuses on multilingual and cross-lingual pairs with one sub-track exploring MT quality estimation (MTQE) data. The task obtained strong participation from 31 teams, with 17 participating in \textit{all language tracks}. We summarize performance and review a selection of well performing methods. Analysis highlights common errors, providing insight into the limitations of existing models. To support ongoing work on semantic representations, the \textit{STS Benchmark} is introduced as a new shared training and evaluation set carefully selected from the corpus of English STS shared task data (2012-2017).}, + address = {Vancouver, Canada}, + author = {Cer, Daniel and +Diab, Mona and +Agirre, Eneko and +Lopez-Gazpio, I{\\~n}igo and +Specia, Lucia}, + booktitle = {Proceedings of the 11th International Workshop on Semantic Evaluation ({S}em{E}val-2017)}, + doi = {10.18653/v1/S17-2001}, + editor = {Bethard, Steven and +Carpuat, Marine and +Apidianaki, Marianna and +Mohammad, Saif M. 
and +Cer, Daniel and +Jurgens, David}, + month = aug, + pages = {1--14}, + publisher = {Association for Computational Linguistics}, + title = {{S}em{E}val-2017 Task 1: Semantic Textual Similarity Multilingual and Crosslingual Focused Evaluation}, + url = {https://aclanthology.org/S17-2001}, + year = {2017}, +} +""", ) @property diff --git a/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py b/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py index cc231b63b3..09eb936a48 100644 --- a/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py +++ b/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py @@ -49,34 +49,36 @@ class STS22CrosslingualSTSv2(AbsTaskSTS, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{chen-etal-2022-semeval, - title = "{S}em{E}val-2022 Task 8: Multilingual news article similarity", - author = {Chen, Xi and - Zeynali, Ali and - Camargo, Chico and - Fl{\"o}ck, Fabian and - Gaffney, Devin and - Grabowicz, Przemyslaw and - Hale, Scott and - Jurgens, David and - Samory, Mattia}, - editor = "Emerson, Guy and - Schluter, Natalie and - Stanovsky, Gabriel and - Kumar, Ritesh and - Palmer, Alexis and - Schneider, Nathan and - Singh, Siddharth and - Ratan, Shyam", - booktitle = "Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)", - month = jul, - year = "2022", - address = "Seattle, United States", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2022.semeval-1.155", - doi = "10.18653/v1/2022.semeval-1.155", - pages = "1094--1106", -}""", + bibtex_citation=r""" +@inproceedings{chen-etal-2022-semeval, + address = {Seattle, United States}, + author = {Chen, Xi and +Zeynali, Ali and +Camargo, Chico and +Fl{\"o}ck, Fabian and +Gaffney, Devin and +Grabowicz, Przemyslaw and +Hale, Scott and +Jurgens, David and +Samory, Mattia}, + booktitle = {Proceedings of the 16th International Workshop on Semantic 
Evaluation (SemEval-2022)}, + doi = {10.18653/v1/2022.semeval-1.155}, + editor = {Emerson, Guy and +Schluter, Natalie and +Stanovsky, Gabriel and +Kumar, Ritesh and +Palmer, Alexis and +Schneider, Nathan and +Singh, Siddharth and +Ratan, Shyam}, + month = jul, + pages = {1094--1106}, + publisher = {Association for Computational Linguistics}, + title = {{S}em{E}val-2022 Task 8: Multilingual news article similarity}, + url = {https://aclanthology.org/2022.semeval-1.155}, + year = {2022}, +} +""", adapted_from=["STS22"], ) @@ -112,34 +114,36 @@ class STS22CrosslingualSTS(AbsTaskSTS, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="found", - bibtex_citation="""@inproceedings{chen-etal-2022-semeval, - title = "{S}em{E}val-2022 Task 8: Multilingual news article similarity", - author = {Chen, Xi and - Zeynali, Ali and - Camargo, Chico and - Fl{\"o}ck, Fabian and - Gaffney, Devin and - Grabowicz, Przemyslaw and - Hale, Scott and - Jurgens, David and - Samory, Mattia}, - editor = "Emerson, Guy and - Schluter, Natalie and - Stanovsky, Gabriel and - Kumar, Ritesh and - Palmer, Alexis and - Schneider, Nathan and - Singh, Siddharth and - Ratan, Shyam", - booktitle = "Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)", - month = jul, - year = "2022", - address = "Seattle, United States", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2022.semeval-1.155", - doi = "10.18653/v1/2022.semeval-1.155", - pages = "1094--1106", -}""", + bibtex_citation=r""" +@inproceedings{chen-etal-2022-semeval, + address = {Seattle, United States}, + author = {Chen, Xi and +Zeynali, Ali and +Camargo, Chico and +Fl{\"o}ck, Fabian and +Gaffney, Devin and +Grabowicz, Przemyslaw and +Hale, Scott and +Jurgens, David and +Samory, Mattia}, + booktitle = {Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)}, + doi = {10.18653/v1/2022.semeval-1.155}, + editor = 
{Emerson, Guy and +Schluter, Natalie and +Stanovsky, Gabriel and +Kumar, Ritesh and +Palmer, Alexis and +Schneider, Nathan and +Singh, Siddharth and +Ratan, Shyam}, + month = jul, + pages = {1094--1106}, + publisher = {Association for Computational Linguistics}, + title = {{S}em{E}val-2022 Task 8: Multilingual news article similarity}, + url = {https://aclanthology.org/2022.semeval-1.155}, + year = {2022}, +} +""", ) @property diff --git a/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py b/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py index eaf5ff1afb..9e04db5b02 100644 --- a/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py +++ b/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py @@ -46,12 +46,14 @@ class STSBenchmarkMultilingualSTS(AbsTaskSTS, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="machine-translated", - bibtex_citation="""@InProceedings{huggingface:dataset:stsb_multi_mt, - title = {Machine translated multilingual STS benchmark dataset.}, - author={Philip May}, - year={2021}, - url={https://github.com/PhilipMay/stsb-multi-mt} - }""", + bibtex_citation=r""" +@inproceedings{huggingface:dataset:stsb_multi_mt, + author = {Philip May}, + title = {Machine translated multilingual STS benchmark dataset.}, + url = {https://github.com/PhilipMay/stsb-multi-mt}, + year = {2021}, +} +""", ) @property diff --git a/mteb/tasks/STS/multilingual/SemRel24STS.py b/mteb/tasks/STS/multilingual/SemRel24STS.py index f6c1e7c402..c94ae87c02 100644 --- a/mteb/tasks/STS/multilingual/SemRel24STS.py +++ b/mteb/tasks/STS/multilingual/SemRel24STS.py @@ -49,20 +49,21 @@ class SemRel24STS(AbsTaskSTS, MultilingualTask): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@misc{ousidhoum2024semrel2024, - title={SemRel2024: A Collection of Semantic Textual Relatedness Datasets for 14 Languages}, - author={Nedjma Ousidhoum and Shamsuddeen Hassan Muhammad and Mohamed 
Abdalla and Idris Abdulmumin and Ibrahim Said Ahmad and - Sanchit Ahuja and Alham Fikri Aji and Vladimir Araujo and Abinew Ali Ayele and Pavan Baswani and Meriem Beloucif and - Chris Biemann and Sofia Bourhim and Christine De Kock and Genet Shanko Dekebo and - Oumaima Hourrane and Gopichand Kanumolu and Lokesh Madasu and Samuel Rutunda and Manish Shrivastava and - Thamar Solorio and Nirmal Surange and Hailegnaw Getaneh Tilaye and Krishnapriya Vishnubhotla and Genta Winata and - Seid Muhie Yimam and Saif M. Mohammad}, - year={2024}, - eprint={2402.08638}, - archivePrefix={arXiv}, - primaryClass={cs.CL} - } - """, + bibtex_citation=r""" +@misc{ousidhoum2024semrel2024, + archiveprefix = {arXiv}, + author = {Nedjma Ousidhoum and Shamsuddeen Hassan Muhammad and Mohamed Abdalla and Idris Abdulmumin and Ibrahim Said Ahmad and +Sanchit Ahuja and Alham Fikri Aji and Vladimir Araujo and Abinew Ali Ayele and Pavan Baswani and Meriem Beloucif and +Chris Biemann and Sofia Bourhim and Christine De Kock and Genet Shanko Dekebo and +Oumaima Hourrane and Gopichand Kanumolu and Lokesh Madasu and Samuel Rutunda and Manish Shrivastava and +Thamar Solorio and Nirmal Surange and Hailegnaw Getaneh Tilaye and Krishnapriya Vishnubhotla and Genta Winata and +Seid Muhie Yimam and Saif M. 
Mohammad}, + eprint = {2402.08638}, + primaryclass = {cs.CL}, + title = {SemRel2024: A Collection of Semantic Textual Relatedness Datasets for 14 Languages}, + year = {2024}, +} +""", ) @property diff --git a/mteb/tasks/STS/pol/PolishSTS.py b/mteb/tasks/STS/pol/PolishSTS.py index 9115f37996..db1deb0eac 100644 --- a/mteb/tasks/STS/pol/PolishSTS.py +++ b/mteb/tasks/STS/pol/PolishSTS.py @@ -26,37 +26,37 @@ class SickrPLSTS(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], sample_creation="human-translated and localized", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{dadas-etal-2020-evaluation, - title = "Evaluation of Sentence Representations in {P}olish", - author = "Dadas, Slawomir and - Perelkiewicz, Michal and - Poswiata, Rafal", - editor = "Calzolari, Nicoletta and - B{\'e}chet, Fr{\'e}d{\'e}ric and - Blache, Philippe and - Choukri, Khalid and - Cieri, Christopher and - Declerck, Thierry and - Goggi, Sara and - Isahara, Hitoshi and - Maegaard, Bente and - Mariani, Joseph and - Mazo, Helene and - Moreno, Asuncion and - Odijk, Jan and - Piperidis, Stelios", - booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference", - month = may, - year = "2020", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://aclanthology.org/2020.lrec-1.207", - pages = "1674--1680", - language = "English", - ISBN = "979-10-95546-34-4", + address = {Marseille, France}, + author = {Dadas, Slawomir and +Perelkiewicz, Michal and +Poswiata, Rafal}, + booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference}, + editor = {Calzolari, Nicoletta and +B{\'e}chet, Fr{\'e}d{\'e}ric and +Blache, Philippe and +Choukri, Khalid and +Cieri, Christopher and +Declerck, Thierry and +Goggi, Sara and +Isahara, Hitoshi and +Maegaard, Bente and +Mariani, Joseph and +Mazo, Helene and +Moreno, Asuncion and +Odijk, Jan and +Piperidis, Stelios}, + isbn = {979-10-95546-34-4}, + 
language = {English}, + month = may, + pages = {1674--1680}, + publisher = {European Language Resources Association}, + title = {Evaluation of Sentence Representations in {P}olish}, + url = {https://aclanthology.org/2020.lrec-1.207}, + year = {2020}, } - """, +""", ) @property @@ -89,24 +89,23 @@ class CdscrSTS(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], sample_creation="human-translated and localized", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{wroblewska-krasnowska-kieras-2017-polish, - title = "{P}olish evaluation dataset for compositional distributional semantics models", - author = "Wr{\'o}blewska, Alina and - Krasnowska-Kiera{\'s}, Katarzyna", - editor = "Barzilay, Regina and - Kan, Min-Yen", - booktitle = "Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)", - month = jul, - year = "2017", - address = "Vancouver, Canada", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/P17-1073", - doi = "10.18653/v1/P17-1073", - pages = "784--792", + address = {Vancouver, Canada}, + author = {Wr{\'o}blewska, Alina and +Krasnowska-Kiera{\'s}, Katarzyna}, + booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + doi = {10.18653/v1/P17-1073}, + editor = {Barzilay, Regina and +Kan, Min-Yen}, + month = jul, + pages = {784--792}, + publisher = {Association for Computational Linguistics}, + title = {{P}olish evaluation dataset for compositional distributional semantics models}, + url = {https://aclanthology.org/P17-1073}, + year = {2017}, } - - """, +""", ) @property diff --git a/mteb/tasks/STS/por/Assin2STS.py b/mteb/tasks/STS/por/Assin2STS.py index e96ae97c34..0719f03607 100644 --- a/mteb/tasks/STS/por/Assin2STS.py +++ b/mteb/tasks/STS/por/Assin2STS.py @@ -26,14 +26,16 @@ class Assin2STS(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], 
sample_creation="found", - bibtex_citation="""@inproceedings{real2020assin, - title={The assin 2 shared task: a quick overview}, - author={Real, Livy and Fonseca, Erick and Oliveira, Hugo Goncalo}, - booktitle={International Conference on Computational Processing of the Portuguese Language}, - pages={406--412}, - year={2020}, - organization={Springer} - }""", + bibtex_citation=r""" +@inproceedings{real2020assin, + author = {Real, Livy and Fonseca, Erick and Oliveira, Hugo Goncalo}, + booktitle = {International Conference on Computational Processing of the Portuguese Language}, + organization = {Springer}, + pages = {406--412}, + title = {The assin 2 shared task: a quick overview}, + year = {2020}, +} +""", ) @property diff --git a/mteb/tasks/STS/por/SickBrSTS.py b/mteb/tasks/STS/por/SickBrSTS.py index ee75ecdd39..d9733ed50b 100644 --- a/mteb/tasks/STS/por/SickBrSTS.py +++ b/mteb/tasks/STS/por/SickBrSTS.py @@ -29,27 +29,27 @@ class SickBrSTS(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], sample_creation="human-translated and localized", - bibtex_citation=""" + bibtex_citation=r""" @inproceedings{real18, - author={Real, Livy - and Rodrigues, Ana - and Vieira e Silva, Andressa - and Albiero, Beatriz - and Thalenberg, Bruna - and Guide, Bruno - and Silva, Cindy - and de Oliveira Lima, Guilherme - and Camara, Igor C. S. - and Stanojevi{\'{c}}, Milo{\v{s}} - and Souza, Rodrigo - and de Paiva, Valeria}, - year ="2018", - title="{SICK-BR: A Portuguese Corpus for Inference}", - booktitle="{Computational Processing of the Portuguese Language. PROPOR 2018.}", - doi ="10.1007/978-3-319-99722-3_31", - isbn="978-3-319-99722-3" + author = {Real, Livy +and Rodrigues, Ana +and Vieira e Silva, Andressa +and Albiero, Beatriz +and Thalenberg, Bruna +and Guide, Bruno +and Silva, Cindy +and de Oliveira Lima, Guilherme +and Camara, Igor C. S. 
+and Stanojevi{\'{c}}, Milo{\v{s}} +and Souza, Rodrigo +and de Paiva, Valeria}, + booktitle = {{Computational Processing of the Portuguese Language. PROPOR 2018.}}, + doi = {10.1007/978-3-319-99722-3_31}, + isbn = {978-3-319-99722-3}, + title = {{SICK-BR: A Portuguese Corpus for Inference}}, + year = {2018}, } - """, +""", ) @property diff --git a/mteb/tasks/STS/ron/RonSTS.py b/mteb/tasks/STS/ron/RonSTS.py index 4941cba3e6..d08dd109bb 100644 --- a/mteb/tasks/STS/ron/RonSTS.py +++ b/mteb/tasks/STS/ron/RonSTS.py @@ -28,14 +28,14 @@ class RonSTS(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], sample_creation="machine-translated and verified", - bibtex_citation=""" - @inproceedings{dumitrescu2021liro, - title={LiRo: Benchmark and leaderboard for Romanian language tasks}, - author={Dumitrescu, Stefan Daniel and Rebeja, Petru and Lorincz, Beata and Gaman, Mihaela and Avram, Andrei and Ilie, Mihai and Pruteanu, Andrei and Stan, Adriana and Rosia, Lorena and Iacobescu, Cristina and others}, - booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)}, - year={2021} - } - """, + bibtex_citation=r""" +@inproceedings{dumitrescu2021liro, + author = {Dumitrescu, Stefan Daniel and Rebeja, Petru and Lorincz, Beata and Gaman, Mihaela and Avram, Andrei and Ilie, Mihai and Pruteanu, Andrei and Stan, Adriana and Rosia, Lorena and Iacobescu, Cristina and others}, + booktitle = {Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)}, + title = {LiRo: Benchmark and leaderboard for Romanian language tasks}, + year = {2021}, +} +""", ) @property diff --git a/mteb/tasks/STS/rus/RUParaPhraserSTS.py b/mteb/tasks/STS/rus/RUParaPhraserSTS.py index 9174f2f661..19e3addfe5 100644 --- a/mteb/tasks/STS/rus/RUParaPhraserSTS.py +++ b/mteb/tasks/STS/rus/RUParaPhraserSTS.py @@ -27,30 +27,31 @@ class RUParaPhraserSTS(AbsTaskSTS): annotations_creators="human-annotated", 
dialect=[], sample_creation="found", - bibtex_citation=""" - @inproceedings{gudkov-etal-2020-automatically, - title = "Automatically Ranked {R}ussian Paraphrase Corpus for Text Generation", - author = "Gudkov, Vadim and - Mitrofanova, Olga and - Filippskikh, Elizaveta", - booktitle = "Proceedings of the Fourth Workshop on Neural Generation and Translation", - month = jul, - year = "2020", - address = "Online", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2020.ngt-1.6", - doi = "10.18653/v1/2020.ngt-1.6", - pages = "54--59", - } - @inproceedings{pivovarova2017paraphraser, - title={ParaPhraser: Russian paraphrase corpus and shared task}, - author={Pivovarova, Lidia and Pronoza, Ekaterina and Yagunova, Elena and Pronoza, Anton}, - booktitle={Conference on artificial intelligence and natural language}, - pages={211--225}, - year={2017}, - organization={Springer} - } - """, + bibtex_citation=r""" +@inproceedings{gudkov-etal-2020-automatically, + address = {Online}, + author = {Gudkov, Vadim and +Mitrofanova, Olga and +Filippskikh, Elizaveta}, + booktitle = {Proceedings of the Fourth Workshop on Neural Generation and Translation}, + doi = {10.18653/v1/2020.ngt-1.6}, + month = jul, + pages = {54--59}, + publisher = {Association for Computational Linguistics}, + title = {Automatically Ranked {R}ussian Paraphrase Corpus for Text Generation}, + url = {https://aclanthology.org/2020.ngt-1.6}, + year = {2020}, +} + +@inproceedings{pivovarova2017paraphraser, + author = {Pivovarova, Lidia and Pronoza, Ekaterina and Yagunova, Elena and Pronoza, Anton}, + booktitle = {Conference on artificial intelligence and natural language}, + organization = {Springer}, + pages = {211--225}, + title = {ParaPhraser: Russian paraphrase corpus and shared task}, + year = {2017}, +} +""", ) @property diff --git a/mteb/tasks/STS/rus/RuSTSBenchmarkSTS.py b/mteb/tasks/STS/rus/RuSTSBenchmarkSTS.py index 4fd9198baf..44e3395e1c 100644 --- 
a/mteb/tasks/STS/rus/RuSTSBenchmarkSTS.py +++ b/mteb/tasks/STS/rus/RuSTSBenchmarkSTS.py @@ -28,12 +28,14 @@ class RuSTSBenchmarkSTS(AbsTaskSTS): annotations_creators="human-annotated", dialect=[], sample_creation="machine-translated and verified", - bibtex_citation="""@InProceedings{huggingface:dataset:stsb_multi_mt, -title = {Machine translated multilingual STS benchmark dataset.}, -author={Philip May}, -year={2021}, -url={https://github.com/PhilipMay/stsb-multi-mt} -}""", + bibtex_citation=r""" +@inproceedings{huggingface:dataset:stsb_multi_mt, + author = {Philip May}, + title = {Machine translated multilingual STS benchmark dataset.}, + url = {https://github.com/PhilipMay/stsb-multi-mt}, + year = {2021}, +} +""", ) @property diff --git a/mteb/tasks/STS/spa/STSES.py b/mteb/tasks/STS/spa/STSES.py index 8bdbf227a2..844e132c77 100644 --- a/mteb/tasks/STS/spa/STSES.py +++ b/mteb/tasks/STS/spa/STSES.py @@ -30,21 +30,21 @@ class STSES(AbsTaskSTS): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{agirre2015semeval, - title={Semeval-2015 task 2: Semantic textual similarity, english, spanish and pilot on interpretability}, - author={Agirre, Eneko and Banea, Carmen and Cardie, Claire and Cer, Daniel and Diab, Mona and Gonzalez-Agirre, Aitor and Guo, Weiwei and Lopez-Gazpio, Inigo and Maritxalar, Montse and Mihalcea, Rada and others}, - booktitle={Proceedings of the 9th international workshop on semantic evaluation (SemEval 2015)}, - pages={252--263}, - year={2015} + bibtex_citation=r""" +@inproceedings{agirre2014semeval, + author = {Agirre, Eneko and Banea, Carmen and Cardie, Claire and Cer, Daniel M and Diab, Mona T and Gonzalez-Agirre, Aitor and Guo, Weiwei and Mihalcea, Rada and Rigau, German and Wiebe, Janyce}, + booktitle = {SemEval@ COLING}, + pages = {81--91}, + title = {SemEval-2014 Task 10: Multilingual Semantic Textual Similarity.}, + year = {2014}, } - -@inproceedings{agirre2014semeval, - title={SemEval-2014 Task 
10: Multilingual Semantic Textual Similarity.}, - author={Agirre, Eneko and Banea, Carmen and Cardie, Claire and Cer, Daniel M and Diab, Mona T and Gonzalez-Agirre, Aitor and Guo, Weiwei and Mihalcea, Rada and Rigau, German and Wiebe, Janyce}, - booktitle={SemEval@ COLING}, - pages={81--91}, - year={2014} +@inproceedings{agirre2015semeval, + author = {Agirre, Eneko and Banea, Carmen and Cardie, Claire and Cer, Daniel and Diab, Mona and Gonzalez-Agirre, Aitor and Guo, Weiwei and Lopez-Gazpio, Inigo and Maritxalar, Montse and Mihalcea, Rada and others}, + booktitle = {Proceedings of the 9th international workshop on semantic evaluation (SemEval 2015)}, + pages = {252--263}, + title = {Semeval-2015 task 2: Semantic textual similarity, english, spanish and pilot on interpretability}, + year = {2015}, } """, ) diff --git a/mteb/tasks/STS/zho/CMTEBSTS.py b/mteb/tasks/STS/zho/CMTEBSTS.py index bcc149f937..fed68649eb 100644 --- a/mteb/tasks/STS/zho/CMTEBSTS.py +++ b/mteb/tasks/STS/zho/CMTEBSTS.py @@ -27,26 +27,28 @@ class ATEC(AbsTaskSTS): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{raghu-etal-2021-end, - title = "End-to-End Learning of Flowchart Grounded Task-Oriented Dialogs", - author = "Raghu, Dinesh and - Agarwal, Shantanu and - Joshi, Sachindra and - {Mausam}", - editor = "Moens, Marie-Francine and - Huang, Xuanjing and - Specia, Lucia and - Yih, Scott Wen-tau", - booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing", - month = nov, - year = "2021", - address = "Online and Punta Cana, Dominican Republic", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.emnlp-main.357", - doi = "10.18653/v1/2021.emnlp-main.357", - pages = "4348--4366", - abstract = "We propose a novel problem within end-to-end learning of task oriented dialogs (TOD), in which the dialog system mimics a troubleshooting agent who helps a user by 
diagnosing their problem (e.g., car not starting). Such dialogs are grounded in domain-specific flowcharts, which the agent is supposed to follow during the conversation. Our task exposes novel technical challenges for neural TOD, such as grounding an utterance to the flowchart without explicit annotation, referring to additional manual pages when user asks a clarification question, and ability to follow unseen flowcharts at test time. We release a dataset (FLODIAL) consisting of 2,738 dialogs grounded on 12 different troubleshooting flowcharts. We also design a neural model, FLONET, which uses a retrieval-augmented generation architecture to train the dialog agent. Our experiments find that FLONET can do zero-shot transfer to unseen flowcharts, and sets a strong baseline for future research.", -}""", + bibtex_citation=r""" +@inproceedings{raghu-etal-2021-end, + abstract = {We propose a novel problem within end-to-end learning of task oriented dialogs (TOD), in which the dialog system mimics a troubleshooting agent who helps a user by diagnosing their problem (e.g., car not starting). Such dialogs are grounded in domain-specific flowcharts, which the agent is supposed to follow during the conversation. Our task exposes novel technical challenges for neural TOD, such as grounding an utterance to the flowchart without explicit annotation, referring to additional manual pages when user asks a clarification question, and ability to follow unseen flowcharts at test time. We release a dataset (FLODIAL) consisting of 2,738 dialogs grounded on 12 different troubleshooting flowcharts. We also design a neural model, FLONET, which uses a retrieval-augmented generation architecture to train the dialog agent. 
Our experiments find that FLONET can do zero-shot transfer to unseen flowcharts, and sets a strong baseline for future research.}, + address = {Online and Punta Cana, Dominican Republic}, + author = {Raghu, Dinesh and +Agarwal, Shantanu and +Joshi, Sachindra and +{Mausam}}, + booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing}, + doi = {10.18653/v1/2021.emnlp-main.357}, + editor = {Moens, Marie-Francine and +Huang, Xuanjing and +Specia, Lucia and +Yih, Scott Wen-tau}, + month = nov, + pages = {4348--4366}, + publisher = {Association for Computational Linguistics}, + title = {End-to-End Learning of Flowchart Grounded Task-Oriented Dialogs}, + url = {https://aclanthology.org/2021.emnlp-main.357}, + year = {2021}, +} +""", ) @property @@ -79,15 +81,17 @@ class BQ(AbsTaskSTS): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{xiao2024cpackpackagedresourcesadvance, - title={C-Pack: Packaged Resources To Advance General Chinese Embedding}, - author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff and Defu Lian and Jian-Yun Nie}, - year={2024}, - eprint={2309.07597}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2309.07597}, -}""", + bibtex_citation=r""" +@misc{xiao2024cpackpackagedresourcesadvance, + archiveprefix = {arXiv}, + author = {Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff and Defu Lian and Jian-Yun Nie}, + eprint = {2309.07597}, + primaryclass = {cs.CL}, + title = {C-Pack: Packaged Resources To Advance General Chinese Embedding}, + url = {https://arxiv.org/abs/2309.07597}, + year = {2024}, +} +""", ) @property @@ -120,15 +124,17 @@ class LCQMC(AbsTaskSTS): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{xiao2024cpackpackagedresourcesadvance, - title={C-Pack: Packaged Resources To Advance General Chinese Embedding}, - author={Shitao Xiao and Zheng Liu and Peitian 
Zhang and Niklas Muennighoff and Defu Lian and Jian-Yun Nie}, - year={2024}, - eprint={2309.07597}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2309.07597}, -}""", + bibtex_citation=r""" +@misc{xiao2024cpackpackagedresourcesadvance, + archiveprefix = {arXiv}, + author = {Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff and Defu Lian and Jian-Yun Nie}, + eprint = {2309.07597}, + primaryclass = {cs.CL}, + title = {C-Pack: Packaged Resources To Advance General Chinese Embedding}, + url = {https://arxiv.org/abs/2309.07597}, + year = {2024}, +} +""", ) @property @@ -161,15 +167,17 @@ class PAWSX(AbsTaskSTS): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{xiao2024cpackpackagedresourcesadvance, - title={C-Pack: Packaged Resources To Advance General Chinese Embedding}, - author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff and Defu Lian and Jian-Yun Nie}, - year={2024}, - eprint={2309.07597}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2309.07597}, -}""", + bibtex_citation=r""" +@misc{xiao2024cpackpackagedresourcesadvance, + archiveprefix = {arXiv}, + author = {Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff and Defu Lian and Jian-Yun Nie}, + eprint = {2309.07597}, + primaryclass = {cs.CL}, + title = {C-Pack: Packaged Resources To Advance General Chinese Embedding}, + url = {https://arxiv.org/abs/2309.07597}, + year = {2024}, +} +""", ) @property @@ -202,15 +210,17 @@ class STSB(AbsTaskSTS): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@misc{xiao2024cpackpackagedresourcesadvance, - title={C-Pack: Packaged Resources To Advance General Chinese Embedding}, - author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff and Defu Lian and Jian-Yun Nie}, - year={2024}, - eprint={2309.07597}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - 
url={https://arxiv.org/abs/2309.07597}, -}""", + bibtex_citation=r""" +@misc{xiao2024cpackpackagedresourcesadvance, + archiveprefix = {arXiv}, + author = {Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff and Defu Lian and Jian-Yun Nie}, + eprint = {2309.07597}, + primaryclass = {cs.CL}, + title = {C-Pack: Packaged Resources To Advance General Chinese Embedding}, + url = {https://arxiv.org/abs/2309.07597}, + year = {2024}, +} +""", ) @property @@ -243,26 +253,28 @@ class AFQMC(AbsTaskSTS): annotations_creators=None, dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{raghu-etal-2021-end, - title = "End-to-End Learning of Flowchart Grounded Task-Oriented Dialogs", - author = "Raghu, Dinesh and - Agarwal, Shantanu and - Joshi, Sachindra and - {Mausam}", - editor = "Moens, Marie-Francine and - Huang, Xuanjing and - Specia, Lucia and - Yih, Scott Wen-tau", - booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing", - month = nov, - year = "2021", - address = "Online and Punta Cana, Dominican Republic", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2021.emnlp-main.357", - doi = "10.18653/v1/2021.emnlp-main.357", - pages = "4348--4366", - abstract = "We propose a novel problem within end-to-end learning of task oriented dialogs (TOD), in which the dialog system mimics a troubleshooting agent who helps a user by diagnosing their problem (e.g., car not starting). Such dialogs are grounded in domain-specific flowcharts, which the agent is supposed to follow during the conversation. Our task exposes novel technical challenges for neural TOD, such as grounding an utterance to the flowchart without explicit annotation, referring to additional manual pages when user asks a clarification question, and ability to follow unseen flowcharts at test time. 
We release a dataset (FLODIAL) consisting of 2,738 dialogs grounded on 12 different troubleshooting flowcharts. We also design a neural model, FLONET, which uses a retrieval-augmented generation architecture to train the dialog agent. Our experiments find that FLONET can do zero-shot transfer to unseen flowcharts, and sets a strong baseline for future research.", -}""", + bibtex_citation=r""" +@inproceedings{raghu-etal-2021-end, + abstract = {We propose a novel problem within end-to-end learning of task oriented dialogs (TOD), in which the dialog system mimics a troubleshooting agent who helps a user by diagnosing their problem (e.g., car not starting). Such dialogs are grounded in domain-specific flowcharts, which the agent is supposed to follow during the conversation. Our task exposes novel technical challenges for neural TOD, such as grounding an utterance to the flowchart without explicit annotation, referring to additional manual pages when user asks a clarification question, and ability to follow unseen flowcharts at test time. We release a dataset (FLODIAL) consisting of 2,738 dialogs grounded on 12 different troubleshooting flowcharts. We also design a neural model, FLONET, which uses a retrieval-augmented generation architecture to train the dialog agent. 
Our experiments find that FLONET can do zero-shot transfer to unseen flowcharts, and sets a strong baseline for future research.}, + address = {Online and Punta Cana, Dominican Republic}, + author = {Raghu, Dinesh and +Agarwal, Shantanu and +Joshi, Sachindra and +{Mausam}}, + booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing}, + doi = {10.18653/v1/2021.emnlp-main.357}, + editor = {Moens, Marie-Francine and +Huang, Xuanjing and +Specia, Lucia and +Yih, Scott Wen-tau}, + month = nov, + pages = {4348--4366}, + publisher = {Association for Computational Linguistics}, + title = {End-to-End Learning of Flowchart Grounded Task-Oriented Dialogs}, + url = {https://aclanthology.org/2021.emnlp-main.357}, + year = {2021}, +} +""", ) @property diff --git a/mteb/tasks/Summarization/eng/SummEvalSummarization.py b/mteb/tasks/Summarization/eng/SummEvalSummarization.py index 39708ffbeb..743160195a 100644 --- a/mteb/tasks/Summarization/eng/SummEvalSummarization.py +++ b/mteb/tasks/Summarization/eng/SummEvalSummarization.py @@ -32,12 +32,14 @@ class SummEvalSummarization(AbsTaskSummarization): annotations_creators="human-annotated", dialect=[], sample_creation="created", - bibtex_citation="""@article{fabbri2020summeval, - title={SummEval: Re-evaluating Summarization Evaluation}, - author={Fabbri, Alexander R and Kry{\'s}ci{\'n}ski, Wojciech and McCann, Bryan and Xiong, Caiming and Socher, Richard and Radev, Dragomir}, - journal={arXiv preprint arXiv:2007.12626}, - year={2020} -}""", + bibtex_citation=r""" +@article{fabbri2020summeval, + author = {Fabbri, Alexander R and Kry{\'s}ci{\'n}ski, Wojciech and McCann, Bryan and Xiong, Caiming and Socher, Richard and Radev, Dragomir}, + journal = {arXiv preprint arXiv:2007.12626}, + title = {SummEval: Re-evaluating Summarization Evaluation}, + year = {2020}, +} +""", ) @property @@ -70,12 +72,14 @@ class SummEvalSummarizationv2(AbsTaskSummarization): annotations_creators="human-annotated", 
dialect=[], sample_creation="created", - bibtex_citation="""@article{fabbri2020summeval, - title={SummEval: Re-evaluating Summarization Evaluation}, - author={Fabbri, Alexander R and Kry{\'s}ci{\'n}ski, Wojciech and McCann, Bryan and Xiong, Caiming and Socher, Richard and Radev, Dragomir}, - journal={arXiv preprint arXiv:2007.12626}, - year={2020} -}""", + bibtex_citation=r""" +@article{fabbri2020summeval, + author = {Fabbri, Alexander R and Kry{\'s}ci{\'n}ski, Wojciech and McCann, Bryan and Xiong, Caiming and Socher, Richard and Radev, Dragomir}, + journal = {arXiv preprint arXiv:2007.12626}, + title = {SummEval: Re-evaluating Summarization Evaluation}, + year = {2020}, +} +""", adapted_from=["SummEvalSummarization"], ) diff --git a/mteb/tasks/Summarization/fra/SummEvalFrSummarization.py b/mteb/tasks/Summarization/fra/SummEvalFrSummarization.py index 109792b375..964655eb9a 100644 --- a/mteb/tasks/Summarization/fra/SummEvalFrSummarization.py +++ b/mteb/tasks/Summarization/fra/SummEvalFrSummarization.py @@ -31,12 +31,14 @@ class SummEvalFrSummarization(AbsTaskSummarization): annotations_creators="human-annotated", dialect=[], sample_creation="machine-translated", - bibtex_citation="""@article{fabbri2020summeval, - title={SummEval: Re-evaluating Summarization Evaluation}, - author={Fabbri, Alexander R and Kry{\'s}ci{\'n}ski, Wojciech and McCann, Bryan and Xiong, Caiming and Socher, Richard and Radev, Dragomir}, - journal={arXiv preprint arXiv:2007.12626}, - year={2020} -}""", + bibtex_citation=r""" +@article{fabbri2020summeval, + author = {Fabbri, Alexander R and Kry{\'s}ci{\'n}ski, Wojciech and McCann, Bryan and Xiong, Caiming and Socher, Richard and Radev, Dragomir}, + journal = {arXiv preprint arXiv:2007.12626}, + title = {SummEval: Re-evaluating Summarization Evaluation}, + year = {2020}, +} +""", ) @property @@ -70,12 +72,14 @@ class SummEvalFrSummarizationv2(AbsTaskSummarization): annotations_creators="human-annotated", dialect=[], 
sample_creation="machine-translated", - bibtex_citation="""@article{fabbri2020summeval, - title={SummEval: Re-evaluating Summarization Evaluation}, - author={Fabbri, Alexander R and Kry{\'s}ci{\'n}ski, Wojciech and McCann, Bryan and Xiong, Caiming and Socher, Richard and Radev, Dragomir}, - journal={arXiv preprint arXiv:2007.12626}, - year={2020} -}""", + bibtex_citation=r""" +@article{fabbri2020summeval, + author = {Fabbri, Alexander R and Kry{\'s}ci{\'n}ski, Wojciech and McCann, Bryan and Xiong, Caiming and Socher, Richard and Radev, Dragomir}, + journal = {arXiv preprint arXiv:2007.12626}, + title = {SummEval: Re-evaluating Summarization Evaluation}, + year = {2020}, +} +""", adapted_from=["SummEvalFrSummarization"], ) diff --git a/mteb/tasks/aggregated_tasks/CQADupStackNLRetrieval.py b/mteb/tasks/aggregated_tasks/CQADupStackNLRetrieval.py index 46688bf199..c19ba6e8f5 100644 --- a/mteb/tasks/aggregated_tasks/CQADupStackNLRetrieval.py +++ b/mteb/tasks/aggregated_tasks/CQADupStackNLRetrieval.py @@ -53,14 +53,16 @@ class CQADupstackNLRetrieval(AbsTaskAggregate): annotations_creators="derived", dialect=[""], sample_creation="machine-translated and verified", # manually checked a small subset - bibtex_citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + bibtex_citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", adapted_from=["CQADupstackRetrieval"], ) diff --git 
a/mteb/tasks/aggregated_tasks/CQADupStackRetrieval.py b/mteb/tasks/aggregated_tasks/CQADupStackRetrieval.py index 917a667eb3..8b8bda0e68 100644 --- a/mteb/tasks/aggregated_tasks/CQADupStackRetrieval.py +++ b/mteb/tasks/aggregated_tasks/CQADupStackRetrieval.py @@ -42,21 +42,23 @@ class CQADupstackRetrieval(AbsTaskAggregate): main_score="ndcg_at_10", type="Retrieval", # since everything is retrieval - otherwise it would be "Aggregated" eval_splits=["test"], - bibtex_citation="""@inproceedings{hoogeveen2015, -author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy}, -title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, -booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, -series = {ADCS '15}, -year = {2015}, -isbn = {978-1-4503-4040-3}, -location = {Parramatta, NSW, Australia}, -pages = {3:1--3:8}, -articleno = {3}, -numpages = {8}, -url = {http://doi.acm.org/10.1145/2838931.2838934}, -doi = {10.1145/2838931.2838934}, -acmid = {2838934}, -publisher = {ACM}, -address = {New York, NY, USA}, -}""", + bibtex_citation=r""" +@inproceedings{hoogeveen2015, + acmid = {2838934}, + address = {New York, NY, USA}, + articleno = {3}, + author = {Hoogeveen, Doris and Verspoor, Karin M. 
and Baldwin, Timothy}, + booktitle = {Proceedings of the 20th Australasian Document Computing Symposium (ADCS)}, + doi = {10.1145/2838931.2838934}, + isbn = {978-1-4503-4040-3}, + location = {Parramatta, NSW, Australia}, + numpages = {8}, + pages = {3:1--3:8}, + publisher = {ACM}, + series = {ADCS '15}, + title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research}, + url = {http://doi.acm.org/10.1145/2838931.2838934}, + year = {2015}, +} +""", ) diff --git a/mteb/tasks/aggregated_tasks/STS17MultilingualVisualSTS.py b/mteb/tasks/aggregated_tasks/STS17MultilingualVisualSTS.py index 563f09cbe6..e8a6730da6 100644 --- a/mteb/tasks/aggregated_tasks/STS17MultilingualVisualSTS.py +++ b/mteb/tasks/aggregated_tasks/STS17MultilingualVisualSTS.py @@ -28,12 +28,14 @@ class STS17MultilingualVisualSTSEng(AbsTaskAggregate): main_score="cosine_spearman", type="VisualSTS(eng)", eval_splits=["test"], - bibtex_citation="""@article{xiao2024pixel, - title={Pixel Sentence Representation Learning}, - author={Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2402.08183}, - year={2024} -}""", + bibtex_citation=r""" +@article{xiao2024pixel, + author = {Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2402.08183}, + title = {Pixel Sentence Representation Learning}, + year = {2024}, +} +""", ) @@ -82,10 +84,12 @@ class STS17MultilingualVisualSTSMultilingual(AbsTaskAggregate): "it-en": ["ita-Latn", "eng-Latn"], "nl-en": ["nld-Latn", "eng-Latn"], }, - bibtex_citation="""@article{xiao2024pixel, - title={Pixel Sentence Representation Learning}, - author={Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, 
Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2402.08183}, - year={2024} -}""", + bibtex_citation=r""" +@article{xiao2024pixel, + author = {Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2402.08183}, + title = {Pixel Sentence Representation Learning}, + year = {2024}, +} +""", ) diff --git a/mteb/tasks/aggregated_tasks/STSBenchmarkMultilingualVisualSTS.py b/mteb/tasks/aggregated_tasks/STSBenchmarkMultilingualVisualSTS.py index 74c5f9feb6..94cc13ecef 100644 --- a/mteb/tasks/aggregated_tasks/STSBenchmarkMultilingualVisualSTS.py +++ b/mteb/tasks/aggregated_tasks/STSBenchmarkMultilingualVisualSTS.py @@ -26,12 +26,14 @@ class STSBenchmarkMultilingualVisualSTSEng(AbsTaskAggregate): type="VisualSTS(eng)", eval_splits=["test"], eval_langs=["eng-Latn"], - bibtex_citation="""@article{xiao2024pixel, - title={Pixel Sentence Representation Learning}, - author={Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2402.08183}, - year={2024} -}""", + bibtex_citation=r""" +@article{xiao2024pixel, + author = {Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2402.08183}, + title = {Pixel Sentence Representation Learning}, + year = {2024}, +} +""", ) @@ -88,10 +90,12 @@ class STSBenchmarkMultilingualVisualSTSMultilingual(AbsTaskAggregate): "rus-Cyrl", "cmn-Hans", ], - bibtex_citation="""@article{xiao2024pixel, - title={Pixel Sentence Representation Learning}, - author={Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and 
Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, - journal={arXiv preprint arXiv:2402.08183}, - year={2024} -}""", + bibtex_citation=r""" +@article{xiao2024pixel, + author = {Xiao, Chenghao and Huang, Zhuoxu and Chen, Danlu and Hudson, G Thomas and Li, Yizhi and Duan, Haoran and Lin, Chenghua and Fu, Jie and Han, Jungong and Moubayed, Noura Al}, + journal = {arXiv preprint arXiv:2402.08183}, + title = {Pixel Sentence Representation Learning}, + year = {2024}, +} +""", ) From 17274c7510c74a589bab151ac6e0b65ef27b3dd0 Mon Sep 17 00:00:00 2001 From: Alexey Vatolin Date: Fri, 2 May 2025 00:30:21 +0200 Subject: [PATCH 3/6] format benchmarks --- mteb/benchmarks/benchmarks.py | 431 +++++++++++++++++------------- tests/test_citation_formatting.py | 64 +++++ 2 files changed, 303 insertions(+), 192 deletions(-) create mode 100644 tests/test_citation_formatting.py diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index d79056de6c..6758fe3721 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -16,14 +16,14 @@ ] # Allows the type to be a string, but ensures that the string is a URL -MMTEB_CITATION = """@article{enevoldsen2025mmtebmassivemultilingualtext, - title={MMTEB: Massive Multilingual Text Embedding Benchmark}, - author={Kenneth Enevoldsen and Isaac Chung and Imene Kerboua and Márton Kardos and Ashwin Mathur and David Stap and Jay Gala and Wissam Siblini and Dominik Krzemiński and Genta Indra Winata and Saba Sturua and Saiteja Utpala and Mathieu Ciancone and Marion Schaeffer and Gabriel Sequeira and Diganta Misra and Shreeya Dhakal and Jonathan Rystrøm and Roman Solomatin and Ömer Çağatan and Akash Kundu and Martin Bernstorff and Shitao Xiao and Akshita Sukhlecha and Bhavish Pahwa and Rafał Poświata and Kranthi Kiran GV and Shawon Ashraf and Daniel Auras and Björn Plüster and Jan Philipp Harries and Loïc Magne and Isabelle Mohr and Mariya Hendriksen and Dawei Zhu and Hippolyte Gisserot-Boukhlef and 
Tom Aarsen and Jan Kostkan and Konrad Wojtasik and Taemin Lee and Marek Šuppa and Crystina Zhang and Roberta Rocca and Mohammed Hamdy and Andrianos Michail and John Yang and Manuel Faysse and Aleksei Vatolin and Nandan Thakur and Manan Dey and Dipam Vasani and Pranjal Chitale and Simone Tedeschi and Nguyen Tai and Artem Snegirev and Michael Günther and Mengzhou Xia and Weijia Shi and Xing Han Lù and Jordan Clive and Gayatri Krishnakumar and Anna Maksimova and Silvan Wehrli and Maria Tikhonova and Henil Panchal and Aleksandr Abramov and Malte Ostendorff and Zheng Liu and Simon Clematide and Lester James Miranda and Alena Fenogenova and Guangyu Song and Ruqiya Bin Safi and Wen-Ding Li and Alessia Borghini and Federico Cassano and Hongjin Su and Jimmy Lin and Howard Yen and Lasse Hansen and Sara Hooker and Chenghao Xiao and Vaibhav Adlakha and Orion Weller and Siva Reddy and Niklas Muennighoff}, - publisher = {arXiv}, - journal={arXiv preprint arXiv:2502.13595}, - year={2025}, - url={https://arxiv.org/abs/2502.13595}, - doi = {10.48550/arXiv.2502.13595}, +MMTEB_CITATION = r"""@article{enevoldsen2025mmtebmassivemultilingualtext, + author = {Kenneth Enevoldsen and Isaac Chung and Imene Kerboua and Márton Kardos and Ashwin Mathur and David Stap and Jay Gala and Wissam Siblini and Dominik Krzemiński and Genta Indra Winata and Saba Sturua and Saiteja Utpala and Mathieu Ciancone and Marion Schaeffer and Gabriel Sequeira and Diganta Misra and Shreeya Dhakal and Jonathan Rystrøm and Roman Solomatin and Ömer Çağatan and Akash Kundu and Martin Bernstorff and Shitao Xiao and Akshita Sukhlecha and Bhavish Pahwa and Rafał Poświata and Kranthi Kiran GV and Shawon Ashraf and Daniel Auras and Björn Plüster and Jan Philipp Harries and Loïc Magne and Isabelle Mohr and Mariya Hendriksen and Dawei Zhu and Hippolyte Gisserot-Boukhlef and Tom Aarsen and Jan Kostkan and Konrad Wojtasik and Taemin Lee and Marek Šuppa and Crystina Zhang and Roberta Rocca and Mohammed Hamdy and Andrianos 
Michail and John Yang and Manuel Faysse and Aleksei Vatolin and Nandan Thakur and Manan Dey and Dipam Vasani and Pranjal Chitale and Simone Tedeschi and Nguyen Tai and Artem Snegirev and Michael Günther and Mengzhou Xia and Weijia Shi and Xing Han Lù and Jordan Clive and Gayatri Krishnakumar and Anna Maksimova and Silvan Wehrli and Maria Tikhonova and Henil Panchal and Aleksandr Abramov and Malte Ostendorff and Zheng Liu and Simon Clematide and Lester James Miranda and Alena Fenogenova and Guangyu Song and Ruqiya Bin Safi and Wen-Ding Li and Alessia Borghini and Federico Cassano and Hongjin Su and Jimmy Lin and Howard Yen and Lasse Hansen and Sara Hooker and Chenghao Xiao and Vaibhav Adlakha and Orion Weller and Siva Reddy and Niklas Muennighoff}, + doi = {10.48550/arXiv.2502.13595}, + journal = {arXiv preprint arXiv:2502.13595}, + publisher = {arXiv}, + title = {MMTEB: Massive Multilingual Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2502.13595}, + year = {2025}, }""" MTEB_EN = Benchmark( @@ -179,15 +179,17 @@ This page is an adaptation of the [old MTEB leaderboard](https://huggingface.co/spaces/mteb/leaderboard_legacy). We recommend that you use [MTEB(eng, v2)](http://mteb-leaderboard.hf.space/?benchmark_name=MTEB%28eng%2C+v2%29) instead, as it uses updated versions of the task, making it notably faster to run and resolving [a known bug](https://github.com/embeddings-benchmark/mteb/issues/1156) in existing tasks. This benchmark also removes datasets common for fine-tuning, such as MSMARCO, which makes model performance scores more comparable. However, generally, both benchmarks provide similar estimates. 
""", - citation="""@article{muennighoff2022mteb, - author = {Muennighoff, Niklas and Tazi, Nouamane and Magne, Lo{\"\i}c and Reimers, Nils}, - title = {MTEB: Massive Text Embedding Benchmark}, - publisher = {arXiv}, - journal={arXiv preprint arXiv:2210.07316}, - year = {2022} - url = {https://arxiv.org/abs/2210.07316}, - doi = {10.48550/ARXIV.2210.07316}, -}""", + citation=r""" +@article{muennighoff2022mteb, + author = {Muennighoff, Niklas and Tazi, Nouamane and Magne, Lo{\"\i}c and Reimers, Nils}, + doi = {10.48550/ARXIV.2210.07316}, + journal = {arXiv preprint arXiv:2210.07316}, + publisher = {arXiv}, + title = {MTEB: Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2210.07316}, + year = {2022}, +} +""", contacts=["Muennighoff"], ) @@ -235,15 +237,17 @@ ), description="A Russian version of the Massive Text Embedding Benchmark with a number of novel Russian tasks in all task categories of the original MTEB.", reference="https://aclanthology.org/2023.eacl-main.148/", - citation="""@misc{snegirev2024russianfocusedembeddersexplorationrumteb, - title={The Russian-focused embedders' exploration: ruMTEB benchmark and Russian embedding model design}, - author={Artem Snegirev and Maria Tikhonova and Anna Maksimova and Alena Fenogenova and Alexander Abramov}, - year={2024}, - eprint={2408.12503}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2408.12503}, -}""", + citation=r""" +@misc{snegirev2024russianfocusedembeddersexplorationrumteb, + archiveprefix = {arXiv}, + author = {Artem Snegirev and Maria Tikhonova and Anna Maksimova and Alena Fenogenova and Alexander Abramov}, + eprint = {2408.12503}, + primaryclass = {cs.CL}, + title = {The Russian-focused embedders' exploration: ruMTEB benchmark and Russian embedding model design}, + url = {https://arxiv.org/abs/2408.12503}, + year = {2024}, +} +""", ) MTEB_RETRIEVAL_WITH_INSTRUCTIONS = Benchmark( @@ -258,14 +262,16 @@ ), description="Retrieval w/Instructions is the task of 
finding relevant documents for a query that has detailed instructions.", reference="https://arxiv.org/abs/2403.15246", - citation="""@misc{weller2024followir, - title={FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions}, - author={Orion Weller and Benjamin Chang and Sean MacAvaney and Kyle Lo and Arman Cohan and Benjamin Van Durme and Dawn Lawrie and Luca Soldaini}, - year={2024}, - eprint={2403.15246}, - archivePrefix={arXiv}, - primaryClass={cs.IR} -}""", + citation=r""" +@misc{weller2024followir, + archiveprefix = {arXiv}, + author = {Orion Weller and Benjamin Chang and Sean MacAvaney and Kyle Lo and Arman Cohan and Benjamin Van Durme and Dawn Lawrie and Luca Soldaini}, + eprint = {2403.15246}, + primaryclass = {cs.IR}, + title = {FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions}, + year = {2024}, +} +""", ) MTEB_RETRIEVAL_LAW = Benchmark( @@ -332,12 +338,14 @@ including bitext mining and classification via retrieval-augmented contexts. 
""", reference="https://arxiv.org/pdf/2406.07424", - citation="""@article{winata2024miners, - title={MINERS: Multilingual Language Models as Semantic Retrievers}, - author={Winata, Genta Indra and Zhang, Ruochen and Adelani, David Ifeoluwa}, - journal={arXiv preprint arXiv:2406.07424}, - year={2024} -}""", + citation=r""" +@article{winata2024miners, + author = {Winata, Genta Indra and Zhang, Ruochen and Adelani, David Ifeoluwa}, + journal = {arXiv preprint arXiv:2406.07424}, + title = {MINERS: Multilingual Language Models as Semantic Retrievers}, + year = {2024}, +} +""", ) SEB = Benchmark( @@ -383,13 +391,15 @@ ), description="A curated selection of tasks coverering the Scandinavian languages; Danish, Swedish and Norwegian, including Bokmål and Nynorsk.", reference="https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/", - citation="""@inproceedings{enevoldsen2024scandinavian, - title={The Scandinavian Embedding Benchmarks: Comprehensive Assessment of Multilingual and Monolingual Text Embedding}, - author={Enevoldsen, Kenneth and Kardos, M{\'a}rton and Muennighoff, Niklas and Nielbo, Kristoffer}, - booktitle={Advances in Neural Information Processing Systems}, - year={2024}, - url={https://nips.cc/virtual/2024/poster/97869} -}""", + citation=r""" +@inproceedings{enevoldsen2024scandinavian, + author = {Enevoldsen, Kenneth and Kardos, M{\'a}rton and Muennighoff, Niklas and Nielbo, Kristoffer}, + booktitle = {Advances in Neural Information Processing Systems}, + title = {The Scandinavian Embedding Benchmarks: Comprehensive Assessment of Multilingual and Monolingual Text Embedding}, + url = {https://nips.cc/virtual/2024/poster/97869}, + year = {2024}, +} +""", contacts=["KennethEnevoldsen", "x-tabdeveloping", "Samoed"], ) @@ -412,15 +422,17 @@ ), description="CoIR: A Comprehensive Benchmark for Code Information Retrieval Models", reference="https://github.com/CoIR-team/coir", - citation="""@misc{li2024coircomprehensivebenchmarkcode, - title={CoIR: A 
Comprehensive Benchmark for Code Information Retrieval Models}, - author={Xiangyang Li and Kuicai Dong and Yi Quan Lee and Wei Xia and Yichun Yin and Hao Zhang and Yong Liu and Yasheng Wang and Ruiming Tang}, - year={2024}, - eprint={2407.02883}, - archivePrefix={arXiv}, - primaryClass={cs.IR}, - url={https://arxiv.org/abs/2407.02883}, -}""", + citation=r""" +@misc{li2024coircomprehensivebenchmarkcode, + archiveprefix = {arXiv}, + author = {Xiangyang Li and Kuicai Dong and Yi Quan Lee and Wei Xia and Yichun Yin and Hao Zhang and Yong Liu and Yasheng Wang and Ruiming Tang}, + eprint = {2407.02883}, + primaryclass = {cs.IR}, + title = {CoIR: A Comprehensive Benchmark for Code Information Retrieval Models}, + url = {https://arxiv.org/abs/2407.02883}, + year = {2024}, +} +""", ) RAR_b = Benchmark( @@ -449,12 +461,14 @@ ), description="A benchmark to evaluate reasoning capabilities of retrievers.", reference="https://arxiv.org/abs/2404.06347", - citation="""@article{xiao2024rar, - title={RAR-b: Reasoning as Retrieval Benchmark}, - author={Xiao, Chenghao and Hudson, G Thomas and Al Moubayed, Noura}, - journal={arXiv preprint arXiv:2404.06347}, - year={2024} -}""", + citation=r""" +@article{xiao2024rar, + author = {Xiao, Chenghao and Hudson, G Thomas and Al Moubayed, Noura}, + journal = {arXiv preprint arXiv:2404.06347}, + title = {RAR-b: Reasoning as Retrieval Benchmark}, + year = {2024}, +} +""", contacts=["gowitheflow-1998"], ) @@ -502,15 +516,17 @@ ), description="MTEB-French, a French expansion of the original benchmark with high-quality native French datasets.", reference="https://arxiv.org/abs/2405.20468", - citation="""@misc{ciancone2024mtebfrenchresourcesfrenchsentence, - title={MTEB-French: Resources for French Sentence Embedding Evaluation and Analysis}, - author={Mathieu Ciancone and Imene Kerboua and Marion Schaeffer and Wissam Siblini}, - year={2024}, - eprint={2405.20468}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - 
url={https://arxiv.org/abs/2405.20468}, -}""", + citation=r""" +@misc{ciancone2024mtebfrenchresourcesfrenchsentence, + archiveprefix = {arXiv}, + author = {Mathieu Ciancone and Imene Kerboua and Marion Schaeffer and Wissam Siblini}, + eprint = {2405.20468}, + primaryclass = {cs.CL}, + title = {MTEB-French: Resources for French Sentence Embedding Evaluation and Analysis}, + url = {https://arxiv.org/abs/2405.20468}, + year = {2024}, +} +""", contacts=["imenelydiaker"], ) @@ -551,15 +567,17 @@ ), description="A benchmark for text-embedding performance in German.", reference="https://arxiv.org/html/2401.02709v1", - citation="""@misc{wehrli2024germantextembeddingclustering, - title={German Text Embedding Clustering Benchmark}, - author={Silvan Wehrli and Bert Arnrich and Christopher Irrgang}, - year={2024}, - eprint={2401.02709}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2401.02709}, -}""", + citation=r""" +@misc{wehrli2024germantextembeddingclustering, + archiveprefix = {arXiv}, + author = {Silvan Wehrli and Bert Arnrich and Christopher Irrgang}, + eprint = {2401.02709}, + primaryclass = {cs.CL}, + title = {German Text Embedding Clustering Benchmark}, + url = {https://arxiv.org/abs/2401.02709}, + year = {2024}, +} +""", contacts=["slvnwhrl"], ) @@ -625,12 +643,14 @@ consisting of titles and abstracts of scientific publications in Polish, which was used as the basis for two novel clustering tasks.""", # Rephrased from the abstract reference="https://arxiv.org/abs/2405.10138", - citation="""@article{poswiata2024plmteb, - title={PL-MTEB: Polish Massive Text Embedding Benchmark}, - author={Rafał Poświata and Sławomir Dadas and Michał Perełkiewicz}, - journal={arXiv preprint arXiv:2405.10138}, - year={2024} -}""", + citation=r""" +@article{poswiata2024plmteb, + author = {Rafał Poświata and Sławomir Dadas and Michał Perełkiewicz}, + journal = {arXiv preprint arXiv:2405.10138}, + title = {PL-MTEB: Polish Massive Text Embedding Benchmark}, + 
year = {2024}, +} +""", contacts=["rafalposwiata"], ) @@ -1092,12 +1112,14 @@ featuring documents of varying length and dispersed target information. """, # Pieced together from paper abstract. reference="https://arxiv.org/abs/2404.12096v2", - citation="""@article{zhu2024longembed, - title={LongEmbed: Extending Embedding Models for Long Context Retrieval}, - author={Zhu, Dawei and Wang, Liang and Yang, Nan and Song, Yifan and Wu, Wenhao and Wei, Furu and Li, Sujian}, - journal={arXiv preprint arXiv:2404.12096}, - year={2024} -}""", + citation=r""" +@article{zhu2024longembed, + author = {Zhu, Dawei and Wang, Liang and Yang, Nan and Song, Yifan and Wu, Wenhao and Wei, Furu and Li, Sujian}, + journal = {arXiv preprint arXiv:2404.12096}, + title = {LongEmbed: Extending Embedding Models for Long Context Retrieval}, + year = {2024}, +} +""", ) BRIGHT = Benchmark( @@ -1111,12 +1133,14 @@ naturally occurring and carefully curated human data. """, reference="https://brightbenchmark.github.io/", - citation="""@article{su2024bright, - title={Bright: A realistic and challenging benchmark for reasoning-intensive retrieval}, - author={Su, Hongjin and Yen, Howard and Xia, Mengzhou and Shi, Weijia and Muennighoff, Niklas and Wang, Han-yu and Liu, Haisu and Shi, Quan and Siegel, Zachary S and Tang, Michael and others}, - journal={arXiv preprint arXiv:2407.12883}, - year={2024} -}""", + citation=r""" +@article{su2024bright, + author = {Su, Hongjin and Yen, Howard and Xia, Mengzhou and Shi, Weijia and Muennighoff, Niklas and Wang, Han-yu and Liu, Haisu and Shi, Quan and Siegel, Zachary S and Tang, Michael and others}, + journal = {arXiv preprint arXiv:2407.12883}, + title = {Bright: A realistic and challenging benchmark for reasoning-intensive retrieval}, + year = {2024}, +} +""", ) BRIGHT_LONG = Benchmark( @@ -1138,12 +1162,14 @@ This is the long version of the benchmark, which only filter longer documents. 
""", reference="https://brightbenchmark.github.io/", - citation="""@article{su2024bright, - title={Bright: A realistic and challenging benchmark for reasoning-intensive retrieval}, - author={Su, Hongjin and Yen, Howard and Xia, Mengzhou and Shi, Weijia and Muennighoff, Niklas and Wang, Han-yu and Liu, Haisu and Shi, Quan and Siegel, Zachary S and Tang, Michael and others}, - journal={arXiv preprint arXiv:2407.12883}, - year={2024} -}""", + citation=r""" +@article{su2024bright, + author = {Su, Hongjin and Yen, Howard and Xia, Mengzhou and Shi, Weijia and Muennighoff, Niklas and Wang, Han-yu and Liu, Haisu and Shi, Quan and Siegel, Zachary S and Tang, Michael and others}, + journal = {arXiv preprint arXiv:2407.12883}, + title = {Bright: A realistic and challenging benchmark for reasoning-intensive retrieval}, + year = {2024}, +} +""", ) CODE_RAG = Benchmark( @@ -1158,15 +1184,17 @@ ), description="A benchmark for evaluating code retrieval augmented generation, testing models' ability to retrieve relevant programming solutions, tutorials and documentation.", reference="https://arxiv.org/abs/2406.14497", - citation="""@misc{wang2024coderagbenchretrievalaugmentcode, - title={CodeRAG-Bench: Can Retrieval Augment Code Generation?}, - author={Zora Zhiruo Wang and Akari Asai and Xinyan Velocity Yu and Frank F. Xu and Yiqing Xie and Graham Neubig and Daniel Fried}, - year={2024}, - eprint={2406.14497}, - archivePrefix={arXiv}, - primaryClass={cs.SE}, - url={https://arxiv.org/abs/2406.14497}, -}""", + citation=r""" +@misc{wang2024coderagbenchretrievalaugmentcode, + archiveprefix = {arXiv}, + author = {Zora Zhiruo Wang and Akari Asai and Xinyan Velocity Yu and Frank F. 
Xu and Yiqing Xie and Graham Neubig and Daniel Fried}, + eprint = {2406.14497}, + primaryclass = {cs.SE}, + title = {CodeRAG-Bench: Can Retrieval Augment Code Generation?}, + url = {https://arxiv.org/abs/2406.14497}, + year = {2024}, +} +""", ) BEIR = Benchmark( @@ -1192,11 +1220,12 @@ + get_tasks(tasks=["MSMARCO"], languages=["eng"], eval_splits=["dev"]), description="BEIR is a heterogeneous benchmark containing diverse IR tasks. It also provides a common and easy framework for evaluation of your NLP-based retrieval models within the benchmark.", reference="https://arxiv.org/abs/2104.08663", - citation="""@article{thakur2021beir, - title={Beir: A heterogenous benchmark for zero-shot evaluation of information retrieval models}, - author={Thakur, Nandan and Reimers, Nils and R{\"u}ckl{\'e}, Andreas and Srivastava, Abhishek and Gurevych, Iryna}, - journal={arXiv preprint arXiv:2104.08663}, - year={2021} + citation=r""" +@article{thakur2021beir, + author = {Thakur, Nandan and Reimers, Nils and R{\"u}ckl{\'e}, Andreas and Srivastava, Abhishek and Gurevych, Iryna}, + journal = {arXiv preprint arXiv:2104.08663}, + title = {Beir: A heterogenous benchmark for zero-shot evaluation of information retrieval models}, + year = {2021}, } """, ) @@ -1279,14 +1308,16 @@ ), description="The Chinese Massive Text Embedding Benchmark (C-MTEB) is a comprehensive benchmark for Chinese text embeddings covering 6 tasks and 35 datasets.", reference="https://github.com/FlagOpen/FlagEmbedding/tree/master/research/C_MTEB", - citation="""@misc{c-pack, - title={C-Pack: Packaged Resources To Advance General Chinese Embedding}, - author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff}, - year={2023}, - eprint={2309.07597}, - archivePrefix={arXiv}, - primaryClass={cs.CL} -}""", + citation=r""" +@misc{c-pack, + archiveprefix = {arXiv}, + author = {Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff}, + eprint = {2309.07597}, + primaryclass = {cs.CL}, + title = 
{C-Pack: Packaged Resources To Advance General Chinese Embedding}, + year = {2023}, +} +""", ) FA_MTEB = Benchmark( @@ -1408,12 +1439,14 @@ ), description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", reference="https://arxiv.org/abs/2412.00532", - citation="""@article{kasmaee2024chemteb, - title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \\& Efficiency on a Specific Domain}, - author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, - journal={arXiv preprint arXiv:2412.00532}, - year={2024} -}""", + citation=r""" +@article{kasmaee2024chemteb, + author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal = {arXiv preprint arXiv:2412.00532}, + title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \\& Efficiency on a Specific Domain}, + year = {2024}, +} +""", ) BEIR_NL = Benchmark( @@ -1441,15 +1474,17 @@ "translation.", reference="https://arxiv.org/abs/2412.08329", contacts=["nikolay-banar"], - citation="""@misc{banar2024beirnlzeroshotinformationretrieval, - title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, - author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, - year={2024}, - eprint={2412.08329}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2412.08329}, -}""", + citation=r""" +@misc{banar2024beirnlzeroshotinformationretrieval, + archiveprefix = {arXiv}, + author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans}, + eprint = {2412.08329}, + primaryclass = {cs.CL}, + title = {BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language}, + url = {https://arxiv.org/abs/2412.08329}, + year = {2024}, +} +""", ) MIEB_common_tasks = [ @@ 
-1602,15 +1637,17 @@ document undestanding, visual STS, and CV-centric tasks.""", reference="https://arxiv.org/abs/2504.10471", contacts=["gowitheflow-1998", "isaac-chung"], - citation="""@article{xiao2025mieb, - author = {Chenghao Xiao and Isaac Chung and Imene Kerboua and Jamie Stirling and Xin Zhang and Márton Kardos and Roman Solomatin and Noura Al Moubayed and Kenneth Enevoldsen and Niklas Muennighoff}, - title = {MIEB: Massive Image Embedding Benchmark}, - publisher = {arXiv}, - journal={arXiv preprint arXiv:2504.10471}, - year = {2025}, - url = {https://arxiv.org/abs/2504.10471}, - doi = {10.48550/ARXIV.2504.10471}, - }""", + citation=r""" +@article{xiao2025mieb, + author = {Chenghao Xiao and Isaac Chung and Imene Kerboua and Jamie Stirling and Xin Zhang and Márton Kardos and Roman Solomatin and Noura Al Moubayed and Kenneth Enevoldsen and Niklas Muennighoff}, + doi = {10.48550/ARXIV.2504.10471}, + journal = {arXiv preprint arXiv:2504.10471}, + publisher = {arXiv}, + title = {MIEB: Massive Image Embedding Benchmark}, + url = {https://arxiv.org/abs/2504.10471}, + year = {2025}, +} +""", ) MIEB_MULTILINGUAL = Benchmark( @@ -1635,15 +1672,17 @@ datasets + the multilingual parts of VisualSTS-b and VisualSTS-16.""", reference="https://arxiv.org/abs/2504.10471", contacts=["gowitheflow-1998", "isaac-chung"], - citation="""@article{xiao2025mieb, - author = {Chenghao Xiao and Isaac Chung and Imene Kerboua and Jamie Stirling and Xin Zhang and Márton Kardos and Roman Solomatin and Noura Al Moubayed and Kenneth Enevoldsen and Niklas Muennighoff}, - title = {MIEB: Massive Image Embedding Benchmark}, - publisher = {arXiv}, - journal={arXiv preprint arXiv:2504.10471}, - year = {2025}, - url = {https://arxiv.org/abs/2504.10471}, - doi = {10.48550/ARXIV.2504.10471}, - }""", + citation=r""" +@article{xiao2025mieb, + author = {Chenghao Xiao and Isaac Chung and Imene Kerboua and Jamie Stirling and Xin Zhang and Márton Kardos and Roman Solomatin and Noura Al Moubayed and Kenneth 
Enevoldsen and Niklas Muennighoff}, + doi = {10.48550/ARXIV.2504.10471}, + journal = {arXiv preprint arXiv:2504.10471}, + publisher = {arXiv}, + title = {MIEB: Massive Image Embedding Benchmark}, + url = {https://arxiv.org/abs/2504.10471}, + year = {2025}, +} +""", ) MIEB_LITE = Benchmark( @@ -1717,15 +1756,17 @@ relative rank of models.""", reference="https://arxiv.org/abs/2504.10471", contacts=["gowitheflow-1998", "isaac-chung"], - citation="""@article{xiao2025mieb, - author = {Chenghao Xiao and Isaac Chung and Imene Kerboua and Jamie Stirling and Xin Zhang and Márton Kardos and Roman Solomatin and Noura Al Moubayed and Kenneth Enevoldsen and Niklas Muennighoff}, - title = {MIEB: Massive Image Embedding Benchmark}, - publisher = {arXiv}, - journal={arXiv preprint arXiv:2504.10471}, - year = {2025}, - url = {https://arxiv.org/abs/2504.10471}, - doi = {10.48550/ARXIV.2504.10471}, - }""", + citation=r""" +@article{xiao2025mieb, + author = {Chenghao Xiao and Isaac Chung and Imene Kerboua and Jamie Stirling and Xin Zhang and Márton Kardos and Roman Solomatin and Noura Al Moubayed and Kenneth Enevoldsen and Niklas Muennighoff}, + doi = {10.48550/ARXIV.2504.10471}, + journal = {arXiv preprint arXiv:2504.10471}, + publisher = {arXiv}, + title = {MIEB: Massive Image Embedding Benchmark}, + url = {https://arxiv.org/abs/2504.10471}, + year = {2025}, +} +""", ) MIEB_IMG = Benchmark( @@ -1787,15 +1828,17 @@ ), description="A image-only version of MIEB(Multilingual) that consists of 49 tasks.", reference="https://arxiv.org/abs/2504.10471", - citation="""@article{xiao2025mieb, - author = {Chenghao Xiao and Isaac Chung and Imene Kerboua and Jamie Stirling and Xin Zhang and Márton Kardos and Roman Solomatin and Noura Al Moubayed and Kenneth Enevoldsen and Niklas Muennighoff}, - title = {MIEB: Massive Image Embedding Benchmark}, - publisher = {arXiv}, - journal={arXiv preprint arXiv:2504.10471}, - year = {2025}, - url = {https://arxiv.org/abs/2504.10471}, - doi = 
{10.48550/ARXIV.2504.10471}, - }""", + citation=r""" +@article{xiao2025mieb, + author = {Chenghao Xiao and Isaac Chung and Imene Kerboua and Jamie Stirling and Xin Zhang and Márton Kardos and Roman Solomatin and Noura Al Moubayed and Kenneth Enevoldsen and Niklas Muennighoff}, + doi = {10.48550/ARXIV.2504.10471}, + journal = {arXiv preprint arXiv:2504.10471}, + publisher = {arXiv}, + title = {MIEB: Massive Image Embedding Benchmark}, + url = {https://arxiv.org/abs/2504.10471}, + year = {2025}, +} +""", contacts=["gowitheflow-1998", "isaac-chung"], ) @@ -1811,12 +1854,14 @@ ), description='"Built-Bench" is an ongoing effort aimed at evaluating text embedding models in the context of built asset management, spanning over various dicsiplines such as architeture, engineering, constrcution, and operations management of the built environment.', reference="https://arxiv.org/abs/2411.12056", - citation="""@article{shahinmoghadam2024benchmarking, - title={Benchmarking pre-trained text embedding models in aligning built asset information}, - author={Shahinmoghadam, Mehrzad and Motamedi, Ali}, - journal={arXiv preprint arXiv:2411.12056}, - year={2024} -}""", + citation=r""" +@article{shahinmoghadam2024benchmarking, + author = {Shahinmoghadam, Mehrzad and Motamedi, Ali}, + journal = {arXiv preprint arXiv:2411.12056}, + title = {Benchmarking pre-trained text embedding models in aligning built asset information}, + year = {2024}, +} +""", contacts=["mehrzadshm"], ) @@ -1849,13 +1894,15 @@ ), description="A benchmark for evaluating text embedding models on Russian data.", reference="https://github.com/avidale/encodechka", - citation="""@misc{dale_encodechka, - author = "Dale, David", - title = "Russian rating of sentence encoders", - editor = "habr.com", - url = "https://habr.com/ru/articles/669674/", - month = {June}, - year = {2022}, - note = {[Online; posted 12-June-2022]}, -}""", + citation=r""" +@misc{dale_encodechka, + author = {Dale, David}, + editor = {habr.com}, + month 
= {June}, + note = {[Online; posted 12-June-2022]}, + title = {Russian rating of sentence encoders}, + url = {https://habr.com/ru/articles/669674/}, + year = {2022}, +} +""", ) diff --git a/tests/test_citation_formatting.py b/tests/test_citation_formatting.py new file mode 100644 index 0000000000..284ae9a7df --- /dev/null +++ b/tests/test_citation_formatting.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import bibtexparser +import pytest +from bibtexparser.bwriter import BibTexWriter + +import mteb +from mteb.abstasks import AbsTask +from mteb.benchmarks.benchmark import Benchmark + + +def format_bibtex(bibtex_str: str) -> str | None: + parser = bibtexparser.bparser.BibTexParser( + common_strings=True, ignore_nonstandard_types=False, interpolate_strings=False + ) + + bib_database = bibtexparser.loads(bibtex_str, parser) + if not bib_database.entries: + return None + + writer = BibTexWriter() + writer.indent = " " + writer.comma_first = False + writer.add_trailing_comma = True + + return writer.write(bib_database).strip() + + +@pytest.fixture(params=mteb.get_tasks()) +def task(request): + return request.param + + +def test_task_bibtex(task: AbsTask): + task_name = task.metadata.name + bibtex_citation = task.metadata.bibtex_citation + + if not bibtex_citation or not bibtex_citation.strip(): + pytest.skip(f"Task {task_name} has no bibtex_citation") + bibtex_citation = bibtex_citation.strip() + + formatted_bibtex = format_bibtex(bibtex_citation) + assert ( + formatted_bibtex is not None and formatted_bibtex == bibtex_citation + ), f"Wrong BibTeX citation formatting for task {task_name}" + + +@pytest.fixture(params=mteb.get_benchmarks()) +def benchmark(request): + return request.param + + +def test_benchmark_bibtex(benchmark: Benchmark): + benchmark_name = benchmark.name + bibtex_citation = benchmark.citation + + if not bibtex_citation or not bibtex_citation.strip(): + pytest.skip(f"Benchmark {benchmark_name} has no bibtex_citation") + bibtex_citation = 
bibtex_citation.strip() + + formatted_bibtex = format_bibtex(bibtex_citation) + assert ( + formatted_bibtex is not None and formatted_bibtex == bibtex_citation + ), f"Wrong BibTeX citation formatting for benchmark {benchmark_name}" From 42df41d21febb343f998fec3aba35f806a0b6001 Mon Sep 17 00:00:00 2001 From: Alexey Vatolin Date: Fri, 2 May 2025 00:32:47 +0200 Subject: [PATCH 4/6] fix format --- mteb/tasks/BitextMining/vie/VieMedEVBitextMining.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mteb/tasks/BitextMining/vie/VieMedEVBitextMining.py b/mteb/tasks/BitextMining/vie/VieMedEVBitextMining.py index c6047b1218..f841012f5f 100644 --- a/mteb/tasks/BitextMining/vie/VieMedEVBitextMining.py +++ b/mteb/tasks/BitextMining/vie/VieMedEVBitextMining.py @@ -56,9 +56,9 @@ def dataset_transform(self): # Pairs are in two halves en_sentences = all_texts[:mid_index] vie_sentences = all_texts[mid_index:] - assert len(en_sentences) == len( - vie_sentences - ), "The split does not result in equal halves." + assert len(en_sentences) == len(vie_sentences), ( + "The split does not result in equal halves." + ) # Downsample indices = list(range(len(en_sentences))) @@ -66,9 +66,9 @@ def dataset_transform(self): sample_indices = indices[:TEST_SAMPLES] en_sentences = [en_sentences[i] for i in sample_indices] vie_sentences = [vie_sentences[i] for i in sample_indices] - assert ( - len(en_sentences) == len(vie_sentences) == TEST_SAMPLES - ), f"Exceeded {TEST_SAMPLES} samples for 'test' split." + assert len(en_sentences) == len(vie_sentences) == TEST_SAMPLES, ( + f"Exceeded {TEST_SAMPLES} samples for 'test' split." 
+ ) # Return dataset ds["test"] = datasets.Dataset.from_dict( From 34a9965607a2e7098cf1eb32a05bba92d07f9603 Mon Sep 17 00:00:00 2001 From: Alexey Vatolin Date: Fri, 2 May 2025 10:24:37 +0200 Subject: [PATCH 5/6] Fix tests --- pyproject.toml | 1 + tests/test_citation_formatting.py | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 481bd2c54c..b7b880a9c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,6 +61,7 @@ dev = [ "pytest-rerunfailures>=15.0", "iso639>=0.1.4", # used for tests/scripts/test_generate_model_meta.py "pre-commit>=4.1.0", +"bibtexparser>=1.4.3" # used for tests/test_citation_formatting.py ] codecarbon = ["codecarbon>=2.0.0,<3.0.0"] speedtask = [ diff --git a/tests/test_citation_formatting.py b/tests/test_citation_formatting.py index 284ae9a7df..dd2c731994 100644 --- a/tests/test_citation_formatting.py +++ b/tests/test_citation_formatting.py @@ -40,9 +40,9 @@ def test_task_bibtex(task: AbsTask): bibtex_citation = bibtex_citation.strip() formatted_bibtex = format_bibtex(bibtex_citation) - assert ( - formatted_bibtex is not None and formatted_bibtex == bibtex_citation - ), f"Wrong BibTeX citation formatting for task {task_name}" + assert formatted_bibtex is not None and formatted_bibtex == bibtex_citation, ( + f"Wrong BibTeX citation formatting for task {task_name}" + ) @pytest.fixture(params=mteb.get_benchmarks()) @@ -59,6 +59,6 @@ def test_benchmark_bibtex(benchmark: Benchmark): bibtex_citation = bibtex_citation.strip() formatted_bibtex = format_bibtex(bibtex_citation) - assert ( - formatted_bibtex is not None and formatted_bibtex == bibtex_citation - ), f"Wrong BibTeX citation formatting for benchmark {benchmark_name}" + assert formatted_bibtex is not None and formatted_bibtex == bibtex_citation, ( + f"Wrong BibTeX citation formatting for benchmark {benchmark_name}" + ) From a58eec91f5d994e74ceff18bc48f028e66cfb924 Mon Sep 17 00:00:00 2001 From: Alexey Vatolin Date: Fri, 2 May 
2025 11:11:40 +0200 Subject: [PATCH 6/6] add formatting script --- scripts/format_citations.py | 342 ++++++++++++++++++++++++++++++++++++ 1 file changed, 342 insertions(+) create mode 100644 scripts/format_citations.py diff --git a/scripts/format_citations.py b/scripts/format_citations.py new file mode 100644 index 0000000000..e1e9339e35 --- /dev/null +++ b/scripts/format_citations.py @@ -0,0 +1,342 @@ +from __future__ import annotations + +import ast +import logging +from pathlib import Path + +import bibtexparser +import typer +from bibtexparser.bwriter import BibTexWriter + +app = typer.Typer() + +logging.basicConfig( + level=logging.INFO, + format="%(levelname)s: %(message)s", +) +logger = logging.getLogger(__name__) + + +class KeywordLiteralFinder(ast.NodeVisitor): + def __init__(self, target_function_name: str, target_keyword_arg: str): + self.target_function_name = target_function_name + self.target_keyword_arg = target_keyword_arg + self.locations: list[tuple[int, int, int, int]] = [] + self.keyword_found_anywhere = False + + def visit_Call(self, node: ast.Call): + func_name = "" + if isinstance(node.func, ast.Name): + func_name = node.func.id + elif isinstance(node.func, ast.Attribute): + func_name = node.func.attr + + if func_name != self.target_function_name: + self.generic_visit(node) + return + + for keyword in node.keywords: + if keyword.arg != self.target_keyword_arg: + continue + self.keyword_found_anywhere = True + if not isinstance(keyword.value, ast.Constant) or not isinstance( + keyword.value.value, str + ): + continue + + if ( + keyword.value.end_lineno is not None + and keyword.value.end_col_offset is not None + ): + self.locations.append( + ( + keyword.value.lineno, + keyword.value.col_offset, + keyword.value.end_lineno, + keyword.value.end_col_offset, + ) + ) + else: + logger.warning( + f"Could not get end location for a {self.target_keyword_arg} string. Skipping this instance." 
+ ) + self.generic_visit(node) + + +def extract_string_literal( + lines: list[str], location: tuple[int, int, int, int] +) -> tuple[str | None, str | None]: + start_line, start_col, end_line, end_col = location + start_line_0, end_line_0 = start_line - 1, end_line - 1 + + if ( + start_line_0 < 0 + or end_line_0 >= len(lines) + or start_col > len(lines[start_line_0]) + or end_col > len(lines[end_line_0]) + ): + return None, None + + if start_line == end_line: + literal = lines[start_line_0][start_col:end_col] + else: + first_line = lines[start_line_0][start_col:] + middle_lines = ( + lines[start_line_0 + 1 : end_line_0] + if start_line_0 + 1 <= end_line_0 + else [] + ) + last_line = lines[end_line_0][:end_col] + literal = "\n".join([first_line] + middle_lines + [last_line]) + + quote_types = ['"""', "'''", '"', "'"] + for quote in quote_types: + for prefix in [f"r{quote}", quote]: + if literal.startswith(prefix) and literal.endswith(quote): + return literal[len(prefix) : -len(quote)], quote + + return None, None + + +def format_bibtex(bibtex_str: str) -> str | None: + parser = bibtexparser.bparser.BibTexParser( + common_strings=True, ignore_nonstandard_types=False, interpolate_strings=False + ) + + try: + bib_database = bibtexparser.loads(bibtex_str, parser=parser) + if not bib_database.entries: + return None + bib_database.comments = [] + + writer = BibTexWriter() + writer.indent = " " + writer.comma_first = False + writer.add_trailing_comma = True + + return writer.write(bib_database).strip() + except Exception: + return None + + +def process_file( + file_path: Path, + target_function_name: str, + target_keyword_arg: str, + dry_run: bool, +) -> tuple[bool, bool, int, bool, bool]: + file_modified = file_error = skipped_no_keyword = skipped_no_locations = False + num_modified_in_file = 0 + replacements_for_file = [] + + try: + content = file_path.read_text() + tree = ast.parse(content, filename=str(file_path)) + + finder = KeywordLiteralFinder(target_function_name, 
target_keyword_arg) + finder.visit(tree) + + if not finder.keyword_found_anywhere: + return False, False, 0, True, False + + if not finder.locations: + return False, False, 0, False, True + + content_lines = content.splitlines() + content_lines_with_endings = content.splitlines(True) + + for location in finder.locations: + literal_value, quote_type = extract_string_literal(content_lines, location) + + if literal_value is None or quote_type is None: + logger.error( + f"In {file_path.name}: Could not extract {target_keyword_arg} string literal at {location}" + ) + file_error = True + continue + + literal_str = literal_value.strip() + if not literal_str: + continue + + formatted_literal = format_bibtex(literal_str) + if formatted_literal is None: + logger.error( + f"In {file_path.name}: Failed to parse/format {target_keyword_arg} at {location}" + ) + file_error = True + continue + + if literal_str == formatted_literal: + continue + + new_literal = f'r"""\n{formatted_literal}\n"""' + + start_line, start_col, end_line, end_col = location + start_char_index = ( + sum(len(line) for line in content_lines_with_endings[: start_line - 1]) + + start_col + ) + end_char_index = ( + sum(len(line) for line in content_lines_with_endings[: end_line - 1]) + + end_col + ) + + original_slice = content[start_char_index:end_char_index] + matched_prefix = "" + if original_slice.startswith(f"r{quote_type}"): + matched_prefix = "r" + + full_original_literal = ( + f"{matched_prefix}{quote_type}{literal_value}{quote_type}" + ) + + try: + actual_start = content.index(full_original_literal, start_char_index) + actual_end = actual_start + len(full_original_literal) + replacements_for_file.append((actual_start, actual_end, new_literal)) + num_modified_in_file += 1 + except ValueError: + logger.warning( + f"In {file_path.name}: Could not find exact original literal match for {target_keyword_arg} at {location}. Using offset-based replacement." 
+ ) + replacements_for_file.append( + (start_char_index, end_char_index, new_literal) + ) + num_modified_in_file += 1 + + if replacements_for_file: + replacements_for_file.sort(key=lambda x: x[0], reverse=True) + new_content = content + for start, end, literal in replacements_for_file: + new_content = new_content[:start] + literal + new_content[end:] + + if not dry_run: + file_path.write_text(new_content) + file_modified = True + + except SyntaxError as e: + logger.error(f"SyntaxError in {file_path.name}: {e}") + file_error = True + except Exception as e: + logger.error(f"Unexpected error in {file_path.name}: {e}") + import traceback + + traceback.print_exc() + file_error = True + + return ( + file_modified, + file_error, + num_modified_in_file, + skipped_no_keyword, + skipped_no_locations, + ) + + +@app.command() +def tasks( + tasks_dir: Path = typer.Argument( + Path("mteb/tasks"), + exists=True, + file_okay=False, + dir_okay=True, + readable=True, + help="Directory containing MTEB task Python files.", + ), + dry_run: bool = typer.Option( + True, + "--dry-run", + help="Perform parsing and formatting but do not modify files.", + ), +): + modified_files = error_files = skipped_files = processed_files = bibtex_modified = 0 + task_files = sorted(tasks_dir.rglob("*.py")) + + if not task_files: + logger.error(f"No Python files found in {tasks_dir}") + raise typer.Exit(code=1) + + logger.info(f"Found {len(task_files)} Python files in {tasks_dir}. 
Processing...") + + for file_path in task_files: + if file_path.name == "__init__.py": + continue + + processed_files += 1 + file_modified, file_error, num_modified, no_keyword, no_locations = ( + process_file(file_path, "TaskMetadata", "bibtex_citation", dry_run) + ) + + if file_error: + error_files += 1 + elif file_modified: + modified_files += 1 + bibtex_modified += num_modified + else: + skipped_files += 1 + + logger.info("\n--- Summary ---") + logger.info(f"Processed Files: {processed_files}") + logger.info(f"Modified Files: {modified_files}") + logger.info(f"Skipped Files: {skipped_files}") + logger.info(f"Error Files: {error_files}") + logger.info(f"Total BibTeX Instances Modified: {bibtex_modified}") + + if dry_run: + logger.info("\nNOTE: Dry run mode was enabled. No files were actually changed.") + + if error_files > 0: + logger.warning("Errors occurred during processing. Check logs above.") + raise typer.Exit(code=1) + + +@app.command() +def benchmarks( + benchmarks_file: Path = typer.Argument( + Path("mteb/benchmarks/benchmarks.py"), + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + help="Path to the benchmarks.py file.", + ), + dry_run: bool = typer.Option( + True, + "--dry-run", + help="Perform parsing and formatting but do not modify the file.", + ), +): + logger.info(f"Processing {benchmarks_file}...") + + file_modified, file_error, num_modified, no_keyword, no_locations = process_file( + benchmarks_file, "Benchmark", "citation", dry_run + ) + + if no_keyword: + logger.info(f"SKIPPED: No 'citation' keyword found in {benchmarks_file.name}.") + raise typer.Exit() + if no_locations: + logger.info( + f"SKIPPED: 'citation' keyword found, but no valid string literals detected in {benchmarks_file.name}." 
+ ) + raise typer.Exit() + + logger.info("\n--- Summary ---") + logger.info(f"Processed File: {benchmarks_file.name}") + logger.info(f"Modified: {'Yes' if file_modified else 'No'}") + logger.info(f"Errors Occurred: {'Yes' if file_error else 'No'}") + logger.info(f"Citations Modified: {num_modified}") + + if dry_run and file_modified: + logger.info("\nNOTE: Dry run mode was enabled. File was not actually changed.") + + if file_error: + logger.warning("Errors occurred during processing. Check logs above.") + raise typer.Exit(code=1) + elif not file_modified and not file_error: + logger.info("No changes needed.") + + +if __name__ == "__main__": + app()