From 78bc9ac689736fc6999bcbcd9ec218d5104ef0b6 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Sat, 28 Sep 2024 17:36:58 +0200 Subject: [PATCH 1/3] fix: Add multilingual bench --- mteb/benchmarks/benchmarks.py | 143 ++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 18a3f5f130..95f1358149 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -544,3 +544,146 @@ def __getitem__(self, index): reference=None, citation=None, ) + + +MTEB_code = Benchmark( + name="MTEB(Multilingual)", + tasks=get_tasks( + tasks=[ + "BornholmBitextMining", + "BibleNLPBitextMining", + "BUCC.v2", + "DiaBlaBitextMining", + "FloresBitextMining", + "IN22GenBitextMining", + "IndicGenBenchFloresBitextMining", + "NollySentiBitextMining", + "NorwegianCourtsBitextMining", + "NTREXBitextMining", + "NusaTranslationBitextMining", + "NusaXBitextMining", + "Tatoeba", + "BulgarianStoreReviewSentimentClassfication", + "CzechProductReviewSentimentClassification", + "GreekLegalCodeClassification", + "DBpediaClassification", + "FinancialPhrasebankClassification", + "PoemSentimentClassification", + "ToxicConversationsClassification", + "TweetTopicSingleClassification", + "EstonianValenceClassification", + "FilipinoShopeeReviewsClassification", + "GujaratiNewsClassification", + "SentimentAnalysisHindi", + "IndonesianIdClickbaitClassification", + "ItaCaseholdClassification", + "KorSarcasmClassification", + "KurdishSentimentClassification", + "MacedonianTweetSentimentClassification", + "AfriSentiClassification", + "AmazonCounterfactualClassification", + "CataloniaTweetClassification", + "CyrillicTurkicLangClassification", + "IndicLangClassification", + "MasakhaNEWSClassification", + "MassiveIntentClassification", + "MultiHateClassification", + "NordicLangClassification", + "NusaParagraphEmotionClassification", + "NusaX-senti", + "ScalaClassification", + "SwissJudgementClassification", + "NepaliNewsClassification", + "OdiaNewsClassification", + "PunjabiNewsClassification", + "PolEmo2.0-OUT", + "PAC", + "SinhalaNewsClassification", + "CSFDSKMovieReviewSentimentClassification", + "SiswatiNewsClassification", + "SlovakMovieReviewSentimentClassification", + "SwahiliNewsClassification", + "DalajClassification", + "TswanaNewsClassification", + "IsiZuluNewsClassification", + "WikiCitiesClustering", + "MasakhaNEWSClusteringS2S", + "RomaniBibleClustering", + "ArXivHierarchicalClusteringP2P", + "ArXivHierarchicalClusteringS2S", + "BigPatentClustering.v2", + "BiorxivClusteringP2P.v2", + "MedrxivClusteringP2P.v2", + "StackExchangeClustering.v2", + "AlloProfClusteringS2S.v2", + "HALClusteringS2S.v2", + "SIB200ClusteringS2S", + "WikiClusteringP2P.v2", + "SNLHierarchicalClusteringP2P", + "PlscClusteringP2P.v2", + "SwednClusteringP2P", + "CLSClusteringP2P.v2", + "StackOverflowQA", + "TwitterHjerneRetrieval", + "AILAStatutes", + "ArguAna", + "HagridRetrieval", + "LegalBenchCorporateLobbying", + "LEMBPasskeyRetrieval", + "SCIDOCS", + "SpartQA", + "TempReasonL1", + "TRECCOVID", + "WinoGrande", + "BelebeleRetrieval", + "MLQARetrieval", + "StatcanDialogueDatasetRetrieval", + "WikipediaRetrievalMultilingual", + "CovidRetrieval", + "Core17InstructionRetrieval", + "News21InstructionRetrieval", + "Robust04InstructionRetrieval", + "KorHateSpeechMLClassification", + "MalteseNewsClassification", + "MultiEURLEXMultilabelClassification", + "BrazilianToxicTweetsClassification", + "CEDRClassification", + "CTKFactsNLI", + "SprintDuplicateQuestions", + "TwitterURLCorpus", + "ArmenianParaphrasePC", + "indonli", + "OpusparcusPC", + "PawsXPairClassification", + "RTE3", + "XNLI", + "PpcPC", + "TERRa", + "WebLINXCandidatesReranking", + "AlloprofReranking", + "VoyageMMarcoReranking", + "WikipediaRerankingMultilingual", + "RuBQReranking", + "T2Reranking", + "GermanSTSBenchmark", + "SICK-R", + "STS12", + "STS13", + "STS14", + "STS15", + "STSBenchmark", + "FaroeseSTS", + "FinParaSTS", + "JSICK", + "IndicCrosslingualSTS", + "SemRel24STS", + "STS17", + "STS22.v2", + "STSES", + "STSB", + ], + ), + description="The Multilingual benchmarks from MMTEB. Currently under development.", + reference=None, + citation=None, +) From 7e44d4888f5a35859c08ca94a9e872462150e8bd Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Sat, 28 Sep 2024 17:57:23 +0200 Subject: [PATCH 2/3] Update mteb/benchmarks/benchmarks.py Co-authored-by: Niklas Muennighoff --- mteb/benchmarks/benchmarks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 95f1358149..ccb266aacb 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -546,7 +546,7 @@ def __getitem__(self, index): ) -MTEB_code = Benchmark( +MTEB_multilingual = Benchmark( name="MTEB(Multilingual)", tasks=get_tasks( tasks=[ From fc5f6c682683ececfa8b0d507c0928920d7665dc Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Sat, 28 Sep 2024 18:30:57 +0200 Subject: [PATCH 3/3] format --- scripts/mmteb_create_author_list.ipynb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/mmteb_create_author_list.ipynb b/scripts/mmteb_create_author_list.ipynb index c1b61015d3..3c9d99c2ed 100644 --- a/scripts/mmteb_create_author_list.ipynb +++ b/scripts/mmteb_create_author_list.ipynb @@ -907,7 +907,7 @@ " affiations[aff] = aff_id\n", " aff_id += 1\n", " aff_string += f\"{affiations[aff]},\"\n", - " \n", + "\n", " # remove last comma\n", " aff_string = aff_string[:-1]\n", "\n", @@ -936,7 +936,9 @@ "last_author2 = \"Siva\"\n", "last_author__ = [a for a in author_list if last_author2 in a][0]\n", "# remove from author list\n", - "author_list = [a for a in author_list if last_author1 not in a and last_author2 not in a]\n", + "author_list = [\n", + " a for a in author_list if last_author1 not in a and last_author2 not in a\n", + "]\n", "\n", "author_list.append(last_author__)\n", "author_list.append(last_author_)"