diff --git a/mteb/abstasks/retrieval.py b/mteb/abstasks/retrieval.py index 79ac12f0be..c6d9c95260 100644 --- a/mteb/abstasks/retrieval.py +++ b/mteb/abstasks/retrieval.py @@ -242,7 +242,7 @@ def _process_split( instructions, ) ) - if hasattr(self, "top_ranked"): + if hasattr(self, "top_ranked") and self.top_ranked: self.dataset[subset][split]["top_ranked"] = self.top_ranked[ split ].copy() diff --git a/mteb/benchmarks/benchmarks/__init__.py b/mteb/benchmarks/benchmarks/__init__.py index 0f0db150fc..44b4fc6586 100644 --- a/mteb/benchmarks/benchmarks/__init__.py +++ b/mteb/benchmarks/benchmarks/__init__.py @@ -3,6 +3,7 @@ BEIR_NL, BRIGHT, BRIGHT_LONG, + BRIGHT_V1_1, BUILT_MTEB, C_MTEB, CHEMTEB, @@ -69,6 +70,7 @@ "BEIR_NL", "BRIGHT", "BRIGHT_LONG", + "BRIGHT_V1_1", "BUILT_MTEB", "CHEMTEB", "CHEMTEB_V1_1", diff --git a/mteb/benchmarks/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks/benchmarks.py index b2dbb7ec21..1eff929e33 100644 --- a/mteb/benchmarks/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks/benchmarks.py @@ -1330,6 +1330,46 @@ """, ) +BRIGHT_V1_1 = Benchmark( + name="BRIGHT(v1.1)", + display_name="Reasoning Retrieval", + tasks=get_tasks( + tasks=[ + "BrightBiologyRetrieval", + "BrightEarthScienceRetrieval", + "BrightEconomicsRetrieval", + "BrightPsychologyRetrieval", + "BrightRoboticsRetrieval", + "BrightStackoverflowRetrieval", + "BrightSustainableLivingRetrieval", + "BrightPonyRetrieval", + "BrightLeetcodeRetrieval", + "BrightAopsRetrieval", + "BrightTheoremQATheoremsRetrieval", + "BrightTheoremQAQuestionsRetrieval", + "BrightBiologyLongRetrieval", + "BrightEarthScienceLongRetrieval", + "BrightEconomicsLongRetrieval", + "BrightPsychologyLongRetrieval", + "BrightRoboticsLongRetrieval", + "BrightStackoverflowLongRetrieval", + "BrightSustainableLivingLongRetrieval", + "BrightPonyLongRetrieval", + ], + ), + description="v1.1 refactors BRIGHT into separate per-domain tasks and adds prompts to the individual tasks.", + reference="https://brightbenchmark.github.io/", + citation=r""" +@article{su2024bright, + author = {Su, Hongjin and Yen, Howard and Xia, Mengzhou and Shi, Weijia and Muennighoff, Niklas and Wang, Han-yu and Liu, Haisu and Shi, Quan and Siegel, Zachary S and Tang, Michael and others}, + journal = {arXiv preprint arXiv:2407.12883}, + title = {Bright: A realistic and challenging benchmark for reasoning-intensive retrieval}, + year = {2024}, +} +""", +) + + CODE_RAG = Benchmark( name="CodeRAG", tasks=get_tasks( @@ -1781,8 +1821,7 @@ "TRECCOVID-NL", ], ), - description="BEIR-NL is a Dutch adaptation of the publicly available BEIR benchmark, created through automated " - "translation.", + description="BEIR-NL is a Dutch adaptation of the publicly available BEIR benchmark, created through automated translation.", reference="https://arxiv.org/abs/2412.08329", contacts=["nikolay-banar"], citation=r""" diff --git a/mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json new file mode 100644 index 0000000000..82db598f5b --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json @@ -0,0 +1,35 @@ +{ + "standard": { + "num_samples": 188113, + "number_of_characters": 141769714, + "documents_text_statistics": { + "total_text_length": 141734227, + "min_text_length": 58, + "average_text_length": 753.8974425803981, + "max_text_length": 7334, + "unique_texts": 176508 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 35487, + "min_text_length": 85, +
"average_text_length": 319.7027027027027, + "max_text_length": 1167, + "unique_texts": 111 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 524, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 4.7207207207207205, + "max_relevant_docs_per_query": 8, + "unique_relevant_docs": 111 + }, + "top_ranked_statistics": { + "num_top_ranked": 20264921, + "min_top_ranked_per_query": 176954, + "average_top_ranked_per_query": 182566.85585585586, + "max_top_ranked_per_query": 186176 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json new file mode 100644 index 0000000000..5b9b80ac84 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json @@ -0,0 +1,35 @@ +{ + "long": { + "num_samples": 627, + "number_of_characters": 19398082, + "documents_text_statistics": { + "total_text_length": 19344209, + "min_text_length": 142, + "average_text_length": 36916.42938931298, + "max_text_length": 1324201, + "unique_texts": 498 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 53873, + "min_text_length": 89, + "average_text_length": 523.0388349514564, + "max_text_length": 2195, + "unique_texts": 103 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 134, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.3009708737864079, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 134 + }, + "top_ranked_statistics": { + "num_top_ranked": 53972, + "min_top_ranked_per_query": 524, + "average_top_ranked_per_query": 524.0, + "max_top_ranked_per_query": 524 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json new file mode 100644 index 0000000000..d426e46e1b --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json @@ -0,0 +1,35 @@ +{ + "standard": { + "num_samples": 57462, + "number_of_characters": 18936054, + "documents_text_statistics": { + "total_text_length": 18882181, + "min_text_length": 1, + "average_text_length": 329.192994996426, + "max_text_length": 31130, + "unique_texts": 49434 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 53873, + "min_text_length": 89, + "average_text_length": 523.0388349514564, + "max_text_length": 2195, + "unique_texts": 103 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 374, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 3.6310679611650487, + "max_relevant_docs_per_query": 19, + "unique_relevant_docs": 374 + }, + "top_ranked_statistics": { + "num_top_ranked": 5907977, + "min_top_ranked_per_query": 57359, + "average_top_ranked_per_query": 57359.0, + "max_top_ranked_per_query": 57359 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json new file mode 100644 index 0000000000..fb4242607e --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json @@ -0,0 +1,35 @@ +{ + "long": { + "num_samples": 717, + "number_of_characters": 41696684, + "documents_text_statistics": { + "total_text_length": 41641374, + "min_text_length": 28, + "average_text_length": 69286.81198003328, + "max_text_length": 2627262, + 
"unique_texts": 587 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 55310, + "min_text_length": 83, + "average_text_length": 476.8103448275862, + "max_text_length": 1565, + "unique_texts": 116 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 187, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.6120689655172413, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 187 + }, + "top_ranked_statistics": { + "num_top_ranked": 69716, + "min_top_ranked_per_query": 601, + "average_top_ranked_per_query": 601.0, + "max_top_ranked_per_query": 601 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json new file mode 100644 index 0000000000..5cd03dc7fb --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json @@ -0,0 +1,35 @@ +{ + "standard": { + "num_samples": 121365, + "number_of_characters": 40478259, + "documents_text_statistics": { + "total_text_length": 40422949, + "min_text_length": 1, + "average_text_length": 333.3878959826473, + "max_text_length": 233622, + "unique_texts": 117633 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 55310, + "min_text_length": 83, + "average_text_length": 476.8103448275862, + "max_text_length": 1565, + "unique_texts": 116 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 609, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 5.25, + "max_relevant_docs_per_query": 23, + "unique_relevant_docs": 609 + }, + "top_ranked_statistics": { + "num_top_ranked": 14064884, + "min_top_ranked_per_query": 121249, + "average_top_ranked_per_query": 121249.0, + "max_top_ranked_per_query": 121249 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json new file mode 100644 index 0000000000..1ffe27db06 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json @@ -0,0 +1,35 @@ +{ + "long": { + "num_samples": 619, + "number_of_characters": 19993261, + "documents_text_statistics": { + "total_text_length": 19917079, + "min_text_length": 43, + "average_text_length": 38598.99031007752, + "max_text_length": 429507, + "unique_texts": 515 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 76182, + "min_text_length": 164, + "average_text_length": 739.6310679611651, + "max_text_length": 2223, + "unique_texts": 103 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 109, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.058252427184466, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 109 + }, + "top_ranked_statistics": { + "num_top_ranked": 53148, + "min_top_ranked_per_query": 516, + "average_top_ranked_per_query": 516.0, + "max_top_ranked_per_query": 516 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json new file mode 100644 index 0000000000..3eada003fb --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json @@ -0,0 +1,35 @@ +{ + "standard": { + "num_samples": 50323, + "number_of_characters": 19882579, + "documents_text_statistics": { + 
"total_text_length": 19806397, + "min_text_length": 1, + "average_text_length": 394.3926125049781, + "max_text_length": 39672, + "unique_texts": 40594 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 76182, + "min_text_length": 164, + "average_text_length": 739.6310679611651, + "max_text_length": 2223, + "unique_texts": 103 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 823, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 7.990291262135922, + "max_relevant_docs_per_query": 85, + "unique_relevant_docs": 823 + }, + "top_ranked_statistics": { + "num_top_ranked": 5172660, + "min_top_ranked_per_query": 50220, + "average_top_ranked_per_query": 50220.0, + "max_top_ranked_per_query": 50220 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json new file mode 100644 index 0000000000..1b74237eb6 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json @@ -0,0 +1,35 @@ +{ + "standard": { + "num_samples": 414074, + "number_of_characters": 438348000, + "documents_text_statistics": { + "total_text_length": 438140779, + "min_text_length": 75, + "average_text_length": 1058.4849178125876, + "max_text_length": 103665, + "unique_texts": 413932 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 207221, + "min_text_length": 422, + "average_text_length": 1459.3028169014085, + "max_text_length": 3964, + "unique_texts": 142 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 262, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.8450704225352113, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 216 + }, + "top_ranked_statistics": { + "num_top_ranked": 58744859, + "min_top_ranked_per_query": 412813, + "average_top_ranked_per_query": 413696.1901408451, + "max_top_ranked_per_query": 413923 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json new file mode 100644 index 0000000000..fa07bc4a03 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json @@ -0,0 +1,35 @@ +{ + "long": { + "num_samples": 689, + "number_of_characters": 2093720, + "documents_text_statistics": { + "total_text_length": 2050155, + "min_text_length": 28, + "average_text_length": 3553.1282495667247, + "max_text_length": 108885, + "unique_texts": 577 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 43565, + "min_text_length": 182, + "average_text_length": 388.9732142857143, + "max_text_length": 946, + "unique_texts": 112 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 769, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 6.866071428571429, + "max_relevant_docs_per_query": 12, + "unique_relevant_docs": 17 + }, + "top_ranked_statistics": { + "num_top_ranked": 64624, + "min_top_ranked_per_query": 577, + "average_top_ranked_per_query": 577.0, + "max_top_ranked_per_query": 577 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json new file mode 100644 index 0000000000..6bfcb67959 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json @@ -0,0 +1,35 
@@ +{ + "standard": { + "num_samples": 8006, + "number_of_characters": 2082980, + "documents_text_statistics": { + "total_text_length": 2039415, + "min_text_length": 5, + "average_text_length": 258.350012667849, + "max_text_length": 2583, + "unique_texts": 6183 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 43565, + "min_text_length": 182, + "average_text_length": 388.9732142857143, + "max_text_length": 946, + "unique_texts": 112 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 2519, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 22.491071428571427, + "max_relevant_docs_per_query": 32, + "unique_relevant_docs": 47 + }, + "top_ranked_statistics": { + "num_top_ranked": 884128, + "min_top_ranked_per_query": 7894, + "average_top_ranked_per_query": 7894.0, + "max_top_ranked_per_query": 7894 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json new file mode 100644 index 0000000000..e447b21c51 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json @@ -0,0 +1,35 @@ +{ + "long": { + "num_samples": 613, + "number_of_characters": 20489389, + "documents_text_statistics": { + "total_text_length": 20419376, + "min_text_length": 23, + "average_text_length": 39881.59375, + "max_text_length": 669575, + "unique_texts": 509 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 70013, + "min_text_length": 166, + "average_text_length": 693.1980198019802, + "max_text_length": 2334, + "unique_texts": 101 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 116, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1485148514851484, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 113 + }, + "top_ranked_statistics": { + "num_top_ranked": 51712, + "min_top_ranked_per_query": 512, + "average_top_ranked_per_query": 512.0, + "max_top_ranked_per_query": 512 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json new file mode 100644 index 0000000000..a7d46172d7 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json @@ -0,0 +1,35 @@ +{ + "standard": { + "num_samples": 52936, + "number_of_characters": 20372421, + "documents_text_statistics": { + "total_text_length": 20302408, + "min_text_length": 3, + "average_text_length": 384.26058483959497, + "max_text_length": 226941, + "unique_texts": 43756 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 70013, + "min_text_length": 166, + "average_text_length": 693.1980198019802, + "max_text_length": 2334, + "unique_texts": 101 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 742, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 7.346534653465347, + "max_relevant_docs_per_query": 59, + "unique_relevant_docs": 738 + }, + "top_ranked_statistics": { + "num_top_ranked": 5336335, + "min_top_ranked_per_query": 52835, + "average_top_ranked_per_query": 52835.0, + "max_top_ranked_per_query": 52835 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json new file mode 
100644 index 0000000000..114ef3ff49 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json @@ -0,0 +1,35 @@ +{ + "long": { + "num_samples": 609, + "number_of_characters": 18386897, + "documents_text_statistics": { + "total_text_length": 18166762, + "min_text_length": 117, + "average_text_length": 35761.34251968504, + "max_text_length": 3589928, + "unique_texts": 505 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 220135, + "min_text_length": 165, + "average_text_length": 2179.5544554455446, + "max_text_length": 19341, + "unique_texts": 101 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 106, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0495049504950495, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 106 + }, + "top_ranked_statistics": { + "num_top_ranked": 51308, + "min_top_ranked_per_query": 508, + "average_top_ranked_per_query": 508.0, + "max_top_ranked_per_query": 508 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json new file mode 100644 index 0000000000..d52826976e --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json @@ -0,0 +1,35 @@ +{ + "standard": { + "num_samples": 62062, + "number_of_characters": 18167360, + "documents_text_statistics": { + "total_text_length": 17947225, + "min_text_length": 1, + "average_text_length": 289.6535724084505, + "max_text_length": 28637, + "unique_texts": 40431 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 220135, + "min_text_length": 165, + "average_text_length": 2179.5544554455446, + "max_text_length": 19341, + "unique_texts": 101 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 553, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 5.475247524752476, + "max_relevant_docs_per_query": 36, + "unique_relevant_docs": 553 + }, + "top_ranked_statistics": { + "num_top_ranked": 6258061, + "min_top_ranked_per_query": 61961, + "average_top_ranked_per_query": 61961.0, + "max_top_ranked_per_query": 61961 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json new file mode 100644 index 0000000000..fa0691a820 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json @@ -0,0 +1,35 @@ +{ + "long": { + "num_samples": 1975, + "number_of_characters": 184326754, + "documents_text_statistics": { + "total_text_length": 184175475, + "min_text_length": 41, + "average_text_length": 99125.65931108719, + "max_text_length": 9182738, + "unique_texts": 1846 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 151279, + "min_text_length": 185, + "average_text_length": 1292.982905982906, + "max_text_length": 12432, + "unique_texts": 117 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 129, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1025641025641026, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 125 + }, + "top_ranked_statistics": { + "num_top_ranked": 217386, + "min_top_ranked_per_query": 1858, + "average_top_ranked_per_query": 1858.0, + "max_top_ranked_per_query": 1858 + } + } +} diff 
--git a/mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json new file mode 100644 index 0000000000..9a9c84ef25 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json @@ -0,0 +1,35 @@ +{ + "standard": { + "num_samples": 107198, + "number_of_characters": 183652816, + "documents_text_statistics": { + "total_text_length": 183501537, + "min_text_length": 1, + "average_text_length": 1713.6703710275399, + "max_text_length": 4000, + "unique_texts": 66270 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 151279, + "min_text_length": 185, + "average_text_length": 1292.982905982906, + "max_text_length": 12432, + "unique_texts": 117 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 819, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 7.0, + "max_relevant_docs_per_query": 59, + "unique_relevant_docs": 816 + }, + "top_ranked_statistics": { + "num_top_ranked": 12528477, + "min_top_ranked_per_query": 107081, + "average_top_ranked_per_query": 107081.0, + "max_top_ranked_per_query": 107081 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json new file mode 100644 index 0000000000..02b6c8928b --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json @@ -0,0 +1,35 @@ +{ + "long": { + "num_samples": 662, + "number_of_characters": 21154322, + "documents_text_statistics": { + "total_text_length": 21080575, + "min_text_length": 30, + "average_text_length": 38051.579422382674, + "max_text_length": 5732344, + "unique_texts": 551 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 73747, + "min_text_length": 158, + "average_text_length": 682.8425925925926, + "max_text_length": 2843, + "unique_texts": 108 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 129, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1944444444444444, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 129 + }, + "top_ranked_statistics": { + "num_top_ranked": 59832, + "min_top_ranked_per_query": 554, + "average_top_ranked_per_query": 554.0, + "max_top_ranked_per_query": 554 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json new file mode 100644 index 0000000000..0761ba1adf --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json @@ -0,0 +1,35 @@ +{ + "standard": { + "num_samples": 60900, + "number_of_characters": 20971763, + "documents_text_statistics": { + "total_text_length": 20898016, + "min_text_length": 1, + "average_text_length": 343.7626003421503, + "max_text_length": 158296, + "unique_texts": 50142 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 73747, + "min_text_length": 158, + "average_text_length": 682.8425925925926, + "max_text_length": 2843, + "unique_texts": 108 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 604, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 5.592592592592593, + "max_relevant_docs_per_query": 59, + "unique_relevant_docs": 604 + }, + 
"top_ranked_statistics": { + "num_top_ranked": 6565536, + "min_top_ranked_per_query": 60792, + "average_top_ranked_per_query": 60792.0, + "max_top_ranked_per_query": 60792 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json new file mode 100644 index 0000000000..0f29b0aa5d --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json @@ -0,0 +1,35 @@ +{ + "standard": { + "num_samples": 188207, + "number_of_characters": 141817604, + "documents_text_statistics": { + "total_text_length": 141734227, + "min_text_length": 58, + "average_text_length": 753.8974425803981, + "max_text_length": 7334, + "unique_texts": 176508 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 83377, + "min_text_length": 12, + "average_text_length": 406.7170731707317, + "max_text_length": 1255, + "unique_texts": 201 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 469, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.299019607843137, + "max_relevant_docs_per_query": 7, + "unique_relevant_docs": 234 + }, + "top_ranked_statistics": { + "num_top_ranked": 37946536, + "min_top_ranked_per_query": 176970, + "average_top_ranked_per_query": 185105.05365853658, + "max_top_ranked_per_query": 188176 + } + } +} diff --git a/mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json new file mode 100644 index 0000000000..cb6204368d --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json @@ -0,0 +1,35 @@ +{ + "standard": { + "num_samples": 23904, + "number_of_characters": 20825122, + "documents_text_statistics": { + "total_text_length": 20797224, + "min_text_length": 74, + "average_text_length": 872.4033726246906, + "max_text_length": 19104, + "unique_texts": 23839 + }, + "documents_image_statistics": null, + "queries_text_statistics": { + "total_text_length": 27898, + "min_text_length": 13, + "average_text_length": 429.2, + "max_text_length": 1255, + "unique_texts": 65 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 126, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.9384615384615385, + "max_relevant_docs_per_query": 6, + "unique_relevant_docs": 95 + }, + "top_ranked_statistics": { + "num_top_ranked": 1549535, + "min_top_ranked_per_query": 23839, + "average_top_ranked_per_query": 23839.0, + "max_top_ranked_per_query": 23839 + } + } +} diff --git a/mteb/models/model_implementations/bge_models.py b/mteb/models/model_implementations/bge_models.py index e16fb537ff..87abeccab1 100644 --- a/mteb/models/model_implementations/bge_models.py +++ b/mteb/models/model_implementations/bge_models.py @@ -6,7 +6,29 @@ from .e5_instruct import E5_MISTRAL_TRAINING_DATA -model_prompts = {"query": "Represent this sentence for searching relevant passages: "} +model_prompts = { + "query": "Represent this sentence for searching relevant passages: ", + "BrightBiologyRetrieval-query": "Represent this biology post for searching relevant passages: ", + "BrightEarthScienceRetrieval-query": "Represent this earth_science post for searching relevant passages: ", + "BrightEconomicsRetrieval-query": "Represent this economics post for searching relevant passages: ", + "BrightPsychologyRetrieval-query": "Represent 
this psychology post for searching relevant passages: ", + "BrightRoboticsRetrieval-query": "Represent this robotics post for searching relevant passages: ", + "BrightStackoverflowRetrieval-query": "Represent this stackoverflow post for searching relevant passages: ", + "BrightSustainableLivingRetrieval-query": "Represent this sustainable_living post for searching relevant passages: ", + "BrightPonyRetrieval-query": "Represent this Pony question for searching relevant passages: ", + "BrightLeetcodeRetrieval-query": "Represent this Coding problem for searching relevant examples: ", + "BrightAopsRetrieval-query": "Represent this Math problem for searching relevant examples: ", + "BrightTheoremQATheoremsRetrieval-query": "Represent this Math problem for searching relevant theorems: ", + "BrightTheoremQAQuestionsRetrieval-query": "Represent this Math problem for searching relevant examples: ", + "BrightBiologyLongRetrieval-query": "Represent this biology post for searching relevant documents: ", + "BrightEarthScienceLongRetrieval-query": "Represent this earth_science post for searching relevant documents: ", + "BrightEconomicsLongRetrieval-query": "Represent this economics post for searching relevant documents: ", + "BrightPsychologyLongRetrieval-query": "Represent this psychology post for searching relevant documents: ", + "BrightRoboticsLongRetrieval-query": "Represent this robotics post for searching relevant document: ", + "BrightStackoverflowLongRetrieval-query": "Represent this stackoverflow post for searching relevant document: ", + "BrightSustainableLivingLongRetrieval-query": "Represent this sustainable_living post for searching relevant documents: ", + "BrightPonyLongRetrieval-query": "Represent this Pony question for searching relevant documents: ", +} BGE_15_CITATION = """@misc{bge_embedding, title={C-Pack: Packaged Resources To Advance General Chinese Embedding}, author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff}, diff --git a/mteb/models/model_implementations/bm25.py b/mteb/models/model_implementations/bm25.py index dec2b6d698..9f7e92ad20 100644 --- a/mteb/models/model_implementations/bm25.py +++ b/mteb/models/model_implementations/bm25.py @@ -13,7 +13,6 @@ from mteb.types import ( CorpusDatasetType, EncodeKwargs, - InstructionDatasetType, QueryDatasetType, RetrievalOutputType, TopRankedDocumentsType, @@ -80,7 +79,6 @@ def search( hf_subset: str, top_k: int, encode_kwargs: EncodeKwargs, - instructions: InstructionDatasetType | None = None, top_ranked: TopRankedDocumentsType | None = None, ) -> RetrievalOutputType: logger.info("Encoding Queries...") @@ -103,13 +101,17 @@ def search( query_results = queries_results[qi] scores = queries_scores[qi] doc_id_to_score = {} + query_documents = ( + top_ranked[qid] if top_ranked and qid in top_ranked else None + ) # Iterate over results - for ri in range(len(query_results)): - doc_idx = query_results[ri] - score = scores[ri] + for doc_idx, score in zip(query_results, scores): doc_id = self.corpus_idx_to_id[doc_idx] + # handle reranking with a filtered set of documents + if query_documents is not None and doc_id not in query_documents: + continue doc_id_to_score[doc_id] = float(score) results[qid] = doc_id_to_score diff --git a/mteb/models/model_implementations/reasonir_model.py b/mteb/models/model_implementations/reasonir_model.py index 49bccaffe4..8b439daade 100644 --- a/mteb/models/model_implementations/reasonir_model.py +++ b/mteb/models/model_implementations/reasonir_model.py @@ -36,12 +36,76 @@ def 
instruction_template( "DuRetrieval", "QuoraRetrieval", } +_prompts_dict = { + "BrightBiologyRetrieval": { + "query": "Given a Biology post, retrieve relevant passages that help answer the post" + }, + "BrightEarthScienceRetrieval": { + "query": "Given a Earth Science post, retrieve relevant passages that help answer the post" + }, + "BrightEconomicsRetrieval": { + "query": "Given a Economics post, retrieve relevant passages that help answer the post" + }, + "BrightPsychologyRetrieval": { + "query": "Given a Psychology post, retrieve relevant passages that help answer the post" + }, + "BrightRoboticsRetrieval": { + "query": "Given a Robotics post, retrieve relevant passages that help answer the post" + }, + "BrightStackoverflowRetrieval": { + "query": "Given a Stackoverflow post, retrieve relevant passages that help answer the post" + }, + "BrightSustainableLivingRetrieval": { + "query": "Given a Sustainable Living post, retrieve relevant passages that help answer the post" + }, + "BrightPonyRetrieval": { + "query": "Given a Pony question, retrieve relevant passages that help answer the question" + }, + "BrightLeetcodeRetrieval": { + "query": "Given a coding problem, retrieve relevant examples that help answer the problem", + }, + "BrightAopsRetrieval": { + "query": "Given a Math problem, retrieve relevant examples that help answer the problem" + }, + "BrightTheoremQATheoremsRetrieval": { + "query": "Given a Math problem, retrieve relevant theorems that help answer the problem", + }, + "BrightTheoremQAQuestionsRetrieval": { + "query": "Given a Math problem, retrieve relevant examples that help answer the problem", + }, + "BrightBiologyLongRetrieval": { + "query": "Given a Biology post, retrieve relevant documents that help answer the post" + }, + "BrightEarthScienceLongRetrieval": { + "query": "Given a Earth Science post, retrieve relevant documents that help answer the post" + }, + "BrightEconomicsLongRetrieval": { + "query": "Given a Economics post, retrieve relevant documents that help answer the post" + }, + "BrightPsychologyLongRetrieval": { + "query": "Given a Psychology post, retrieve relevant documents that help answer the post" + }, + "BrightRoboticsLongRetrieval": { + "query": "Given a Robotics post, retrieve relevant documents that help answer the post" + }, + "BrightStackoverflowLongRetrieval": { + "query": "Given a Stackoverflow post, retrieve relevant documents that help answer the post" + }, + "BrightSustainableLivingLongRetrieval": { + "query": "Given a Sustainable Living post, retrieve relevant documents that help answer the post" + }, + "BrightPonyLongRetrieval": { + "query": "Given a Pony question, retrieve relevant documents that help answer the question" + }, +} + ReasonIR_8B = ModelMeta( loader=InstructSentenceTransformerModel, loader_kwargs=dict( instruction_template=instruction_template, trust_remote_code=True, + prompts_dict=_prompts_dict, ), name="ReasonIR/ReasonIR-8B", model_type=["dense"], diff --git a/mteb/tasks/retrieval/eng/__init__.py b/mteb/tasks/retrieval/eng/__init__.py index 247fdcce0b..6498fecee8 100644 --- a/mteb/tasks/retrieval/eng/__init__.py +++ b/mteb/tasks/retrieval/eng/__init__.py @@ -14,6 +14,28 @@ from .blink_it2i_retrieval import BLINKIT2IRetrieval from .blink_it2t_retrieval import BLINKIT2TRetrieval from .bright_retrieval import BrightLongRetrieval, BrightRetrieval +from .bright_v1_1_retrieval import ( + BrightAopsRetrieval, + BrightBiologyLongRetrieval, + BrightBiologyRetrieval, + BrightEarthScienceLongRetrieval, + 
BrightEarthScienceRetrieval, + BrightEconomicsLongRetrieval, + BrightEconomicsRetrieval, + BrightLeetcodeRetrieval, + BrightPonyLongRetrieval, + BrightPonyRetrieval, + BrightPsychologyLongRetrieval, + BrightPsychologyRetrieval, + BrightRoboticsLongRetrieval, + BrightRoboticsRetrieval, + BrightStackoverflowLongRetrieval, + BrightStackoverflowRetrieval, + BrightSustainableLivingLongRetrieval, + BrightSustainableLivingRetrieval, + BrightTheoremQAQuestionsRetrieval, + BrightTheoremQATheoremsRetrieval, +) from .built_bench_retrieval import BuiltBenchRetrieval from .chat_doctor_retrieval import ChatDoctorRetrieval from .chem_hotpot_qa_retrieval import ChemHotpotQARetrieval @@ -236,8 +258,28 @@ "BarExamQARetrieval", "BillSumCARetrieval", "BillSumUSRetrieval", + "BrightAopsRetrieval", + "BrightBiologyLongRetrieval", + "BrightBiologyRetrieval", + "BrightEarthScienceLongRetrieval", + "BrightEarthScienceRetrieval", + "BrightEconomicsLongRetrieval", + "BrightEconomicsRetrieval", + "BrightLeetcodeRetrieval", "BrightLongRetrieval", + "BrightPonyLongRetrieval", + "BrightPonyRetrieval", + "BrightPsychologyLongRetrieval", + "BrightPsychologyRetrieval", "BrightRetrieval", + "BrightRoboticsLongRetrieval", + "BrightRoboticsRetrieval", + "BrightStackoverflowLongRetrieval", + "BrightStackoverflowRetrieval", + "BrightSustainableLivingLongRetrieval", + "BrightSustainableLivingRetrieval", + "BrightTheoremQAQuestionsRetrieval", + "BrightTheoremQATheoremsRetrieval", "BuiltBenchRetrieval", "CIRRIT2IRetrieval", "CQADupstackAndroidRetrieval", diff --git a/mteb/tasks/retrieval/eng/bright_retrieval.py b/mteb/tasks/retrieval/eng/bright_retrieval.py index 82c56b8451..e99dbcc54a 100644 --- a/mteb/tasks/retrieval/eng/bright_retrieval.py +++ b/mteb/tasks/retrieval/eng/bright_retrieval.py @@ -1,3 +1,4 @@ +import warnings from collections import defaultdict import datasets @@ -86,6 +87,12 @@ def load_data(self) -> None: if self.data_loaded: return + warnings.warn( + "This task contains wrong prompts in the metadata. 
" + "Please use BRIGHT(v1.1) benchmark instead.", + category=DeprecationWarning, + ) + self.corpus, self.queries, self.relevant_docs = self.load_bright_data( path=self.metadata.dataset["path"], domains=list(self.metadata.eval_langs.keys()), @@ -104,7 +111,7 @@ class BrightRetrieval(AbsTaskRetrieval): "revision": "a75a0eb483f6a5233a6efc2d63d71540a4443dfb", }, reference="https://huggingface.co/datasets/xlangai/BRIGHT", - description="Bright retrieval dataset.", + description="BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval", type="Retrieval", category="t2t", eval_splits=["standard"], @@ -129,6 +136,7 @@ class BrightRetrieval(AbsTaskRetrieval): year = {2024}, } """, + superseded_by="BrightBiologyRetrieval", ) load_bright_data = load_bright_data load_data = load_data diff --git a/mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py b/mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py new file mode 100644 index 0000000000..473fb942d4 --- /dev/null +++ b/mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py @@ -0,0 +1,968 @@ +from __future__ import annotations + +from collections import defaultdict + +import datasets + +from mteb.abstasks import AbsTaskRetrieval +from mteb.abstasks.task_metadata import TaskMetadata + + +def load_bright_data( + path: str, + domain: str, + eval_splits: list, + cache_dir: str | None = None, + revision: str | None = None, +): + eval_split = eval_splits[0] + corpus_name = "documents" if eval_split == "standard" else "long_documents" + gold_ids_field = "gold_ids" if eval_split == "standard" else "gold_ids_long" + + corpus = dict.fromkeys(eval_splits) + queries = dict.fromkeys(eval_splits) + relevant_docs = dict.fromkeys(eval_splits) + top_ranked = dict.fromkeys(eval_splits) + + domain_corpus = datasets.load_dataset( + path, + corpus_name, + split=domain, + cache_dir=cache_dir, + revision=revision, + ) + examples = datasets.load_dataset( + path, + "examples", + split=domain, + cache_dir=cache_dir, + revision=revision, + ) + corpus[eval_split] = {e["id"]: {"text": e["content"]} for e in domain_corpus} + queries[eval_split] = {e["id"]: e["query"] for e in examples} + relevant_docs[eval_split] = defaultdict(dict) + top_ranked[eval_split] = defaultdict(list) + + # Get all document IDs + all_doc_ids = [e["id"] for e in domain_corpus] + have_excluded_ids = False + + for e in examples: + qid = e["id"] + gold_ids = e[gold_ids_field] + for gid in gold_ids: + relevant_docs[eval_split][qid].update({gid: 1}) + + # Create top_ranked: all documents except excluded_ids + excluded_ids = e.get("excluded_ids", []) + if excluded_ids and excluded_ids != ["N/A"]: + excluded_set = set(excluded_ids) + top_ranked[eval_split][qid] = [ + doc_id for doc_id in all_doc_ids if doc_id not in excluded_set + ] + have_excluded_ids = True + else: + # No exclusions, use all documents + top_ranked[eval_split][qid] = all_doc_ids + + corpus = datasets.DatasetDict(corpus) + queries = datasets.DatasetDict(queries) + relevant_docs = datasets.DatasetDict(relevant_docs) + if have_excluded_ids: + top_ranked = datasets.DatasetDict(top_ranked) + else: + top_ranked = None + return corpus, queries, relevant_docs, top_ranked + + +_BIBTEX_CITATION = r""" +@misc{su2024brightrealisticchallengingbenchmark, + archiveprefix = {arXiv}, + author = {Hongjin Su and Howard Yen and Mengzhou Xia and Weijia Shi and Niklas Muennighoff and Han-yu Wang and Haisu Liu and Quan Shi and Zachary S. Siegel and Michael Tang and Ruoxi Sun and Jinsung Yoon and Sercan O. 
Arik and Danqi Chen and Tao Yu}, + eprint = {2407.12883}, + primaryclass = {cs.CL}, + title = {BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval}, + url = {https://arxiv.org/abs/2407.12883}, + year = {2024}, +} +""" + + +class BrightBiologyRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="BrightBiologyRetrieval", + dataset={ + "path": "xlangai/BRIGHT", + "revision": "3066d29c9651a576c8aba4832d249807b181ecae", + }, + reference="https://huggingface.co/datasets/xlangai/BRIGHT", + description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Biology StackExchange answers.", + type="Retrieval", + prompt={ + "query": "Represent this biology post for searching relevant passages: " + }, + category="t2t", + eval_splits=["standard"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2024-03-01", "2024-06-01"), + domains=["Non-fiction", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="found", + modalities=["text"], + bibtex_citation=_BIBTEX_CITATION, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs, self.top_ranked = ( + load_bright_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + domain="biology", + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + ) + self.data_loaded = True + + +class BrightEarthScienceRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="BrightEarthScienceRetrieval", + dataset={ + "path": "xlangai/BRIGHT", + "revision": "3066d29c9651a576c8aba4832d249807b181ecae", + }, + reference="https://huggingface.co/datasets/xlangai/BRIGHT", + description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Earth Science StackExchange answers.", + type="Retrieval", + prompt={ + "query": "Represent this earth_science post for searching relevant passages: " + }, + category="t2t", + eval_splits=["standard"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2024-03-01", "2024-06-01"), + domains=["Non-fiction", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="found", + modalities=["text"], + bibtex_citation=_BIBTEX_CITATION, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs, self.top_ranked = ( + load_bright_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + domain="earth_science", + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + ) + self.data_loaded = True + + +class BrightEconomicsRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="BrightEconomicsRetrieval", + dataset={ + "path": "xlangai/BRIGHT", + "revision": "3066d29c9651a576c8aba4832d249807b181ecae", + }, + reference="https://huggingface.co/datasets/xlangai/BRIGHT", + description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. 
Retrieval of web documents cited in Economics StackExchange answers.", + type="Retrieval", + prompt={ + "query": "Represent this economics post for searching relevant passages: " + }, + category="t2t", + eval_splits=["standard"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2024-03-01", "2024-06-01"), + domains=["Non-fiction", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="found", + modalities=["text"], + bibtex_citation=_BIBTEX_CITATION, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs, self.top_ranked = ( + load_bright_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + domain="economics", + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + ) + self.data_loaded = True + + +class BrightPsychologyRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="BrightPsychologyRetrieval", + dataset={ + "path": "xlangai/BRIGHT", + "revision": "3066d29c9651a576c8aba4832d249807b181ecae", + }, + reference="https://huggingface.co/datasets/xlangai/BRIGHT", + description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Psychology StackExchange answers.", + type="Retrieval", + prompt={ + "query": "Represent this psychology post for searching relevant passages: " + }, + category="t2t", + eval_splits=["standard"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2024-03-01", "2024-06-01"), + domains=["Non-fiction", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="found", + modalities=["text"], + bibtex_citation=_BIBTEX_CITATION, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs, self.top_ranked = ( + load_bright_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + domain="psychology", + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + ) + self.data_loaded = True + + +class BrightRoboticsRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="BrightRoboticsRetrieval", + dataset={ + "path": "xlangai/BRIGHT", + "revision": "3066d29c9651a576c8aba4832d249807b181ecae", + }, + reference="https://huggingface.co/datasets/xlangai/BRIGHT", + description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. 
Retrieval of web documents cited in Robotics StackExchange answers.", + type="Retrieval", + prompt={ + "query": "Represent this robotics post for searching relevant passages: " + }, + category="t2t", + eval_splits=["standard"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2024-03-01", "2024-06-01"), + domains=["Non-fiction", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="found", + modalities=["text"], + bibtex_citation=_BIBTEX_CITATION, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs, self.top_ranked = ( + load_bright_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + domain="robotics", + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + ) + self.data_loaded = True + + +class BrightStackoverflowRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="BrightStackoverflowRetrieval", + dataset={ + "path": "xlangai/BRIGHT", + "revision": "3066d29c9651a576c8aba4832d249807b181ecae", + }, + reference="https://huggingface.co/datasets/xlangai/BRIGHT", + description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Stack Overflow answers.", + type="Retrieval", + prompt={ + "query": "Represent this stackoverflow post for searching relevant passages: " + }, + category="t2t", + eval_splits=["standard"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2024-03-01", "2024-06-01"), + domains=["Non-fiction", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="found", + modalities=["text"], + bibtex_citation=_BIBTEX_CITATION, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs, self.top_ranked = ( + load_bright_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + domain="stackoverflow", + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + ) + self.data_loaded = True + + +class BrightSustainableLivingRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="BrightSustainableLivingRetrieval", + dataset={ + "path": "xlangai/BRIGHT", + "revision": "3066d29c9651a576c8aba4832d249807b181ecae", + }, + reference="https://huggingface.co/datasets/xlangai/BRIGHT", + description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. 
Retrieval of web documents cited in Sustainable Living StackExchange answers.", + type="Retrieval", + prompt={ + "query": "Represent this sustainable_living post for searching relevant passages: " + }, + category="t2t", + eval_splits=["standard"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2024-03-01", "2024-06-01"), + domains=["Non-fiction", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="found", + modalities=["text"], + bibtex_citation=_BIBTEX_CITATION, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs, self.top_ranked = ( + load_bright_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + domain="sustainable_living", + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + ) + self.data_loaded = True + + +class BrightPonyRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="BrightPonyRetrieval", + dataset={ + "path": "xlangai/BRIGHT", + "revision": "3066d29c9651a576c8aba4832d249807b181ecae", + }, + reference="https://huggingface.co/datasets/xlangai/BRIGHT", + description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of Pony programming language syntax documentation.", + type="Retrieval", + prompt={ + "query": "Represent this Pony question for searching relevant passages: " + }, + category="t2t", + eval_splits=["standard"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2024-03-01", "2024-06-01"), + domains=["Non-fiction", "Written"], + task_subtypes=["Article retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="found", + modalities=["text"], + bibtex_citation=_BIBTEX_CITATION, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus, self.queries, self.relevant_docs, self.top_ranked = ( + load_bright_data( + path=self.metadata.dataset["path"], + eval_splits=self.metadata.eval_splits, + domain="pony", + cache_dir=kwargs.get("cache_dir", None), + revision=self.metadata.dataset["revision"], + ) + ) + self.data_loaded = True + + +class BrightLeetcodeRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="BrightLeetcodeRetrieval", + dataset={ + "path": "xlangai/BRIGHT", + "revision": "3066d29c9651a576c8aba4832d249807b181ecae", + }, + reference="https://huggingface.co/datasets/xlangai/BRIGHT", + description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. 
Retrieval of similar algorithmic problems based on shared solution techniques.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Coding problem for searching relevant examples: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="leetcode",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightAopsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightAopsRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of similar Math Olympiad problems from Art of Problem Solving.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Math problem for searching relevant examples: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="aops",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightTheoremQATheoremsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightTheoremQATheoremsRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of theorem definitions and proofs from ProofWiki.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Math problem for searching relevant theorems: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="theoremqa_theorems",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightTheoremQAQuestionsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightTheoremQAQuestionsRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of theorem definitions from ProofWiki given questions rephrased as real-world scenarios.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Math problem for searching relevant examples: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="theoremqa_questions",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightBiologyLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightBiologyLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Biology StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this biology post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="biology",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightEarthScienceLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightEarthScienceLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Earth Science StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this earth_science post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="earth_science",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightEconomicsLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightEconomicsLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Economics StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this economics post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="economics",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightPsychologyLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightPsychologyLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Psychology StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this psychology post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="psychology",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightRoboticsLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightRoboticsLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Robotics StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this robotics post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="robotics",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightStackoverflowLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightStackoverflowLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Stack Overflow answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this stackoverflow post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="stackoverflow",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightSustainableLivingLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightSustainableLivingLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Sustainable Living StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this sustainable_living post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="sustainable_living",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightPonyLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightPonyLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of Pony programming language syntax documentation with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Pony question for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="pony",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
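Usage note (illustrative, not part of the patch): once these task classes are registered in mteb's task registry, they can be evaluated through the standard mteb flow. A minimal sketch, assuming the sentence-transformers package is installed; the encoder name is an arbitrary example, not a recommendation:

    import mteb
    from sentence_transformers import SentenceTransformer

    # Any text encoder can stand in here; the model below is only an example.
    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    # Select one of the newly added BRIGHT tasks by name and run the evaluation.
    tasks = mteb.get_tasks(tasks=["BrightBiologyLongRetrieval"])
    evaluation = mteb.MTEB(tasks=tasks)
    results = evaluation.run(model, output_folder="results")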