Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 70 additions & 40 deletions mteb/descriptive_stats/BitextMining/BUCC.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,66 +3,96 @@
"num_samples": 35000,
"number_of_characters": 146737556,
"unique_pairs": 35000,
"min_sentence1_length": 16,
"average_sentence1_length": 99.10931428571429,
"max_sentence1_length": 204,
"unique_sentence1": 34978,
"min_sentence2_length": 17,
"average_sentence2_length": 101.14933691422246,
"max_sentence2_length": 339,
"unique_sentence2": 1133728,
"sentence1_statistics": {
"total_text_length": 3468826,
"min_text_length": 16,
"average_text_length": 99.10931428571429,
"max_text_length": 204,
"unique_texts": 34978
},
"sentence2_statistics": {
"total_text_length": 143268730,
"min_text_length": 17,
"average_text_length": 101.14933691422246,
"max_text_length": 339,
"unique_texts": 1133728
},
"hf_subset_descriptive_stats": {
"de-en": {
"num_samples": 9580,
"number_of_characters": 41450074,
"unique_pairs": 9580,
"min_sentence1_length": 50,
"average_sentence1_length": 109.07974947807934,
"max_sentence1_length": 204,
"unique_sentence1": 9573,
"min_sentence2_length": 17,
"average_sentence2_length": 101.18043156531952,
"max_sentence2_length": 293,
"unique_sentence2": 397151
"sentence1_statistics": {
"total_text_length": 1044984,
"min_text_length": 50,
"average_text_length": 109.07974947807934,
"max_text_length": 204,
"unique_texts": 9573
},
"sentence2_statistics": {
"total_text_length": 40405090,
"min_text_length": 17,
"average_text_length": 101.18043156531952,
"max_text_length": 293,
"unique_texts": 397151
}
},
"fr-en": {
"num_samples": 9086,
"number_of_characters": 38272453,
"unique_pairs": 9086,
"min_sentence1_length": 43,
"average_sentence1_length": 99.31785163988553,
"max_sentence1_length": 174,
"unique_sentence1": 9081,
"min_sentence2_length": 21,
"average_sentence2_length": 101.05202942051324,
"max_sentence2_length": 319,
"unique_sentence2": 368033
"sentence1_statistics": {
"total_text_length": 902402,
"min_text_length": 43,
"average_text_length": 99.31785163988553,
"max_text_length": 174,
"unique_texts": 9081
},
"sentence2_statistics": {
"total_text_length": 37370051,
"min_text_length": 21,
"average_text_length": 101.05202942051324,
"max_text_length": 319,
"unique_texts": 368033
}
},
"ru-en": {
"num_samples": 14435,
"number_of_characters": 57904085,
"unique_pairs": 14435,
"min_sentence1_length": 40,
"average_sentence1_length": 101.6593003117423,
"max_sentence1_length": 186,
"unique_sentence1": 14425,
"min_sentence2_length": 21,
"average_sentence2_length": 101.06828784332406,
"max_sentence2_length": 339,
"unique_sentence2": 555503
"sentence1_statistics": {
"total_text_length": 1467452,
"min_text_length": 40,
"average_text_length": 101.6593003117423,
"max_text_length": 186,
"unique_texts": 14425
},
"sentence2_statistics": {
"total_text_length": 56436633,
"min_text_length": 21,
"average_text_length": 101.06828784332406,
"max_text_length": 339,
"unique_texts": 555503
}
},
"zh-en": {
"num_samples": 1899,
"number_of_characters": 9110944,
"unique_pairs": 1899,
"min_sentence1_length": 16,
"average_sentence1_length": 28.429699842022117,
"max_sentence1_length": 40,
"unique_sentence1": 1899,
"min_sentence2_length": 22,
"average_sentence2_length": 101.92388026108485,
"max_sentence2_length": 249,
"unique_sentence2": 88360
"sentence1_statistics": {
"total_text_length": 53988,
"min_text_length": 16,
"average_text_length": 28.429699842022117,
"max_text_length": 40,
"unique_texts": 1899
},
"sentence2_statistics": {
"total_text_length": 9056956,
"min_text_length": 22,
"average_text_length": 101.92388026108485,
"max_text_length": 249,
"unique_texts": 88360
}
}
}
}
Expand Down
64 changes: 40 additions & 24 deletions mteb/descriptive_stats/Classification/DKHateClassification.json
Original file line number Diff line number Diff line change
@@ -1,37 +1,53 @@
{
"test": {
"num_samples": 329,
"number_of_characters": 29011,
"number_texts_intersect_with_train": 4,
"min_text_length": 1,
"average_text_length": 88.17933130699087,
"max_text_length": 2434,
"unique_text": 326,
"unique_labels": 2,
"labels": {
"0": {
"count": 288
},
"1": {
"count": 41
"text_statistics": {
"total_text_length": 29011,
"min_text_length": 1,
"average_text_length": 88.17933130699087,
"max_text_length": 2434,
"unique_texts": 326
},
"image_statistics": null,
"label_statistics": {
"min_labels_per_text": 1,
"average_label_per_text": 1.0,
"max_labels_per_text": 1,
"unique_labels": 2,
"labels": {
"0": {
"count": 288
},
"1": {
"count": 41
}
}
}
},
"train": {
"num_samples": 2960,
"number_of_characters": 307722,
"number_texts_intersect_with_train": null,
"min_text_length": 1,
"average_text_length": 103.96013513513513,
"max_text_length": 5403,
"unique_text": 2902,
"unique_labels": 2,
"labels": {
"0": {
"count": 2576
},
"1": {
"count": 384
"text_statistics": {
"total_text_length": 307722,
"min_text_length": 1,
"average_text_length": 103.96013513513513,
"max_text_length": 5403,
"unique_texts": 2902
},
"image_statistics": null,
"label_statistics": {
"min_labels_per_text": 1,
"average_label_per_text": 1.0,
"max_labels_per_text": 1,
"unique_labels": 2,
"labels": {
"0": {
"count": 2576
},
"1": {
"count": 384
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,30 @@
{
"train": {
"num_samples": 2264,
"number_of_characters": 276123,
"number_texts_intersect_with_train": null,
"min_text_length": 9,
"average_text_length": 121.96245583038869,
"max_text_length": 315,
"unique_text": 2259,
"unique_labels": 3,
"labels": {
"1": {
"count": 1391
},
"2": {
"count": 570
},
"0": {
"count": 303
"text_statistics": {
"total_text_length": 276123,
"min_text_length": 9,
"average_text_length": 121.96245583038869,
"max_text_length": 315,
"unique_texts": 2259
},
"image_statistics": null,
"label_statistics": {
"min_labels_per_text": 1,
"average_label_per_text": 1.0,
"max_labels_per_text": 1,
"unique_labels": 3,
"labels": {
"1": {
"count": 1391
},
"2": {
"count": 570
},
"0": {
"count": 303
}
}
}
}
Expand Down
38 changes: 23 additions & 15 deletions mteb/descriptive_stats/Classification/KorHateClassification.json
Original file line number Diff line number Diff line change
@@ -1,22 +1,30 @@
{
"train": {
"num_samples": 2048,
"number_of_characters": 79006,
"number_texts_intersect_with_train": null,
"min_text_length": 4,
"average_text_length": 38.5771484375,
"max_text_length": 130,
"unique_text": 2048,
"unique_labels": 3,
"labels": {
"1": {
"count": 648
},
"2": {
"count": 904
},
"0": {
"count": 496
"text_statistics": {
"total_text_length": 79006,
"min_text_length": 4,
"average_text_length": 38.5771484375,
"max_text_length": 130,
"unique_texts": 2048
},
"image_statistics": null,
"label_statistics": {
"min_labels_per_text": 1,
"average_label_per_text": 1.0,
"max_labels_per_text": 1,
"unique_labels": 3,
"labels": {
"1": {
"count": 648
},
"2": {
"count": 904
},
"0": {
"count": 496
}
}
}
}
Expand Down
51 changes: 28 additions & 23 deletions mteb/descriptive_stats/Clustering/SwednClustering.json
Original file line number Diff line number Diff line change
@@ -1,27 +1,32 @@
{
"all": {
"num_samples": 4,
"number_of_characters": 2048,
"min_text_length": 512,
"average_text_length": 512.0,
"max_text_length": 512,
"unique_texts": 2047,
"min_labels_per_text": 234,
"average_labels_per_text": 512.0,
"max_labels_per_text": 1164,
"unique_labels": 4,
"labels": {
"culture": {
"count": 294
},
"domestic news": {
"count": 1164
},
"economy": {
"count": 234
},
"sports": {
"count": 356
"test": {
"num_samples": 2048,
"text_statistics": {
"total_text_length": 3317163,
"min_text_length": 64,
"average_text_length": 1619.70849609375,
"max_text_length": 28913,
"unique_texts": 2047
},
"image_statistics": null,
"label_statistics": {
"min_labels_per_text": 1,
"average_label_per_text": 1.0,
"max_labels_per_text": 1,
"unique_labels": 4,
"labels": {
"culture": {
"count": 294
},
"domestic news": {
"count": 1164
},
"economy": {
"count": 234
},
"sports": {
"count": 356
}
}
}
}
Expand Down
Loading