From 0d27be63ee3096622019f0fff7ad59dbbc8b174f Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Thu, 6 Mar 2025 10:52:43 +0000 Subject: [PATCH] add the rest --- .../Image/Compositionality/AROCocoOrder.json | 11 ++++++ .../Compositionality/AROFlickrOrder.json | 11 ++++++ .../AROVisualAttribution.json | 11 ++++++ .../Compositionality/AROVisualRelation.json | 11 ++++++ .../ImageCoDeT2IMultiChoice.json | 34 +++++++++++++++++ .../VisionCentric/BLINKIT2TMultiChoice.json | 34 +++++++++++++++++ .../Image/VisionCentric/CVBenchCount.json | 37 +++++++++++++++++++ .../Image/VisionCentric/CVBenchDepth.json | 25 +++++++++++++ .../Image/VisionCentric/CVBenchDistance.json | 25 +++++++++++++ .../Image/VisionCentric/CVBenchRelation.json | 25 +++++++++++++ 10 files changed, 224 insertions(+) create mode 100644 mteb/descriptive_stats/Image/Compositionality/AROCocoOrder.json create mode 100644 mteb/descriptive_stats/Image/Compositionality/AROFlickrOrder.json create mode 100644 mteb/descriptive_stats/Image/Compositionality/AROVisualAttribution.json create mode 100644 mteb/descriptive_stats/Image/Compositionality/AROVisualRelation.json create mode 100644 mteb/descriptive_stats/Image/Compositionality/ImageCoDeT2IMultiChoice.json create mode 100644 mteb/descriptive_stats/Image/VisionCentric/BLINKIT2TMultiChoice.json create mode 100644 mteb/descriptive_stats/Image/VisionCentric/CVBenchCount.json create mode 100644 mteb/descriptive_stats/Image/VisionCentric/CVBenchDepth.json create mode 100644 mteb/descriptive_stats/Image/VisionCentric/CVBenchDistance.json create mode 100644 mteb/descriptive_stats/Image/VisionCentric/CVBenchRelation.json diff --git a/mteb/descriptive_stats/Image/Compositionality/AROCocoOrder.json b/mteb/descriptive_stats/Image/Compositionality/AROCocoOrder.json new file mode 100644 index 0000000000..a5ab5dd869 --- /dev/null +++ b/mteb/descriptive_stats/Image/Compositionality/AROCocoOrder.json @@ -0,0 +1,11 @@ +{ + "test": { + "num_samples": 25010, + "num_images": 25010, + "num_texts": 125050, + "num_unique_texts": 119661, + "min_text_length": 26, + "average_text_length": 51.520103958416634, + "max_text_length": 191 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/Compositionality/AROFlickrOrder.json b/mteb/descriptive_stats/Image/Compositionality/AROFlickrOrder.json new file mode 100644 index 0000000000..fc3cef45e5 --- /dev/null +++ b/mteb/descriptive_stats/Image/Compositionality/AROFlickrOrder.json @@ -0,0 +1,11 @@ +{ + "test": { + "num_samples": 5000, + "num_images": 5000, + "num_texts": 25000, + "num_unique_texts": 23892, + "min_text_length": 11, + "average_text_length": 62.37296, + "max_text_length": 185 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/Compositionality/AROVisualAttribution.json b/mteb/descriptive_stats/Image/Compositionality/AROVisualAttribution.json new file mode 100644 index 0000000000..d22c267d00 --- /dev/null +++ b/mteb/descriptive_stats/Image/Compositionality/AROVisualAttribution.json @@ -0,0 +1,11 @@ +{ + "test": { + "num_samples": 28748, + "num_images": 28748, + "num_texts": 57496, + "num_unique_texts": 52146, + "min_text_length": 27, + "average_text_length": 35.977772366773344, + "max_text_length": 61 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/Compositionality/AROVisualRelation.json b/mteb/descriptive_stats/Image/Compositionality/AROVisualRelation.json new file mode 100644 index 0000000000..98129fdfe0 --- /dev/null +++ b/mteb/descriptive_stats/Image/Compositionality/AROVisualRelation.json @@ -0,0 +1,11 @@ +{ + "test": { + "num_samples": 23937, + "num_images": 23937, + "num_texts": 47874, + "num_unique_texts": 26706, + "min_text_length": 21, + "average_text_length": 34.826795337761624, + "max_text_length": 57 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/Compositionality/ImageCoDeT2IMultiChoice.json b/mteb/descriptive_stats/Image/Compositionality/ImageCoDeT2IMultiChoice.json new file mode 100644 index 0000000000..f806953b68 --- /dev/null +++ b/mteb/descriptive_stats/Image/Compositionality/ImageCoDeT2IMultiChoice.json @@ -0,0 +1,34 @@ +{ + "test": { + "number_of_characters": 236457, + "num_samples": 25322, + "num_queries": 2302, + "num_documents": 23020, + "min_document_length": 0, + "average_document_length": 0, + "max_document_length": 0, + "unique_documents": 0, + "min_document_image_width": 256, + "average_document_image_width": 256.0, + "max_document_image_width": 256, + "min_document_image_height": 256, + "average_document_image_height": 256.0, + "max_document_image_height": 256, + "num_document_images": 23020, + "min_query_length": 1, + "average_query_length": 102.71807124239791, + "max_query_length": 350, + "unique_queries": 2302, + "num_query_images": 0, + "min_query_image_width": 0, + "average_query_image_width": 0, + "max_query_image_width": 0, + "min_query_image_height": 0, + "average_query_image_height": 0, + "max_query_image_height": 0, + "min_relevant_docs_per_query": 10, + "average_relevant_docs_per_query": 10.0, + "max_relevant_docs_per_query": 10, + "unique_relevant_docs": 10390 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/VisionCentric/BLINKIT2TMultiChoice.json b/mteb/descriptive_stats/Image/VisionCentric/BLINKIT2TMultiChoice.json new file mode 100644 index 0000000000..d24a116c8e --- /dev/null +++ b/mteb/descriptive_stats/Image/VisionCentric/BLINKIT2TMultiChoice.json @@ -0,0 +1,34 @@ +{ + "test": { + "number_of_characters": 54272, + "num_samples": 813, + "num_queries": 793, + "num_documents": 20, + "min_document_length": 1, + "average_document_length": 5.8, + "max_document_length": 14, + "unique_documents": 20, + "min_document_image_width": 0, + "average_document_image_width": 0, + "max_document_image_width": 0, + "min_document_image_height": 0, + "average_document_image_height": 0, + "max_document_image_height": 0, + "num_document_images": 0, + "min_query_length": 22, + "average_query_length": 68.29255989911728, + "max_query_length": 135, + "unique_queries": 347, + "num_query_images": 793, + "min_query_image_width": 63, + "average_query_image_width": 515.0756620428751, + "max_query_image_width": 4096, + "min_query_image_height": 232, + "average_query_image_height": 722.6418663303909, + "max_query_image_height": 3226, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.849936948297604, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 20 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/VisionCentric/CVBenchCount.json b/mteb/descriptive_stats/Image/VisionCentric/CVBenchCount.json new file mode 100644 index 0000000000..9a4d8077c7 --- /dev/null +++ b/mteb/descriptive_stats/Image/VisionCentric/CVBenchCount.json @@ -0,0 +1,37 @@ +{ + "test": { + "num_samples": 788, + "min_image_width": 200, + "average_image_width": 757.6789340101523, + "max_image_width": 2200, + "min_image_height": 181, + "average_image_height": 631.3147208121827, + "max_image_height": 2200, + "min_num_choices": 4, + "average_num_choices": 4.550761421319797, + "max_num_choices": 6, + "min_question_length": 30, + "average_question_length": 34.35406091370558, + "max_question_length": 45, + "answers": { + "2": { + "count": 169 + }, + "4": { + "count": 63 + }, + "3": { + "count": 167 + }, + "1": { + "count": 184 + }, + "0": { + "count": 182 + }, + "5": { + "count": 23 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/VisionCentric/CVBenchDepth.json b/mteb/descriptive_stats/Image/VisionCentric/CVBenchDepth.json new file mode 100644 index 0000000000..1995597a46 --- /dev/null +++ b/mteb/descriptive_stats/Image/VisionCentric/CVBenchDepth.json @@ -0,0 +1,25 @@ +{ + "test": { + "num_samples": 600, + "min_image_width": 561, + "average_image_width": 1090.9616666666666, + "max_image_width": 1600, + "min_image_height": 427, + "average_image_height": 715.985, + "max_image_height": 900, + "min_num_choices": 2, + "average_num_choices": 2.0, + "max_num_choices": 2, + "min_question_length": 130, + "average_question_length": 136.04333333333332, + "max_question_length": 147, + "answers": { + "0": { + "count": 300 + }, + "1": { + "count": 300 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/VisionCentric/CVBenchDistance.json b/mteb/descriptive_stats/Image/VisionCentric/CVBenchDistance.json new file mode 100644 index 0000000000..439aa253b6 --- /dev/null +++ b/mteb/descriptive_stats/Image/VisionCentric/CVBenchDistance.json @@ -0,0 +1,25 @@ +{ + "test": { + "num_samples": 600, + "min_image_width": 561, + "average_image_width": 1099.2883333333334, + "max_image_width": 1600, + "min_image_height": 427, + "average_image_height": 720.9983333333333, + "max_image_height": 900, + "min_num_choices": 2, + "average_num_choices": 2.0, + "max_num_choices": 2, + "min_question_length": 204, + "average_question_length": 212.40333333333334, + "max_question_length": 223, + "answers": { + "0": { + "count": 303 + }, + "1": { + "count": 297 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/VisionCentric/CVBenchRelation.json b/mteb/descriptive_stats/Image/VisionCentric/CVBenchRelation.json new file mode 100644 index 0000000000..e0587321da --- /dev/null +++ b/mteb/descriptive_stats/Image/VisionCentric/CVBenchRelation.json @@ -0,0 +1,25 @@ +{ + "test": { + "num_samples": 650, + "min_image_width": 189, + "average_image_width": 546.3169230769231, + "max_image_width": 2200, + "min_image_height": 190, + "average_image_height": 448.4492307692308, + "max_image_height": 2200, + "min_num_choices": 2, + "average_num_choices": 2.0, + "max_num_choices": 2, + "min_question_length": 132, + "average_question_length": 181.45846153846153, + "max_question_length": 224, + "answers": { + "0": { + "count": 327 + }, + "1": { + "count": 323 + } + } + } +} \ No newline at end of file