From 540e92bd98f0af395f3d028ca857d4fd02779eba Mon Sep 17 00:00:00 2001 From: gowitheflow-1998 Date: Sat, 15 Mar 2025 16:24:11 +0000 Subject: [PATCH 1/3] correct stats --- mteb/models/vista_models.py | 10 +++++----- .../Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py | 6 +++--- .../Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py | 6 +++--- .../Any2AnyMultiChoice/eng/ROxfordI2IMultiChoice.py | 8 ++++---- .../Any2AnyMultiChoice/eng/RParisI2IMultiChoice.py | 10 +++++----- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/mteb/models/vista_models.py b/mteb/models/vista_models.py index 4448bc0006..47382fae4a 100644 --- a/mteb/models/vista_models.py +++ b/mteb/models/vista_models.py @@ -249,8 +249,8 @@ def calculate_probs(self, text_embeddings, image_embeddings): release_date="2024-06-06", modalities=["image", "text"], n_parameters=196_000_000, - memory_usage_mb=748, - max_tokens=77, + memory_usage_mb=1631, + max_tokens=512, embed_dim=768, license=None, open_weights=True, @@ -275,9 +275,9 @@ def calculate_probs(self, text_embeddings, image_embeddings): revision="98db10b10d22620010d06f11733346e1c98c34aa", release_date="2024-06-06", modalities=["image", "text"], - n_parameters=None, - memory_usage_mb=None, - max_tokens=77, + n_parameters=872_909_505, + memory_usage_mb=4263, + max_tokens=8192, embed_dim=1024, license=None, open_weights=True, diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py index f15d1df8a8..8e537def6a 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py @@ -35,13 +35,13 @@ class BLINKIT2IMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 534}, + "n_samples": {"test": 402}, "avg_character_length": { "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 1200, - "num_queries": 534, + "num_documents": 804, + "num_queries": 402, "average_relevant_docs_per_query": 1, } }, diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py index b4e6c08472..6503cc5d3f 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py @@ -34,13 +34,13 @@ class BLINKIT2TMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 923}, + "n_samples": {"test": 793}, "avg_character_length": { "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 24, - "num_queries": 923, + "num_documents": 20, + "num_queries": 793, "average_relevant_docs_per_query": 1, } }, diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/ROxfordI2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/ROxfordI2IMultiChoice.py index d5661e2840..659dc3a059 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/ROxfordI2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/ROxfordI2IMultiChoice.py @@ -42,9 +42,9 @@ class ROxfordEasyI2IMultiChoice(AbsTaskAny2AnyMultiChoice): "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 516, + "num_documents": 4993, "num_queries": 70, - "average_relevant_docs_per_query": 43.3, + "average_relevant_docs_per_query": 44.5, } }, }, @@ -88,7 +88,7 @@ class ROxfordMediumI2IMultiChoice(AbsTaskAny2AnyMultiChoice): "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 788, + "num_documents": 4993, "num_queries": 70, "average_relevant_docs_per_query": 78.9, } @@ -134,7 +134,7 @@ class ROxfordHardI2IMultiChoice(AbsTaskAny2AnyMultiChoice): "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 685, + "num_documents": 4993, "num_queries": 70, "average_relevant_docs_per_query": 35.7, } diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/RParisI2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/RParisI2IMultiChoice.py index 754111b594..8e5331752d 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/RParisI2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/RParisI2IMultiChoice.py @@ -42,9 +42,9 @@ class RParisEasyI2IMultiChoice(AbsTaskAny2AnyMultiChoice): "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 516, + "num_documents": 6322, "num_queries": 70, - "average_relevant_docs_per_query": 43.3, + "average_relevant_docs_per_query": 98.2, } }, }, @@ -88,9 +88,9 @@ class RParisMediumI2IMultiChoice(AbsTaskAny2AnyMultiChoice): "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 788, + "num_documents": 6322, "num_queries": 70, - "average_relevant_docs_per_query": 78.9, + "average_relevant_docs_per_query": 246.1, } }, }, @@ -136,7 +136,7 @@ class RParisHardI2IMultiChoice(AbsTaskAny2AnyMultiChoice): "average_query_length": 0.0, "num_documents": 685, "num_queries": 70, - "average_relevant_docs_per_query": 35.7, + "average_relevant_docs_per_query": 147.86, } }, }, From b401001208de9cf75b57deeddf9f65ce4cfd7fc8 Mon Sep 17 00:00:00 2001 From: gowitheflow-1998 Date: Sat, 15 Mar 2025 17:24:20 +0000 Subject: [PATCH 2/3] update Any2AnyMultiChoice qrels stats compute logic --- mteb/abstasks/Image/AbsTaskAny2AnyMultiChoice.py | 2 +- .../Image/Any2AnyRetrieval/ROxfordEasyI2IMultiChoice.json | 8 ++++---- .../Image/Any2AnyRetrieval/ROxfordHardI2IMultiChoice.json | 8 ++++---- .../Any2AnyRetrieval/ROxfordMediumI2IMultiChoice.json | 8 ++++---- .../Image/Any2AnyRetrieval/RParisEasyI2IMultiChoice.json | 8 ++++---- .../Image/Any2AnyRetrieval/RParisHardI2IMultiChoice.json | 8 ++++---- .../Any2AnyRetrieval/RParisMediumI2IMultiChoice.json | 8 ++++---- .../Image/Compositionality/ImageCoDeT2IMultiChoice.json | 8 ++++---- .../Image/VisionCentric/BLINKIT2IMultiChoice.json | 8 ++++---- .../Image/VisionCentric/BLINKIT2TMultiChoice.json | 8 ++++---- 10 files changed, 37 insertions(+), 37 deletions(-) diff --git a/mteb/abstasks/Image/AbsTaskAny2AnyMultiChoice.py b/mteb/abstasks/Image/AbsTaskAny2AnyMultiChoice.py index 6f501a7a6c..7076233500 100644 --- a/mteb/abstasks/Image/AbsTaskAny2AnyMultiChoice.py +++ b/mteb/abstasks/Image/AbsTaskAny2AnyMultiChoice.py @@ -534,7 +534,7 @@ def _calculate_metrics_from_split( # create a list of number of relevant docs per query queries_set = set(queries["id"]) qrels_lengths = [ - len(relevant_docs[qid]) + len([v for k, v in relevant_docs[qid].items() if v != 0]) for qid in tqdm.tqdm(relevant_docs.keys(), desc="qrels:") if qid in queries_set ] diff --git a/mteb/descriptive_stats/Image/Any2AnyRetrieval/ROxfordEasyI2IMultiChoice.json b/mteb/descriptive_stats/Image/Any2AnyRetrieval/ROxfordEasyI2IMultiChoice.json index 33ecaf442a..e26273628e 100644 --- a/mteb/descriptive_stats/Image/Any2AnyRetrieval/ROxfordEasyI2IMultiChoice.json +++ b/mteb/descriptive_stats/Image/Any2AnyRetrieval/ROxfordEasyI2IMultiChoice.json @@ -26,9 +26,9 @@ "min_query_image_height": 256, "average_query_image_height": 256.0, "max_query_image_height": 256, - "min_relevant_docs_per_query": 4678, - "average_relevant_docs_per_query": 4937.957142857143, - "max_relevant_docs_per_query": 4991, + "min_relevant_docs_per_query": 0, + "average_relevant_docs_per_query": 43.27142857142857, + "max_relevant_docs_per_query": 248, "unique_relevant_docs": 4993 } -} +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/Any2AnyRetrieval/ROxfordHardI2IMultiChoice.json b/mteb/descriptive_stats/Image/Any2AnyRetrieval/ROxfordHardI2IMultiChoice.json index a5d3b2e89a..a6b9a21ac5 100644 --- a/mteb/descriptive_stats/Image/Any2AnyRetrieval/ROxfordHardI2IMultiChoice.json +++ b/mteb/descriptive_stats/Image/Any2AnyRetrieval/ROxfordHardI2IMultiChoice.json @@ -26,9 +26,9 @@ "min_query_image_height": 256, "average_query_image_height": 256.0, "max_query_image_height": 256, - "min_relevant_docs_per_query": 4688, - "average_relevant_docs_per_query": 4930.357142857143, - "max_relevant_docs_per_query": 4991, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 35.67142857142857, + "max_relevant_docs_per_query": 284, "unique_relevant_docs": 4993 } -} +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/Any2AnyRetrieval/ROxfordMediumI2IMultiChoice.json b/mteb/descriptive_stats/Image/Any2AnyRetrieval/ROxfordMediumI2IMultiChoice.json index c37e4bf441..333bbe786d 100644 --- a/mteb/descriptive_stats/Image/Any2AnyRetrieval/ROxfordMediumI2IMultiChoice.json +++ b/mteb/descriptive_stats/Image/Any2AnyRetrieval/ROxfordMediumI2IMultiChoice.json @@ -26,9 +26,9 @@ "min_query_image_height": 256, "average_query_image_height": 256.0, "max_query_image_height": 256, - "min_relevant_docs_per_query": 4919, - "average_relevant_docs_per_query": 4973.628571428571, - "max_relevant_docs_per_query": 4992, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 78.94285714285714, + "max_relevant_docs_per_query": 347, "unique_relevant_docs": 4993 } -} +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/Any2AnyRetrieval/RParisEasyI2IMultiChoice.json b/mteb/descriptive_stats/Image/Any2AnyRetrieval/RParisEasyI2IMultiChoice.json index a3ef89080d..5cf0e5ee74 100644 --- a/mteb/descriptive_stats/Image/Any2AnyRetrieval/RParisEasyI2IMultiChoice.json +++ b/mteb/descriptive_stats/Image/Any2AnyRetrieval/RParisEasyI2IMultiChoice.json @@ -26,9 +26,9 @@ "min_query_image_height": 256, "average_query_image_height": 256.0, "max_query_image_height": 256, - "min_relevant_docs_per_query": 5523, - "average_relevant_docs_per_query": 6071.957142857143, - "max_relevant_docs_per_query": 6273, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 98.2, + "max_relevant_docs_per_query": 199, "unique_relevant_docs": 6322 } -} +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/Any2AnyRetrieval/RParisHardI2IMultiChoice.json b/mteb/descriptive_stats/Image/Any2AnyRetrieval/RParisHardI2IMultiChoice.json index 915583791d..87f882d612 100644 --- a/mteb/descriptive_stats/Image/Any2AnyRetrieval/RParisHardI2IMultiChoice.json +++ b/mteb/descriptive_stats/Image/Any2AnyRetrieval/RParisHardI2IMultiChoice.json @@ -26,9 +26,9 @@ "min_query_image_height": 256, "average_query_image_height": 256.0, "max_query_image_height": 256, - "min_relevant_docs_per_query": 5874, - "average_relevant_docs_per_query": 6121.614285714286, - "max_relevant_docs_per_query": 6293, + "min_relevant_docs_per_query": 34, + "average_relevant_docs_per_query": 147.85714285714286, + "max_relevant_docs_per_query": 556, "unique_relevant_docs": 6322 } -} +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/Any2AnyRetrieval/RParisMediumI2IMultiChoice.json b/mteb/descriptive_stats/Image/Any2AnyRetrieval/RParisMediumI2IMultiChoice.json index 716baa636d..95f4f9b84a 100644 --- a/mteb/descriptive_stats/Image/Any2AnyRetrieval/RParisMediumI2IMultiChoice.json +++ b/mteb/descriptive_stats/Image/Any2AnyRetrieval/RParisMediumI2IMultiChoice.json @@ -26,9 +26,9 @@ "min_query_image_height": 256, "average_query_image_height": 256.0, "max_query_image_height": 256, - "min_relevant_docs_per_query": 5978, - "average_relevant_docs_per_query": 6219.814285714286, - "max_relevant_docs_per_query": 6322, + "min_relevant_docs_per_query": 76, + "average_relevant_docs_per_query": 246.05714285714285, + "max_relevant_docs_per_query": 636, "unique_relevant_docs": 6322 } -} +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/Compositionality/ImageCoDeT2IMultiChoice.json b/mteb/descriptive_stats/Image/Compositionality/ImageCoDeT2IMultiChoice.json index 8fca63d6a5..4d36f88146 100644 --- a/mteb/descriptive_stats/Image/Compositionality/ImageCoDeT2IMultiChoice.json +++ b/mteb/descriptive_stats/Image/Compositionality/ImageCoDeT2IMultiChoice.json @@ -26,9 +26,9 @@ "min_query_image_height": 0, "average_query_image_height": 0, "max_query_image_height": 0, - "min_relevant_docs_per_query": 10, - "average_relevant_docs_per_query": 10.0, - "max_relevant_docs_per_query": 10, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, "unique_relevant_docs": 10390 } -} +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/VisionCentric/BLINKIT2IMultiChoice.json b/mteb/descriptive_stats/Image/VisionCentric/BLINKIT2IMultiChoice.json index 99701caf5f..a290475a94 100644 --- a/mteb/descriptive_stats/Image/VisionCentric/BLINKIT2IMultiChoice.json +++ b/mteb/descriptive_stats/Image/VisionCentric/BLINKIT2IMultiChoice.json @@ -26,9 +26,9 @@ "min_query_image_height": 254, "average_query_image_height": 875.3781094527363, "max_query_image_height": 5687, - "min_relevant_docs_per_query": 2, - "average_relevant_docs_per_query": 2.0, - "max_relevant_docs_per_query": 2, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, "unique_relevant_docs": 804 } -} +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/VisionCentric/BLINKIT2TMultiChoice.json b/mteb/descriptive_stats/Image/VisionCentric/BLINKIT2TMultiChoice.json index 2baf1da26d..747d9238f1 100644 --- a/mteb/descriptive_stats/Image/VisionCentric/BLINKIT2TMultiChoice.json +++ b/mteb/descriptive_stats/Image/VisionCentric/BLINKIT2TMultiChoice.json @@ -26,9 +26,9 @@ "min_query_image_height": 232, "average_query_image_height": 722.6418663303909, "max_query_image_height": 3226, - "min_relevant_docs_per_query": 2, - "average_relevant_docs_per_query": 2.849936948297604, - "max_relevant_docs_per_query": 4, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, "unique_relevant_docs": 20 } -} +} \ No newline at end of file From d355a045ea9b8e28141ebe208c1447b133c1b0ab Mon Sep 17 00:00:00 2001 From: gowitheflow-1998 Date: Sat, 15 Mar 2025 17:32:35 +0000 Subject: [PATCH 3/3] final correction --- .../Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py | 2 +- .../Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py | 2 +- .../Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py | 2 +- .../Image/Any2AnyMultiChoice/eng/ROxfordI2IMultiChoice.py | 6 +++--- .../Image/Any2AnyMultiChoice/eng/RParisI2IMultiChoice.py | 8 ++++---- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py index 8e537def6a..83f2e5da78 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2IMultiChoice.py @@ -35,7 +35,7 @@ class BLINKIT2IMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 402}, + "n_samples": {"test": 1206}, "avg_character_length": { "test": { "average_document_length": 0.0, diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py index 6503cc5d3f..fe487626d6 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/BLINKIT2TMultiChoice.py @@ -34,7 +34,7 @@ class BLINKIT2TMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 793}, + "n_samples": {"test": 813}, "avg_character_length": { "test": { "average_document_length": 0.0, diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py index f9400ff280..6d60bf52c1 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/ImageCoDeT2IMultiChoice.py @@ -34,7 +34,7 @@ class ImageCoDeT2IMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 2302}, + "n_samples": {"test": 25322}, "avg_character_length": { "test": { "average_document_length": 0.0, diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/ROxfordI2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/ROxfordI2IMultiChoice.py index 659dc3a059..5afaf6bd3a 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/ROxfordI2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/ROxfordI2IMultiChoice.py @@ -37,7 +37,7 @@ class ROxfordEasyI2IMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 70}, + "n_samples": {"test": 5063}, "avg_character_length": { "test": { "average_document_length": 0.0, @@ -83,7 +83,7 @@ class ROxfordMediumI2IMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 70}, + "n_samples": {"test": 5063}, "avg_character_length": { "test": { "average_document_length": 0.0, @@ -129,7 +129,7 @@ class ROxfordHardI2IMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 70}, + "n_samples": {"test": 5063}, "avg_character_length": { "test": { "average_document_length": 0.0, diff --git a/mteb/tasks/Image/Any2AnyMultiChoice/eng/RParisI2IMultiChoice.py b/mteb/tasks/Image/Any2AnyMultiChoice/eng/RParisI2IMultiChoice.py index 8e5331752d..419afde02f 100644 --- a/mteb/tasks/Image/Any2AnyMultiChoice/eng/RParisI2IMultiChoice.py +++ b/mteb/tasks/Image/Any2AnyMultiChoice/eng/RParisI2IMultiChoice.py @@ -37,7 +37,7 @@ class RParisEasyI2IMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 70}, + "n_samples": {"test": 6392}, "avg_character_length": { "test": { "average_document_length": 0.0, @@ -83,7 +83,7 @@ class RParisMediumI2IMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 70}, + "n_samples": {"test": 6392}, "avg_character_length": { "test": { "average_document_length": 0.0, @@ -129,12 +129,12 @@ class RParisHardI2IMultiChoice(AbsTaskAny2AnyMultiChoice): } """, descriptive_stats={ - "n_samples": {"test": 70}, + "n_samples": {"test": 6392}, "avg_character_length": { "test": { "average_document_length": 0.0, "average_query_length": 0.0, - "num_documents": 685, + "num_documents": 6322, "num_queries": 70, "average_relevant_docs_per_query": 147.86, }