From ceb2d45992f084e22ea4797e6a47821760ca601f Mon Sep 17 00:00:00 2001 From: zhangzeqing Date: Thu, 5 Jun 2025 13:42:18 +0800 Subject: [PATCH 1/3] add geoembedding results --- .../AmazonCounterfactualClassification.json | 95 +++++++++++ .../ArXivHierarchicalClusteringP2P.json | 46 +++++ .../ArXivHierarchicalClusteringS2S.json | 46 +++++ .../ArguAna.json | 158 ++++++++++++++++++ .../AskUbuntuDupQuestions.json | 26 +++ .../BIOSSES.json | 26 +++ .../Banking77Classification.json | 73 ++++++++ .../BiorxivClusteringP2P.v2.json | 34 ++++ .../CQADupstackGamingRetrieval.json | 158 ++++++++++++++++++ .../CQADupstackUnixRetrieval.json | 158 ++++++++++++++++++ .../ClimateFEVERHardNegatives.json | 158 ++++++++++++++++++ .../FEVERHardNegatives.json | 158 ++++++++++++++++++ .../FiQA2018.json | 158 ++++++++++++++++++ .../HotpotQAHardNegatives.json | 158 ++++++++++++++++++ .../ImdbClassification.json | 95 +++++++++++ .../MTOPDomainClassification.json | 73 ++++++++ .../MassiveIntentClassification.json | 73 ++++++++ .../MassiveScenarioClassification.json | 73 ++++++++ .../MedrxivClusteringP2P.v2.json | 34 ++++ .../MedrxivClusteringS2S.v2.json | 34 ++++ .../MindSmallReranking.json | 26 +++ .../SCIDOCS.json | 158 ++++++++++++++++++ .../SICK-R.json | 26 +++ .../STS12.json | 26 +++ .../STS13.json | 26 +++ .../STS14.json | 26 +++ .../STS15.json | 26 +++ .../STS17.json | 26 +++ .../STS22.v2.json | 26 +++ .../STSBenchmark.json | 26 +++ .../SprintDuplicateQuestions.json | 58 +++++++ .../StackExchangeClustering.v2.json | 34 ++++ .../StackExchangeClusteringP2P.v2.json | 34 ++++ .../SummEvalSummarization.v2.json | 24 +++ .../TRECCOVID.json | 158 ++++++++++++++++++ .../Touche2020Retrieval.v3.json | 158 ++++++++++++++++++ .../ToxicConversationsClassification.json | 95 +++++++++++ ...weetSentimentExtractionClassification.json | 73 ++++++++ .../TwentyNewsgroupsClustering.v2.json | 34 ++++ .../TwitterSemEval2015.json | 58 +++++++ .../TwitterURLCorpus.json | 58 +++++++ .../model_meta.json | 1 + 42 files changed, 3011 insertions(+) create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AmazonCounterfactualClassification.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringP2P.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringS2S.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArguAna.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AskUbuntuDupQuestions.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BIOSSES.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Banking77Classification.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BiorxivClusteringP2P.v2.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackGamingRetrieval.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackUnixRetrieval.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ClimateFEVERHardNegatives.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FEVERHardNegatives.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FiQA2018.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/HotpotQAHardNegatives.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ImdbClassification.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MTOPDomainClassification.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveIntentClassification.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveScenarioClassification.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringP2P.v2.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringS2S.v2.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MindSmallReranking.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SCIDOCS.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SICK-R.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS12.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS13.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS14.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS15.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS17.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS22.v2.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STSBenchmark.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SprintDuplicateQuestions.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClustering.v2.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClusteringP2P.v2.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SummEvalSummarization.v2.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TRECCOVID.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Touche2020Retrieval.v3.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ToxicConversationsClassification.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TweetSentimentExtractionClassification.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwentyNewsgroupsClustering.v2.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterSemEval2015.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterURLCorpus.json create mode 100644 results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/model_meta.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AmazonCounterfactualClassification.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AmazonCounterfactualClassification.json new file mode 100644 index 0000000000..9cb8f31c6f --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AmazonCounterfactualClassification.json @@ -0,0 +1,95 @@ +{ + "dataset_revision": "e8379541af4e31359cca9fbcf4b00f2671dba205", + "task_name": "AmazonCounterfactualClassification", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "accuracy": 0.969552, + "f1": 0.953324, + "f1_weighted": 0.970082, + "ap": 0.865487, + "ap_weighted": 0.865487, + "scores_per_experiment": [ + { + "accuracy": 0.974627, + "f1": 0.960898, + "f1_weighted": 0.975007, + "ap": 0.885243, + "ap_weighted": 0.885243 + }, + { + "accuracy": 0.968657, + "f1": 0.951959, + "f1_weighted": 0.969206, + "ap": 0.860985, + "ap_weighted": 0.860985 + }, + { + "accuracy": 0.970149, + "f1": 0.953358, + "f1_weighted": 0.9704, + "ap": 0.867193, + "ap_weighted": 0.867193 + }, + { + "accuracy": 0.973134, + "f1": 0.958711, + "f1_weighted": 0.973571, + "ap": 0.879084, + "ap_weighted": 0.879084 + }, + { + "accuracy": 0.968657, + "f1": 0.951432, + "f1_weighted": 0.969045, + "ap": 0.860918, + "ap_weighted": 0.860918 + }, + { + "accuracy": 0.974627, + "f1": 0.960683, + "f1_weighted": 0.974941, + "ap": 0.885414, + "ap_weighted": 0.885414 + }, + { + "accuracy": 0.973134, + "f1": 0.958255, + "f1_weighted": 0.973432, + "ap": 0.879339, + "ap_weighted": 0.879339 + }, + { + "accuracy": 0.977612, + "f1": 0.965116, + "f1_weighted": 0.97783, + "ap": 0.89816, + "ap_weighted": 0.89816 + }, + { + "accuracy": 0.956716, + "f1": 0.935379, + "f1_weighted": 0.957991, + "ap": 0.816612, + "ap_weighted": 0.816612 + }, + { + "accuracy": 0.958209, + "f1": 0.937447, + "f1_weighted": 0.959392, + "ap": 0.82192, + "ap_weighted": 0.82192 + } + ], + "main_score": 0.969552, + "hf_subset": "en", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 41.224671602249146, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringP2P.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringP2P.json new file mode 100644 index 0000000000..05ad66cd70 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringP2P.json @@ -0,0 +1,46 @@ +{ + "dataset_revision": "0bbdb47bcbe3a90093699aefeed338a0f28a7ee8", + "task_name": "ArXivHierarchicalClusteringP2P", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "v_measures": { + "Level 0": [ + 0.734627, + 0.692796, + 0.674868, + 0.688748, + 0.65051, + 0.665281, + 0.712468, + 0.730769, + 0.662646, + 0.709435 + ], + "Level 1": [ + 0.617987, + 0.579289, + 0.609534, + 0.593757, + 0.592276, + 0.600809, + 0.58152, + 0.592046, + 0.608279, + 0.605467 + ] + }, + "v_measure": 0.645156, + "v_measure_std": 0.051646, + "main_score": 0.645156, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 14.439682960510254, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringS2S.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringS2S.json new file mode 100644 index 0000000000..571d44a509 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringS2S.json @@ -0,0 +1,46 @@ +{ + "dataset_revision": "b73bd54100e5abfa6e3a23dcafb46fe4d2438dc3", + "task_name": "ArXivHierarchicalClusteringS2S", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "v_measures": { + "Level 0": [ + 0.676202, + 0.678907, + 0.683023, + 0.668376, + 0.707832, + 0.681361, + 0.641119, + 0.721545, + 0.712057, + 0.698236 + ], + "Level 1": [ + 0.589873, + 0.619368, + 0.600436, + 0.614185, + 0.587109, + 0.59567, + 0.604972, + 0.601508, + 0.587307, + 0.605132 + ] + }, + "v_measure": 0.643711, + "v_measure_std": 0.046575, + "main_score": 0.643711, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 13.052504301071167, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArguAna.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArguAna.json new file mode 100644 index 0000000000..e2b87d17b0 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArguAna.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "c22ab2a51041ffd869aaddef7af8d8215647e41a", + "task_name": "ArguAna", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "ndcg_at_1": 0.55548, + "ndcg_at_3": 0.72962, + "ndcg_at_5": 0.76139, + "ndcg_at_10": 0.77873, + "ndcg_at_20": 0.7831, + "ndcg_at_100": 0.7831, + "ndcg_at_1000": 0.7831, + "map_at_1": 0.55548, + "map_at_3": 0.68789, + "map_at_5": 0.7056, + "map_at_10": 0.71281, + "map_at_20": 0.71413, + "map_at_100": 0.71413, + "map_at_1000": 0.71413, + "recall_at_1": 0.55548, + "recall_at_3": 0.84993, + "recall_at_5": 0.92674, + "recall_at_10": 0.98009, + "recall_at_20": 0.99644, + "recall_at_100": 0.99644, + "recall_at_1000": 0.99644, + "precision_at_1": 0.55548, + "precision_at_3": 0.28331, + "precision_at_5": 0.18535, + "precision_at_10": 0.09801, + "precision_at_20": 0.04982, + "precision_at_100": 0.00996, + "precision_at_1000": 0.001, + "mrr_at_1": 0.559033, + "mrr_at_3": 0.689426, + "mrr_at_5": 0.706994, + "mrr_at_10": 0.714324, + "mrr_at_20": 0.715647, + "mrr_at_100": 0.715647, + "mrr_at_1000": 0.715647, + "nauc_ndcg_at_1_max": -0.209535, + "nauc_ndcg_at_1_std": -0.242867, + "nauc_ndcg_at_1_diff1": 0.326084, + "nauc_ndcg_at_3_max": -0.188653, + "nauc_ndcg_at_3_std": -0.273796, + "nauc_ndcg_at_3_diff1": 0.30745, + "nauc_ndcg_at_5_max": -0.188827, + "nauc_ndcg_at_5_std": -0.266878, + "nauc_ndcg_at_5_diff1": 0.298944, + "nauc_ndcg_at_10_max": -0.201498, + "nauc_ndcg_at_10_std": -0.263769, + "nauc_ndcg_at_10_diff1": 0.295597, + "nauc_ndcg_at_20_max": -0.197053, + "nauc_ndcg_at_20_std": -0.254431, + "nauc_ndcg_at_20_diff1": 0.303115, + "nauc_ndcg_at_100_max": -0.197053, + "nauc_ndcg_at_100_std": -0.254431, + "nauc_ndcg_at_100_diff1": 0.303115, + "nauc_ndcg_at_1000_max": -0.197053, + "nauc_ndcg_at_1000_std": -0.254431, + "nauc_ndcg_at_1000_diff1": 0.303115, + "nauc_map_at_1_max": -0.209535, + "nauc_map_at_1_std": -0.242867, + "nauc_map_at_1_diff1": 0.326084, + "nauc_map_at_3_max": -0.196622, + "nauc_map_at_3_std": -0.263645, + "nauc_map_at_3_diff1": 0.308777, + "nauc_map_at_5_max": -0.197007, + "nauc_map_at_5_std": -0.25924, + "nauc_map_at_5_diff1": 0.304622, + "nauc_map_at_10_max": -0.201492, + "nauc_map_at_10_std": -0.258659, + "nauc_map_at_10_diff1": 0.304005, + "nauc_map_at_20_max": -0.200505, + "nauc_map_at_20_std": -0.256572, + "nauc_map_at_20_diff1": 0.30579, + "nauc_map_at_100_max": -0.200505, + "nauc_map_at_100_std": -0.256572, + "nauc_map_at_100_diff1": 0.30579, + "nauc_map_at_1000_max": -0.200505, + "nauc_map_at_1000_std": -0.256572, + "nauc_map_at_1000_diff1": 0.30579, + "nauc_recall_at_1_max": -0.209535, + "nauc_recall_at_1_std": -0.242867, + "nauc_recall_at_1_diff1": 0.326084, + "nauc_recall_at_3_max": -0.147861, + "nauc_recall_at_3_std": -0.324772, + "nauc_recall_at_3_diff1": 0.304131, + "nauc_recall_at_5_max": -0.113058, + "nauc_recall_at_5_std": -0.336329, + "nauc_recall_at_5_diff1": 0.250982, + "nauc_recall_at_10_max": -0.245492, + "nauc_recall_at_10_std": -0.397657, + "nauc_recall_at_10_diff1": 0.014255, + "nauc_recall_at_20_max": 0.370551, + "nauc_recall_at_20_std": 0.713396, + "nauc_recall_at_20_diff1": 0.076531, + "nauc_recall_at_100_max": 0.370551, + "nauc_recall_at_100_std": 0.713396, + "nauc_recall_at_100_diff1": 0.076531, + "nauc_recall_at_1000_max": 0.370551, + "nauc_recall_at_1000_std": 0.713396, + "nauc_recall_at_1000_diff1": 0.076531, + "nauc_precision_at_1_max": -0.209535, + "nauc_precision_at_1_std": -0.242867, + "nauc_precision_at_1_diff1": 0.326084, + "nauc_precision_at_3_max": -0.147861, + "nauc_precision_at_3_std": -0.324772, + "nauc_precision_at_3_diff1": 0.304131, + "nauc_precision_at_5_max": -0.113058, + "nauc_precision_at_5_std": -0.336329, + "nauc_precision_at_5_diff1": 0.250982, + "nauc_precision_at_10_max": -0.245492, + "nauc_precision_at_10_std": -0.397657, + "nauc_precision_at_10_diff1": 0.014255, + "nauc_precision_at_20_max": 0.370551, + "nauc_precision_at_20_std": 0.713396, + "nauc_precision_at_20_diff1": 0.076531, + "nauc_precision_at_100_max": 0.370551, + "nauc_precision_at_100_std": 0.713396, + "nauc_precision_at_100_diff1": 0.076531, + "nauc_precision_at_1000_max": 0.370551, + "nauc_precision_at_1000_std": 0.713396, + "nauc_precision_at_1000_diff1": 0.076531, + "nauc_mrr_at_1_max": -0.215027, + "nauc_mrr_at_1_std": -0.245771, + "nauc_mrr_at_1_diff1": 0.316544, + "nauc_mrr_at_3_max": -0.203939, + "nauc_mrr_at_3_std": -0.266166, + "nauc_mrr_at_3_diff1": 0.298534, + "nauc_mrr_at_5_max": -0.204936, + "nauc_mrr_at_5_std": -0.261841, + "nauc_mrr_at_5_diff1": 0.292964, + "nauc_mrr_at_10_max": -0.208877, + "nauc_mrr_at_10_std": -0.260564, + "nauc_mrr_at_10_diff1": 0.293042, + "nauc_mrr_at_20_max": -0.207931, + "nauc_mrr_at_20_std": -0.258485, + "nauc_mrr_at_20_diff1": 0.294766, + "nauc_mrr_at_100_max": -0.207931, + "nauc_mrr_at_100_std": -0.258485, + "nauc_mrr_at_100_diff1": 0.294766, + "nauc_mrr_at_1000_max": -0.207931, + "nauc_mrr_at_1000_std": -0.258485, + "nauc_mrr_at_1000_diff1": 0.294766, + "main_score": 0.77873, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 189.00718069076538, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AskUbuntuDupQuestions.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AskUbuntuDupQuestions.json new file mode 100644 index 0000000000..74da54aeba --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AskUbuntuDupQuestions.json @@ -0,0 +1,26 @@ +{ + "dataset_revision": "2000358ca161889fa9c082cb41daa8dcfb161a54", + "task_name": "AskUbuntuDupQuestions", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "map": 0.64535, + "mrr": 0.766876, + "nAUC_map_max": 0.235344, + "nAUC_map_std": 0.209714, + "nAUC_map_diff1": 0.168035, + "nAUC_mrr_max": 0.367769, + "nAUC_mrr_std": 0.276248, + "nAUC_mrr_diff1": 0.210136, + "main_score": 0.64535, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 39.275365591049194, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BIOSSES.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BIOSSES.json new file mode 100644 index 0000000000..47b8346a58 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BIOSSES.json @@ -0,0 +1,26 @@ +{ + "dataset_revision": "d3fb88f8f02e40887cd149695127462bbcf29b4a", + "task_name": "BIOSSES", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "pearson": 0.881903, + "spearman": 0.843423, + "cosine_pearson": 0.881903, + "cosine_spearman": 0.843423, + "manhattan_pearson": 0.854734, + "manhattan_spearman": 0.840097, + "euclidean_pearson": 0.855002, + "euclidean_spearman": 0.843423, + "main_score": 0.843423, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 1.541583776473999, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Banking77Classification.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Banking77Classification.json new file mode 100644 index 0000000000..5023d826bd --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Banking77Classification.json @@ -0,0 +1,73 @@ +{ + "dataset_revision": "0fd18e25b25c072e09e0d92ab615fda904d66300", + "task_name": "Banking77Classification", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "accuracy": 0.921916, + "f1": 0.921562, + "f1_weighted": 0.921562, + "scores_per_experiment": [ + { + "accuracy": 0.920779, + "f1": 0.920606, + "f1_weighted": 0.920606 + }, + { + "accuracy": 0.920779, + "f1": 0.920112, + "f1_weighted": 0.920112 + }, + { + "accuracy": 0.925649, + "f1": 0.925132, + "f1_weighted": 0.925132 + }, + { + "accuracy": 0.921753, + "f1": 0.921396, + "f1_weighted": 0.921396 + }, + { + "accuracy": 0.919805, + "f1": 0.919111, + "f1_weighted": 0.919111 + }, + { + "accuracy": 0.920779, + "f1": 0.920431, + "f1_weighted": 0.920431 + }, + { + "accuracy": 0.922727, + "f1": 0.922392, + "f1_weighted": 0.922392 + }, + { + "accuracy": 0.92013, + "f1": 0.920182, + "f1_weighted": 0.920182 + }, + { + "accuracy": 0.921753, + "f1": 0.921528, + "f1_weighted": 0.921528 + }, + { + "accuracy": 0.925, + "f1": 0.924725, + "f1_weighted": 0.924725 + } + ], + "main_score": 0.921916, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 83.1622564792633, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BiorxivClusteringP2P.v2.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BiorxivClusteringP2P.v2.json new file mode 100644 index 0000000000..8751a97e1f --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BiorxivClusteringP2P.v2.json @@ -0,0 +1,34 @@ +{ + "dataset_revision": "f5dbc242e11dd8e24def4c4268607a49e02946dc", + "task_name": "BiorxivClusteringP2P.v2", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "v_measures": { + "Level 0": [ + 0.490396, + 0.486295, + 0.477355, + 0.457433, + 0.491164, + 0.467755, + 0.456287, + 0.484198, + 0.471648, + 0.477465 + ] + }, + "v_measure": 0.476, + "v_measure_std": 0.011977, + "main_score": 0.476, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 67.2755446434021, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackGamingRetrieval.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackGamingRetrieval.json new file mode 100644 index 0000000000..78f79bfa05 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackGamingRetrieval.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "4885aa143210c98657558c04aaf3dc47cfb54340", + "task_name": "CQADupstackGamingRetrieval", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "ndcg_at_1": 0.51223, + "ndcg_at_3": 0.59253, + "ndcg_at_5": 0.62118, + "ndcg_at_10": 0.64738, + "ndcg_at_20": 0.66333, + "ndcg_at_100": 0.68209, + "ndcg_at_1000": 0.6886, + "map_at_1": 0.44538, + "map_at_3": 0.55154, + "map_at_5": 0.57213, + "map_at_10": 0.58702, + "map_at_20": 0.59308, + "map_at_100": 0.59659, + "map_at_1000": 0.59695, + "recall_at_1": 0.44538, + "recall_at_3": 0.6465, + "recall_at_5": 0.71686, + "recall_at_10": 0.78955, + "recall_at_20": 0.84834, + "recall_at_100": 0.93727, + "recall_at_1000": 0.98218, + "precision_at_1": 0.51223, + "precision_at_3": 0.26541, + "precision_at_5": 0.18194, + "precision_at_10": 0.10408, + "precision_at_20": 0.05727, + "precision_at_100": 0.01297, + "precision_at_1000": 0.00138, + "mrr_at_1": 0.512226, + "mrr_at_3": 0.597179, + "mrr_at_5": 0.61116, + "mrr_at_10": 0.619716, + "mrr_at_20": 0.623124, + "mrr_at_100": 0.625292, + "mrr_at_1000": 0.62545, + "nauc_ndcg_at_1_max": 0.407808, + "nauc_ndcg_at_1_std": 0.003787, + "nauc_ndcg_at_1_diff1": 0.59931, + "nauc_ndcg_at_3_max": 0.409402, + "nauc_ndcg_at_3_std": -0.031688, + "nauc_ndcg_at_3_diff1": 0.540058, + "nauc_ndcg_at_5_max": 0.418696, + "nauc_ndcg_at_5_std": -0.024483, + "nauc_ndcg_at_5_diff1": 0.542322, + "nauc_ndcg_at_10_max": 0.433043, + "nauc_ndcg_at_10_std": 0.002885, + "nauc_ndcg_at_10_diff1": 0.538743, + "nauc_ndcg_at_20_max": 0.43649, + "nauc_ndcg_at_20_std": 0.013312, + "nauc_ndcg_at_20_diff1": 0.53948, + "nauc_ndcg_at_100_max": 0.442729, + "nauc_ndcg_at_100_std": 0.027246, + "nauc_ndcg_at_100_diff1": 0.544012, + "nauc_ndcg_at_1000_max": 0.438261, + "nauc_ndcg_at_1000_std": 0.018983, + "nauc_ndcg_at_1000_diff1": 0.546274, + "nauc_map_at_1_max": 0.339364, + "nauc_map_at_1_std": -0.030259, + "nauc_map_at_1_diff1": 0.604436, + "nauc_map_at_3_max": 0.391238, + "nauc_map_at_3_std": -0.040432, + "nauc_map_at_3_diff1": 0.559064, + "nauc_map_at_5_max": 0.401248, + "nauc_map_at_5_std": -0.033432, + "nauc_map_at_5_diff1": 0.557935, + "nauc_map_at_10_max": 0.41185, + "nauc_map_at_10_std": -0.01771, + "nauc_map_at_10_diff1": 0.555781, + "nauc_map_at_20_max": 0.41605, + "nauc_map_at_20_std": -0.010853, + "nauc_map_at_20_diff1": 0.555719, + "nauc_map_at_100_max": 0.418071, + "nauc_map_at_100_std": -0.00762, + "nauc_map_at_100_diff1": 0.5564, + "nauc_map_at_1000_max": 0.418147, + "nauc_map_at_1000_std": -0.007723, + "nauc_map_at_1000_diff1": 0.556446, + "nauc_recall_at_1_max": 0.339364, + "nauc_recall_at_1_std": -0.030259, + "nauc_recall_at_1_diff1": 0.604436, + "nauc_recall_at_3_max": 0.380521, + "nauc_recall_at_3_std": -0.075173, + "nauc_recall_at_3_diff1": 0.477173, + "nauc_recall_at_5_max": 0.397188, + "nauc_recall_at_5_std": -0.058986, + "nauc_recall_at_5_diff1": 0.469715, + "nauc_recall_at_10_max": 0.451511, + "nauc_recall_at_10_std": 0.02969, + "nauc_recall_at_10_diff1": 0.450546, + "nauc_recall_at_20_max": 0.486723, + "nauc_recall_at_20_std": 0.102864, + "nauc_recall_at_20_diff1": 0.432655, + "nauc_recall_at_100_max": 0.654966, + "nauc_recall_at_100_std": 0.427949, + "nauc_recall_at_100_diff1": 0.419215, + "nauc_recall_at_1000_max": 0.719294, + "nauc_recall_at_1000_std": 0.576633, + "nauc_recall_at_1000_diff1": 0.418855, + "nauc_precision_at_1_max": 0.407808, + "nauc_precision_at_1_std": 0.003787, + "nauc_precision_at_1_diff1": 0.59931, + "nauc_precision_at_3_max": 0.384692, + "nauc_precision_at_3_std": 0.033893, + "nauc_precision_at_3_diff1": 0.26159, + "nauc_precision_at_5_max": 0.350121, + "nauc_precision_at_5_std": 0.080802, + "nauc_precision_at_5_diff1": 0.155146, + "nauc_precision_at_10_max": 0.316295, + "nauc_precision_at_10_std": 0.177083, + "nauc_precision_at_10_diff1": 0.036495, + "nauc_precision_at_20_max": 0.291149, + "nauc_precision_at_20_std": 0.229941, + "nauc_precision_at_20_diff1": -0.035324, + "nauc_precision_at_100_max": 0.227825, + "nauc_precision_at_100_std": 0.275183, + "nauc_precision_at_100_diff1": -0.138239, + "nauc_precision_at_1000_max": 0.173701, + "nauc_precision_at_1000_std": 0.228133, + "nauc_precision_at_1000_diff1": -0.186182, + "nauc_mrr_at_1_max": 0.407808, + "nauc_mrr_at_1_std": 0.003787, + "nauc_mrr_at_1_diff1": 0.59931, + "nauc_mrr_at_3_max": 0.423313, + "nauc_mrr_at_3_std": -0.001266, + "nauc_mrr_at_3_diff1": 0.550723, + "nauc_mrr_at_5_max": 0.425225, + "nauc_mrr_at_5_std": 0.003053, + "nauc_mrr_at_5_diff1": 0.552921, + "nauc_mrr_at_10_max": 0.427748, + "nauc_mrr_at_10_std": 0.009068, + "nauc_mrr_at_10_diff1": 0.553012, + "nauc_mrr_at_20_max": 0.427643, + "nauc_mrr_at_20_std": 0.00992, + "nauc_mrr_at_20_diff1": 0.553353, + "nauc_mrr_at_100_max": 0.428065, + "nauc_mrr_at_100_std": 0.01093, + "nauc_mrr_at_100_diff1": 0.553998, + "nauc_mrr_at_1000_max": 0.427891, + "nauc_mrr_at_1000_std": 0.010673, + "nauc_mrr_at_1000_diff1": 0.5541, + "main_score": 0.64738, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 650.2527179718018, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackUnixRetrieval.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackUnixRetrieval.json new file mode 100644 index 0000000000..db8e95a767 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackUnixRetrieval.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "6c6430d3a6d36f8d2a829195bc5dc94d7e063e53", + "task_name": "CQADupstackUnixRetrieval", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "ndcg_at_1": 0.39366, + "ndcg_at_3": 0.44311, + "ndcg_at_5": 0.47461, + "ndcg_at_10": 0.50369, + "ndcg_at_20": 0.5258, + "ndcg_at_100": 0.55473, + "ndcg_at_1000": 0.57036, + "map_at_1": 0.32749, + "map_at_3": 0.40408, + "map_at_5": 0.42741, + "map_at_10": 0.44265, + "map_at_20": 0.4501, + "map_at_100": 0.45531, + "map_at_1000": 0.45615, + "recall_at_1": 0.32749, + "recall_at_3": 0.47976, + "recall_at_5": 0.55997, + "recall_at_10": 0.64251, + "recall_at_20": 0.72154, + "recall_at_100": 0.85793, + "recall_at_1000": 0.95857, + "precision_at_1": 0.39366, + "precision_at_3": 0.20367, + "precision_at_5": 0.14739, + "precision_at_10": 0.08741, + "precision_at_20": 0.05, + "precision_at_100": 0.01232, + "precision_at_1000": 0.00146, + "mrr_at_1": 0.393657, + "mrr_at_3": 0.460354, + "mrr_at_5": 0.476912, + "mrr_at_10": 0.487964, + "mrr_at_20": 0.492497, + "mrr_at_100": 0.495829, + "mrr_at_1000": 0.496297, + "nauc_ndcg_at_1_max": 0.44463, + "nauc_ndcg_at_1_std": -0.015682, + "nauc_ndcg_at_1_diff1": 0.562903, + "nauc_ndcg_at_3_max": 0.419213, + "nauc_ndcg_at_3_std": -0.029827, + "nauc_ndcg_at_3_diff1": 0.505618, + "nauc_ndcg_at_5_max": 0.418019, + "nauc_ndcg_at_5_std": -0.028134, + "nauc_ndcg_at_5_diff1": 0.499713, + "nauc_ndcg_at_10_max": 0.425748, + "nauc_ndcg_at_10_std": -0.015697, + "nauc_ndcg_at_10_diff1": 0.499959, + "nauc_ndcg_at_20_max": 0.428978, + "nauc_ndcg_at_20_std": -0.005225, + "nauc_ndcg_at_20_diff1": 0.49621, + "nauc_ndcg_at_100_max": 0.439613, + "nauc_ndcg_at_100_std": 0.007993, + "nauc_ndcg_at_100_diff1": 0.501558, + "nauc_ndcg_at_1000_max": 0.43798, + "nauc_ndcg_at_1000_std": 0.006165, + "nauc_ndcg_at_1000_diff1": 0.503335, + "nauc_map_at_1_max": 0.385575, + "nauc_map_at_1_std": -0.061803, + "nauc_map_at_1_diff1": 0.57487, + "nauc_map_at_3_max": 0.40398, + "nauc_map_at_3_std": -0.047842, + "nauc_map_at_3_diff1": 0.525218, + "nauc_map_at_5_max": 0.410905, + "nauc_map_at_5_std": -0.041884, + "nauc_map_at_5_diff1": 0.518296, + "nauc_map_at_10_max": 0.417156, + "nauc_map_at_10_std": -0.035179, + "nauc_map_at_10_diff1": 0.516495, + "nauc_map_at_20_max": 0.418659, + "nauc_map_at_20_std": -0.031193, + "nauc_map_at_20_diff1": 0.515188, + "nauc_map_at_100_max": 0.420993, + "nauc_map_at_100_std": -0.028535, + "nauc_map_at_100_diff1": 0.516537, + "nauc_map_at_1000_max": 0.420918, + "nauc_map_at_1000_std": -0.02845, + "nauc_map_at_1000_diff1": 0.516635, + "nauc_recall_at_1_max": 0.385575, + "nauc_recall_at_1_std": -0.061803, + "nauc_recall_at_1_diff1": 0.57487, + "nauc_recall_at_3_max": 0.386872, + "nauc_recall_at_3_std": -0.035699, + "nauc_recall_at_3_diff1": 0.456786, + "nauc_recall_at_5_max": 0.3802, + "nauc_recall_at_5_std": -0.028763, + "nauc_recall_at_5_diff1": 0.426804, + "nauc_recall_at_10_max": 0.393157, + "nauc_recall_at_10_std": 0.005455, + "nauc_recall_at_10_diff1": 0.418911, + "nauc_recall_at_20_max": 0.402413, + "nauc_recall_at_20_std": 0.050249, + "nauc_recall_at_20_diff1": 0.389581, + "nauc_recall_at_100_max": 0.506553, + "nauc_recall_at_100_std": 0.203902, + "nauc_recall_at_100_diff1": 0.39675, + "nauc_recall_at_1000_max": 0.705929, + "nauc_recall_at_1000_std": 0.582046, + "nauc_recall_at_1000_diff1": 0.348811, + "nauc_precision_at_1_max": 0.44463, + "nauc_precision_at_1_std": -0.015682, + "nauc_precision_at_1_diff1": 0.562903, + "nauc_precision_at_3_max": 0.403607, + "nauc_precision_at_3_std": 0.036033, + "nauc_precision_at_3_diff1": 0.33963, + "nauc_precision_at_5_max": 0.365588, + "nauc_precision_at_5_std": 0.075154, + "nauc_precision_at_5_diff1": 0.237522, + "nauc_precision_at_10_max": 0.327626, + "nauc_precision_at_10_std": 0.115252, + "nauc_precision_at_10_diff1": 0.160054, + "nauc_precision_at_20_max": 0.275445, + "nauc_precision_at_20_std": 0.154363, + "nauc_precision_at_20_diff1": 0.0885, + "nauc_precision_at_100_max": 0.1835, + "nauc_precision_at_100_std": 0.209154, + "nauc_precision_at_100_diff1": -0.02305, + "nauc_precision_at_1000_max": 0.022068, + "nauc_precision_at_1000_std": 0.160602, + "nauc_precision_at_1000_diff1": -0.136694, + "nauc_mrr_at_1_max": 0.44463, + "nauc_mrr_at_1_std": -0.015682, + "nauc_mrr_at_1_diff1": 0.562903, + "nauc_mrr_at_3_max": 0.443257, + "nauc_mrr_at_3_std": -0.007647, + "nauc_mrr_at_3_diff1": 0.520702, + "nauc_mrr_at_5_max": 0.441252, + "nauc_mrr_at_5_std": -0.004913, + "nauc_mrr_at_5_diff1": 0.516035, + "nauc_mrr_at_10_max": 0.443589, + "nauc_mrr_at_10_std": 0.000172, + "nauc_mrr_at_10_diff1": 0.518076, + "nauc_mrr_at_20_max": 0.443999, + "nauc_mrr_at_20_std": 0.001144, + "nauc_mrr_at_20_diff1": 0.51742, + "nauc_mrr_at_100_max": 0.444806, + "nauc_mrr_at_100_std": 0.001668, + "nauc_mrr_at_100_diff1": 0.518103, + "nauc_mrr_at_1000_max": 0.444704, + "nauc_mrr_at_1000_std": 0.001484, + "nauc_mrr_at_1000_diff1": 0.518201, + "main_score": 0.50369, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 1100.645089149475, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ClimateFEVERHardNegatives.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ClimateFEVERHardNegatives.json new file mode 100644 index 0000000000..ebb9002063 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ClimateFEVERHardNegatives.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "3a309e201f3c2c4b13bd4a367a8f37eee2ec1d21", + "task_name": "ClimateFEVERHardNegatives", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "ndcg_at_1": 0.462, + "ndcg_at_3": 0.37716, + "ndcg_at_5": 0.39581, + "ndcg_at_10": 0.42982, + "ndcg_at_20": 0.45481, + "ndcg_at_100": 0.49993, + "ndcg_at_1000": 0.53185, + "map_at_1": 0.20483, + "map_at_3": 0.28769, + "map_at_5": 0.31324, + "map_at_10": 0.33241, + "map_at_20": 0.34235, + "map_at_100": 0.35192, + "map_at_1000": 0.35394, + "recall_at_1": 0.20483, + "recall_at_3": 0.33378, + "recall_at_5": 0.3971, + "recall_at_10": 0.47217, + "recall_at_20": 0.5406, + "recall_at_100": 0.71007, + "recall_at_1000": 0.88542, + "precision_at_1": 0.462, + "precision_at_3": 0.27667, + "precision_at_5": 0.2056, + "precision_at_10": 0.1254, + "precision_at_20": 0.0739, + "precision_at_100": 0.02042, + "precision_at_1000": 0.00265, + "mrr_at_1": 0.462, + "mrr_at_3": 0.545333, + "mrr_at_5": 0.562283, + "mrr_at_10": 0.571472, + "mrr_at_20": 0.574889, + "mrr_at_100": 0.57729, + "mrr_at_1000": 0.577493, + "nauc_ndcg_at_1_max": 0.474304, + "nauc_ndcg_at_1_std": 0.152844, + "nauc_ndcg_at_1_diff1": 0.398592, + "nauc_ndcg_at_3_max": 0.437827, + "nauc_ndcg_at_3_std": 0.13689, + "nauc_ndcg_at_3_diff1": 0.326084, + "nauc_ndcg_at_5_max": 0.443865, + "nauc_ndcg_at_5_std": 0.149803, + "nauc_ndcg_at_5_diff1": 0.31668, + "nauc_ndcg_at_10_max": 0.444417, + "nauc_ndcg_at_10_std": 0.1541, + "nauc_ndcg_at_10_diff1": 0.307947, + "nauc_ndcg_at_20_max": 0.444885, + "nauc_ndcg_at_20_std": 0.15503, + "nauc_ndcg_at_20_diff1": 0.308789, + "nauc_ndcg_at_100_max": 0.445336, + "nauc_ndcg_at_100_std": 0.166122, + "nauc_ndcg_at_100_diff1": 0.309189, + "nauc_ndcg_at_1000_max": 0.453964, + "nauc_ndcg_at_1000_std": 0.173374, + "nauc_ndcg_at_1000_diff1": 0.319483, + "nauc_map_at_1_max": 0.517265, + "nauc_map_at_1_std": 0.144934, + "nauc_map_at_1_diff1": 0.449793, + "nauc_map_at_3_max": 0.45466, + "nauc_map_at_3_std": 0.134742, + "nauc_map_at_3_diff1": 0.344392, + "nauc_map_at_5_max": 0.448878, + "nauc_map_at_5_std": 0.142098, + "nauc_map_at_5_diff1": 0.329891, + "nauc_map_at_10_max": 0.449806, + "nauc_map_at_10_std": 0.144759, + "nauc_map_at_10_diff1": 0.323499, + "nauc_map_at_20_max": 0.450072, + "nauc_map_at_20_std": 0.145512, + "nauc_map_at_20_diff1": 0.324196, + "nauc_map_at_100_max": 0.449966, + "nauc_map_at_100_std": 0.147928, + "nauc_map_at_100_diff1": 0.324396, + "nauc_map_at_1000_max": 0.450126, + "nauc_map_at_1000_std": 0.148267, + "nauc_map_at_1000_diff1": 0.324726, + "nauc_recall_at_1_max": 0.517265, + "nauc_recall_at_1_std": 0.144934, + "nauc_recall_at_1_diff1": 0.449793, + "nauc_recall_at_3_max": 0.411328, + "nauc_recall_at_3_std": 0.125653, + "nauc_recall_at_3_diff1": 0.290019, + "nauc_recall_at_5_max": 0.384828, + "nauc_recall_at_5_std": 0.139753, + "nauc_recall_at_5_diff1": 0.247526, + "nauc_recall_at_10_max": 0.368653, + "nauc_recall_at_10_std": 0.141008, + "nauc_recall_at_10_diff1": 0.218651, + "nauc_recall_at_20_max": 0.357, + "nauc_recall_at_20_std": 0.142405, + "nauc_recall_at_20_diff1": 0.210581, + "nauc_recall_at_100_max": 0.328821, + "nauc_recall_at_100_std": 0.181285, + "nauc_recall_at_100_diff1": 0.17739, + "nauc_recall_at_1000_max": 0.402177, + "nauc_recall_at_1000_std": 0.30868, + "nauc_recall_at_1000_diff1": 0.226488, + "nauc_precision_at_1_max": 0.474304, + "nauc_precision_at_1_std": 0.152844, + "nauc_precision_at_1_diff1": 0.398592, + "nauc_precision_at_3_max": 0.296721, + "nauc_precision_at_3_std": 0.106793, + "nauc_precision_at_3_diff1": 0.163788, + "nauc_precision_at_5_max": 0.242218, + "nauc_precision_at_5_std": 0.125046, + "nauc_precision_at_5_diff1": 0.09813, + "nauc_precision_at_10_max": 0.195409, + "nauc_precision_at_10_std": 0.113844, + "nauc_precision_at_10_diff1": 0.049313, + "nauc_precision_at_20_max": 0.152119, + "nauc_precision_at_20_std": 0.093926, + "nauc_precision_at_20_diff1": 0.025822, + "nauc_precision_at_100_max": 0.023052, + "nauc_precision_at_100_std": 0.090661, + "nauc_precision_at_100_diff1": -0.059335, + "nauc_precision_at_1000_max": -0.102018, + "nauc_precision_at_1000_std": 0.065361, + "nauc_precision_at_1000_diff1": -0.122782, + "nauc_mrr_at_1_max": 0.474304, + "nauc_mrr_at_1_std": 0.152844, + "nauc_mrr_at_1_diff1": 0.398592, + "nauc_mrr_at_3_max": 0.469168, + "nauc_mrr_at_3_std": 0.156072, + "nauc_mrr_at_3_diff1": 0.369288, + "nauc_mrr_at_5_max": 0.469478, + "nauc_mrr_at_5_std": 0.160179, + "nauc_mrr_at_5_diff1": 0.364103, + "nauc_mrr_at_10_max": 0.469099, + "nauc_mrr_at_10_std": 0.163394, + "nauc_mrr_at_10_diff1": 0.36423, + "nauc_mrr_at_20_max": 0.469284, + "nauc_mrr_at_20_std": 0.16293, + "nauc_mrr_at_20_diff1": 0.36428, + "nauc_mrr_at_100_max": 0.468948, + "nauc_mrr_at_100_std": 0.163188, + "nauc_mrr_at_100_diff1": 0.364659, + "nauc_mrr_at_1000_max": 0.469065, + "nauc_mrr_at_1000_std": 0.16318, + "nauc_mrr_at_1000_diff1": 0.364817, + "main_score": 0.42982, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 1263.052413225174, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FEVERHardNegatives.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FEVERHardNegatives.json new file mode 100644 index 0000000000..291978dc9b --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FEVERHardNegatives.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "080c9ed6267b65029207906e815d44a9240bafca", + "task_name": "FEVERHardNegatives", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "ndcg_at_1": 0.891, + "ndcg_at_3": 0.91462, + "ndcg_at_5": 0.9211, + "ndcg_at_10": 0.92665, + "ndcg_at_20": 0.92904, + "ndcg_at_100": 0.93215, + "ndcg_at_1000": 0.93337, + "map_at_1": 0.83821, + "map_at_3": 0.89048, + "map_at_5": 0.89666, + "map_at_10": 0.90033, + "map_at_20": 0.90148, + "map_at_100": 0.9023, + "map_at_1000": 0.90239, + "recall_at_1": 0.83821, + "recall_at_3": 0.93705, + "recall_at_5": 0.9545, + "recall_at_10": 0.96984, + "recall_at_20": 0.97761, + "recall_at_100": 0.98932, + "recall_at_1000": 0.99643, + "precision_at_1": 0.891, + "precision_at_3": 0.33767, + "precision_at_5": 0.2088, + "precision_at_10": 0.1076, + "precision_at_20": 0.05475, + "precision_at_100": 0.01136, + "precision_at_1000": 0.00116, + "mrr_at_1": 0.891, + "mrr_at_3": 0.9335, + "mrr_at_5": 0.93615, + "mrr_at_10": 0.936912, + "mrr_at_20": 0.937051, + "mrr_at_100": 0.937051, + "mrr_at_1000": 0.937051, + "nauc_ndcg_at_1_max": 0.214048, + "nauc_ndcg_at_1_std": -0.112648, + "nauc_ndcg_at_1_diff1": 0.78364, + "nauc_ndcg_at_3_max": 0.177323, + "nauc_ndcg_at_3_std": -0.129626, + "nauc_ndcg_at_3_diff1": 0.533015, + "nauc_ndcg_at_5_max": 0.193809, + "nauc_ndcg_at_5_std": -0.100967, + "nauc_ndcg_at_5_diff1": 0.52554, + "nauc_ndcg_at_10_max": 0.17801, + "nauc_ndcg_at_10_std": -0.091344, + "nauc_ndcg_at_10_diff1": 0.54045, + "nauc_ndcg_at_20_max": 0.190935, + "nauc_ndcg_at_20_std": -0.085883, + "nauc_ndcg_at_20_diff1": 0.546866, + "nauc_ndcg_at_100_max": 0.203113, + "nauc_ndcg_at_100_std": -0.076576, + "nauc_ndcg_at_100_diff1": 0.561754, + "nauc_ndcg_at_1000_max": 0.196507, + "nauc_ndcg_at_1000_std": -0.086932, + "nauc_ndcg_at_1000_diff1": 0.572445, + "nauc_map_at_1_max": 0.146831, + "nauc_map_at_1_std": -0.104754, + "nauc_map_at_1_diff1": 0.567078, + "nauc_map_at_3_max": 0.153744, + "nauc_map_at_3_std": -0.128707, + "nauc_map_at_3_diff1": 0.511328, + "nauc_map_at_5_max": 0.17542, + "nauc_map_at_5_std": -0.103904, + "nauc_map_at_5_diff1": 0.517856, + "nauc_map_at_10_max": 0.172134, + "nauc_map_at_10_std": -0.097654, + "nauc_map_at_10_diff1": 0.52931, + "nauc_map_at_20_max": 0.179188, + "nauc_map_at_20_std": -0.095111, + "nauc_map_at_20_diff1": 0.532081, + "nauc_map_at_100_max": 0.181991, + "nauc_map_at_100_std": -0.092026, + "nauc_map_at_100_diff1": 0.534797, + "nauc_map_at_1000_max": 0.181765, + "nauc_map_at_1000_std": -0.092431, + "nauc_map_at_1000_diff1": 0.535372, + "nauc_recall_at_1_max": 0.146831, + "nauc_recall_at_1_std": -0.104754, + "nauc_recall_at_1_diff1": 0.567078, + "nauc_recall_at_3_max": 0.106682, + "nauc_recall_at_3_std": -0.169996, + "nauc_recall_at_3_diff1": 0.278442, + "nauc_recall_at_5_max": 0.18159, + "nauc_recall_at_5_std": -0.064657, + "nauc_recall_at_5_diff1": 0.197903, + "nauc_recall_at_10_max": 0.051087, + "nauc_recall_at_10_std": -0.014731, + "nauc_recall_at_10_diff1": 0.126128, + "nauc_recall_at_20_max": 0.09394, + "nauc_recall_at_20_std": 0.069832, + "nauc_recall_at_20_diff1": 0.075296, + "nauc_recall_at_100_max": 0.282344, + "nauc_recall_at_100_std": 0.437296, + "nauc_recall_at_100_diff1": -0.092733, + "nauc_recall_at_1000_max": -0.237647, + "nauc_recall_at_1000_std": 0.38183, + "nauc_recall_at_1000_diff1": -0.192577, + "nauc_precision_at_1_max": 0.214048, + "nauc_precision_at_1_std": -0.112648, + "nauc_precision_at_1_diff1": 0.78364, + "nauc_precision_at_3_max": 0.147823, + "nauc_precision_at_3_std": 0.010227, + "nauc_precision_at_3_diff1": 0.122223, + "nauc_precision_at_5_max": 0.170451, + "nauc_precision_at_5_std": 0.170205, + "nauc_precision_at_5_diff1": -0.008576, + "nauc_precision_at_10_max": 0.064714, + "nauc_precision_at_10_std": 0.17084, + "nauc_precision_at_10_diff1": -0.03935, + "nauc_precision_at_20_max": 0.106664, + "nauc_precision_at_20_std": 0.171147, + "nauc_precision_at_20_diff1": -0.060534, + "nauc_precision_at_100_max": 0.078598, + "nauc_precision_at_100_std": 0.152895, + "nauc_precision_at_100_diff1": -0.059482, + "nauc_precision_at_1000_max": 0.027365, + "nauc_precision_at_1000_std": 0.089309, + "nauc_precision_at_1000_diff1": -0.045451, + "nauc_mrr_at_1_max": 0.214048, + "nauc_mrr_at_1_std": -0.112648, + "nauc_mrr_at_1_diff1": 0.78364, + "nauc_mrr_at_3_max": 0.23705, + "nauc_mrr_at_3_std": -0.145559, + "nauc_mrr_at_3_diff1": 0.768899, + "nauc_mrr_at_5_max": 0.243731, + "nauc_mrr_at_5_std": -0.137786, + "nauc_mrr_at_5_diff1": 0.77134, + "nauc_mrr_at_10_max": 0.237988, + "nauc_mrr_at_10_std": -0.136701, + "nauc_mrr_at_10_diff1": 0.772593, + "nauc_mrr_at_20_max": 0.236863, + "nauc_mrr_at_20_std": -0.136421, + "nauc_mrr_at_20_diff1": 0.772484, + "nauc_mrr_at_100_max": 0.236863, + "nauc_mrr_at_100_std": -0.136421, + "nauc_mrr_at_100_diff1": 0.772484, + "nauc_mrr_at_1000_max": 0.236863, + "nauc_mrr_at_1000_std": -0.136421, + "nauc_mrr_at_1000_diff1": 0.772484, + "main_score": 0.92665, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 3064.586258649826, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FiQA2018.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FiQA2018.json new file mode 100644 index 0000000000..122de6358d --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FiQA2018.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "27a168819829fe9bcd655c2df245fb19452e8e06", + "task_name": "FiQA2018", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "ndcg_at_1": 0.50926, + "ndcg_at_3": 0.48598, + "ndcg_at_5": 0.50169, + "ndcg_at_10": 0.53241, + "ndcg_at_20": 0.56651, + "ndcg_at_100": 0.60608, + "ndcg_at_1000": 0.6233, + "map_at_1": 0.26871, + "map_at_3": 0.39136, + "map_at_5": 0.42358, + "map_at_10": 0.44847, + "map_at_20": 0.46347, + "map_at_100": 0.47321, + "map_at_1000": 0.47441, + "recall_at_1": 0.26871, + "recall_at_3": 0.44748, + "recall_at_5": 0.52173, + "recall_at_10": 0.61466, + "recall_at_20": 0.71757, + "recall_at_100": 0.87354, + "recall_at_1000": 0.97511, + "precision_at_1": 0.50926, + "precision_at_3": 0.32716, + "precision_at_5": 0.23858, + "precision_at_10": 0.14691, + "precision_at_20": 0.08858, + "precision_at_100": 0.0225, + "precision_at_1000": 0.00256, + "mrr_at_1": 0.509259, + "mrr_at_3": 0.575874, + "mrr_at_5": 0.589918, + "mrr_at_10": 0.600236, + "mrr_at_20": 0.604859, + "mrr_at_100": 0.607012, + "mrr_at_1000": 0.607208, + "nauc_ndcg_at_1_max": 0.368567, + "nauc_ndcg_at_1_std": -0.200189, + "nauc_ndcg_at_1_diff1": 0.531027, + "nauc_ndcg_at_3_max": 0.300193, + "nauc_ndcg_at_3_std": -0.246348, + "nauc_ndcg_at_3_diff1": 0.403333, + "nauc_ndcg_at_5_max": 0.293006, + "nauc_ndcg_at_5_std": -0.252776, + "nauc_ndcg_at_5_diff1": 0.417455, + "nauc_ndcg_at_10_max": 0.306157, + "nauc_ndcg_at_10_std": -0.248001, + "nauc_ndcg_at_10_diff1": 0.420704, + "nauc_ndcg_at_20_max": 0.327692, + "nauc_ndcg_at_20_std": -0.215813, + "nauc_ndcg_at_20_diff1": 0.413985, + "nauc_ndcg_at_100_max": 0.341648, + "nauc_ndcg_at_100_std": -0.203312, + "nauc_ndcg_at_100_diff1": 0.425225, + "nauc_ndcg_at_1000_max": 0.342827, + "nauc_ndcg_at_1000_std": -0.204177, + "nauc_ndcg_at_1000_diff1": 0.425317, + "nauc_map_at_1_max": 0.155928, + "nauc_map_at_1_std": -0.223051, + "nauc_map_at_1_diff1": 0.46776, + "nauc_map_at_3_max": 0.211376, + "nauc_map_at_3_std": -0.254761, + "nauc_map_at_3_diff1": 0.411184, + "nauc_map_at_5_max": 0.248721, + "nauc_map_at_5_std": -0.257619, + "nauc_map_at_5_diff1": 0.413817, + "nauc_map_at_10_max": 0.271626, + "nauc_map_at_10_std": -0.261391, + "nauc_map_at_10_diff1": 0.407691, + "nauc_map_at_20_max": 0.284252, + "nauc_map_at_20_std": -0.248213, + "nauc_map_at_20_diff1": 0.403109, + "nauc_map_at_100_max": 0.290939, + "nauc_map_at_100_std": -0.242207, + "nauc_map_at_100_diff1": 0.404224, + "nauc_map_at_1000_max": 0.291502, + "nauc_map_at_1000_std": -0.241764, + "nauc_map_at_1000_diff1": 0.404387, + "nauc_recall_at_1_max": 0.155928, + "nauc_recall_at_1_std": -0.223051, + "nauc_recall_at_1_diff1": 0.46776, + "nauc_recall_at_3_max": 0.167052, + "nauc_recall_at_3_std": -0.269709, + "nauc_recall_at_3_diff1": 0.363595, + "nauc_recall_at_5_max": 0.214394, + "nauc_recall_at_5_std": -0.25715, + "nauc_recall_at_5_diff1": 0.36787, + "nauc_recall_at_10_max": 0.262804, + "nauc_recall_at_10_std": -0.235078, + "nauc_recall_at_10_diff1": 0.344767, + "nauc_recall_at_20_max": 0.303561, + "nauc_recall_at_20_std": -0.126148, + "nauc_recall_at_20_diff1": 0.305773, + "nauc_recall_at_100_max": 0.35974, + "nauc_recall_at_100_std": -0.054521, + "nauc_recall_at_100_diff1": 0.368637, + "nauc_recall_at_1000_max": 0.463538, + "nauc_recall_at_1000_std": 0.054397, + "nauc_recall_at_1000_diff1": 0.385036, + "nauc_precision_at_1_max": 0.368567, + "nauc_precision_at_1_std": -0.200189, + "nauc_precision_at_1_diff1": 0.531027, + "nauc_precision_at_3_max": 0.337362, + "nauc_precision_at_3_std": -0.153392, + "nauc_precision_at_3_diff1": 0.195801, + "nauc_precision_at_5_max": 0.359002, + "nauc_precision_at_5_std": -0.122896, + "nauc_precision_at_5_diff1": 0.144842, + "nauc_precision_at_10_max": 0.376863, + "nauc_precision_at_10_std": -0.073483, + "nauc_precision_at_10_diff1": 0.080258, + "nauc_precision_at_20_max": 0.371371, + "nauc_precision_at_20_std": 0.051755, + "nauc_precision_at_20_diff1": 0.002645, + "nauc_precision_at_100_max": 0.314855, + "nauc_precision_at_100_std": 0.14376, + "nauc_precision_at_100_diff1": -0.053079, + "nauc_precision_at_1000_max": 0.266046, + "nauc_precision_at_1000_std": 0.170396, + "nauc_precision_at_1000_diff1": -0.102412, + "nauc_mrr_at_1_max": 0.368567, + "nauc_mrr_at_1_std": -0.200189, + "nauc_mrr_at_1_diff1": 0.531027, + "nauc_mrr_at_3_max": 0.356378, + "nauc_mrr_at_3_std": -0.213955, + "nauc_mrr_at_3_diff1": 0.492711, + "nauc_mrr_at_5_max": 0.36178, + "nauc_mrr_at_5_std": -0.208567, + "nauc_mrr_at_5_diff1": 0.495888, + "nauc_mrr_at_10_max": 0.368793, + "nauc_mrr_at_10_std": -0.199794, + "nauc_mrr_at_10_diff1": 0.496565, + "nauc_mrr_at_20_max": 0.368667, + "nauc_mrr_at_20_std": -0.196591, + "nauc_mrr_at_20_diff1": 0.497033, + "nauc_mrr_at_100_max": 0.368691, + "nauc_mrr_at_100_std": -0.197094, + "nauc_mrr_at_100_diff1": 0.49761, + "nauc_mrr_at_1000_max": 0.368627, + "nauc_mrr_at_1000_std": -0.197166, + "nauc_mrr_at_1000_diff1": 0.497603, + "main_score": 0.53241, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 1154.474021911621, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/HotpotQAHardNegatives.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/HotpotQAHardNegatives.json new file mode 100644 index 0000000000..e930db8472 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/HotpotQAHardNegatives.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "617612fa63afcb60e3b134bed8b7216a99707c37", + "task_name": "HotpotQAHardNegatives", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "ndcg_at_1": 0.779, + "ndcg_at_3": 0.67551, + "ndcg_at_5": 0.70928, + "ndcg_at_10": 0.73406, + "ndcg_at_20": 0.75062, + "ndcg_at_100": 0.76904, + "ndcg_at_1000": 0.77825, + "map_at_1": 0.3895, + "map_at_3": 0.60967, + "map_at_5": 0.63727, + "map_at_10": 0.65224, + "map_at_20": 0.65887, + "map_at_100": 0.66267, + "map_at_1000": 0.66321, + "recall_at_1": 0.3895, + "recall_at_3": 0.658, + "recall_at_5": 0.725, + "recall_at_10": 0.787, + "recall_at_20": 0.8405, + "recall_at_100": 0.9225, + "recall_at_1000": 0.982, + "precision_at_1": 0.779, + "precision_at_3": 0.43867, + "precision_at_5": 0.29, + "precision_at_10": 0.1574, + "precision_at_20": 0.08405, + "precision_at_100": 0.01845, + "precision_at_1000": 0.00196, + "mrr_at_1": 0.779, + "mrr_at_3": 0.834667, + "mrr_at_5": 0.840467, + "mrr_at_10": 0.844268, + "mrr_at_20": 0.845757, + "mrr_at_100": 0.846195, + "mrr_at_1000": 0.846229, + "nauc_ndcg_at_1_max": 0.301675, + "nauc_ndcg_at_1_std": -0.271193, + "nauc_ndcg_at_1_diff1": 0.550134, + "nauc_ndcg_at_3_max": 0.226032, + "nauc_ndcg_at_3_std": -0.135486, + "nauc_ndcg_at_3_diff1": 0.157724, + "nauc_ndcg_at_5_max": 0.269228, + "nauc_ndcg_at_5_std": -0.126897, + "nauc_ndcg_at_5_diff1": 0.187188, + "nauc_ndcg_at_10_max": 0.286673, + "nauc_ndcg_at_10_std": -0.112734, + "nauc_ndcg_at_10_diff1": 0.206109, + "nauc_ndcg_at_20_max": 0.284172, + "nauc_ndcg_at_20_std": -0.113424, + "nauc_ndcg_at_20_diff1": 0.210625, + "nauc_ndcg_at_100_max": 0.287254, + "nauc_ndcg_at_100_std": -0.097731, + "nauc_ndcg_at_100_diff1": 0.221286, + "nauc_ndcg_at_1000_max": 0.282467, + "nauc_ndcg_at_1000_std": -0.109546, + "nauc_ndcg_at_1000_diff1": 0.224459, + "nauc_map_at_1_max": 0.301675, + "nauc_map_at_1_std": -0.271193, + "nauc_map_at_1_diff1": 0.550134, + "nauc_map_at_3_max": 0.203275, + "nauc_map_at_3_std": -0.116751, + "nauc_map_at_3_diff1": 0.114545, + "nauc_map_at_5_max": 0.238364, + "nauc_map_at_5_std": -0.111162, + "nauc_map_at_5_diff1": 0.140277, + "nauc_map_at_10_max": 0.247035, + "nauc_map_at_10_std": -0.103935, + "nauc_map_at_10_diff1": 0.150077, + "nauc_map_at_20_max": 0.245978, + "nauc_map_at_20_std": -0.104624, + "nauc_map_at_20_diff1": 0.151966, + "nauc_map_at_100_max": 0.246235, + "nauc_map_at_100_std": -0.102565, + "nauc_map_at_100_diff1": 0.153465, + "nauc_map_at_1000_max": 0.246156, + "nauc_map_at_1000_std": -0.102833, + "nauc_map_at_1000_diff1": 0.153589, + "nauc_recall_at_1_max": 0.301675, + "nauc_recall_at_1_std": -0.271193, + "nauc_recall_at_1_diff1": 0.550134, + "nauc_recall_at_3_max": 0.206929, + "nauc_recall_at_3_std": -0.091073, + "nauc_recall_at_3_diff1": 0.044917, + "nauc_recall_at_5_max": 0.284735, + "nauc_recall_at_5_std": -0.064207, + "nauc_recall_at_5_diff1": 0.085058, + "nauc_recall_at_10_max": 0.333203, + "nauc_recall_at_10_std": -0.011376, + "nauc_recall_at_10_diff1": 0.119464, + "nauc_recall_at_20_max": 0.329359, + "nauc_recall_at_20_std": 0.012187, + "nauc_recall_at_20_diff1": 0.112417, + "nauc_recall_at_100_max": 0.412009, + "nauc_recall_at_100_std": 0.278749, + "nauc_recall_at_100_diff1": 0.138695, + "nauc_recall_at_1000_max": 0.557734, + "nauc_recall_at_1000_std": 0.663295, + "nauc_recall_at_1000_diff1": 0.094901, + "nauc_precision_at_1_max": 0.301675, + "nauc_precision_at_1_std": -0.271193, + "nauc_precision_at_1_diff1": 0.550134, + "nauc_precision_at_3_max": 0.206929, + "nauc_precision_at_3_std": -0.091073, + "nauc_precision_at_3_diff1": 0.044917, + "nauc_precision_at_5_max": 0.284735, + "nauc_precision_at_5_std": -0.064207, + "nauc_precision_at_5_diff1": 0.085058, + "nauc_precision_at_10_max": 0.333203, + "nauc_precision_at_10_std": -0.011376, + "nauc_precision_at_10_diff1": 0.119464, + "nauc_precision_at_20_max": 0.329359, + "nauc_precision_at_20_std": 0.012187, + "nauc_precision_at_20_diff1": 0.112417, + "nauc_precision_at_100_max": 0.412009, + "nauc_precision_at_100_std": 0.278749, + "nauc_precision_at_100_diff1": 0.138695, + "nauc_precision_at_1000_max": 0.557734, + "nauc_precision_at_1000_std": 0.663295, + "nauc_precision_at_1000_diff1": 0.094901, + "nauc_mrr_at_1_max": 0.301675, + "nauc_mrr_at_1_std": -0.271193, + "nauc_mrr_at_1_diff1": 0.550134, + "nauc_mrr_at_3_max": 0.333939, + "nauc_mrr_at_3_std": -0.283204, + "nauc_mrr_at_3_diff1": 0.527815, + "nauc_mrr_at_5_max": 0.330995, + "nauc_mrr_at_5_std": -0.285468, + "nauc_mrr_at_5_diff1": 0.530699, + "nauc_mrr_at_10_max": 0.331107, + "nauc_mrr_at_10_std": -0.284727, + "nauc_mrr_at_10_diff1": 0.532965, + "nauc_mrr_at_20_max": 0.330488, + "nauc_mrr_at_20_std": -0.284426, + "nauc_mrr_at_20_diff1": 0.533013, + "nauc_mrr_at_100_max": 0.329802, + "nauc_mrr_at_100_std": -0.283238, + "nauc_mrr_at_100_diff1": 0.532894, + "nauc_mrr_at_1000_max": 0.329752, + "nauc_mrr_at_1000_std": -0.283357, + "nauc_mrr_at_1000_diff1": 0.532932, + "main_score": 0.73406, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 2734.727002620697, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ImdbClassification.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ImdbClassification.json new file mode 100644 index 0000000000..54f34610e9 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ImdbClassification.json @@ -0,0 +1,95 @@ +{ + "dataset_revision": "3d86128a09e091d6018b6d26cad27f2739fc2db7", + "task_name": "ImdbClassification", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "accuracy": 0.920364, + "f1": 0.920196, + "f1_weighted": 0.920196, + "ap": 0.882324, + "ap_weighted": 0.882324, + "scores_per_experiment": [ + { + "accuracy": 0.90076, + "f1": 0.900233, + "f1_weighted": 0.900233, + "ap": 0.888305, + "ap_weighted": 0.888305 + }, + { + "accuracy": 0.90816, + "f1": 0.908055, + "f1_weighted": 0.908055, + "ap": 0.882768, + "ap_weighted": 0.882768 + }, + { + "accuracy": 0.92724, + "f1": 0.927025, + "f1_weighted": 0.927025, + "ap": 0.878279, + "ap_weighted": 0.878279 + }, + { + "accuracy": 0.93, + "f1": 0.929846, + "f1_weighted": 0.929846, + "ap": 0.884075, + "ap_weighted": 0.884075 + }, + { + "accuracy": 0.93772, + "f1": 0.937707, + "f1_weighted": 0.937707, + "ap": 0.904994, + "ap_weighted": 0.904994 + }, + { + "accuracy": 0.92612, + "f1": 0.926085, + "f1_weighted": 0.926085, + "ap": 0.887026, + "ap_weighted": 0.887026 + }, + { + "accuracy": 0.94032, + "f1": 0.940319, + "f1_weighted": 0.940319, + "ap": 0.912839, + "ap_weighted": 0.912839 + }, + { + "accuracy": 0.90116, + "f1": 0.900826, + "f1_weighted": 0.900826, + "ap": 0.844772, + "ap_weighted": 0.844772 + }, + { + "accuracy": 0.914, + "f1": 0.913714, + "f1_weighted": 0.913714, + "ap": 0.860691, + "ap_weighted": 0.860691 + }, + { + "accuracy": 0.91816, + "f1": 0.918146, + "f1_weighted": 0.918146, + "ap": 0.879493, + "ap_weighted": 0.879493 + } + ], + "main_score": 0.920364, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 738.5170748233795, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MTOPDomainClassification.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MTOPDomainClassification.json new file mode 100644 index 0000000000..660b9c3e37 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MTOPDomainClassification.json @@ -0,0 +1,73 @@ +{ + "dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf", + "task_name": "MTOPDomainClassification", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "accuracy": 0.984086, + "f1": 0.981618, + "f1_weighted": 0.984043, + "scores_per_experiment": [ + { + "accuracy": 0.984268, + "f1": 0.981626, + "f1_weighted": 0.984249 + }, + { + "accuracy": 0.98404, + "f1": 0.981663, + "f1_weighted": 0.983953 + }, + { + "accuracy": 0.984268, + "f1": 0.981551, + "f1_weighted": 0.984214 + }, + { + "accuracy": 0.983812, + "f1": 0.98077, + "f1_weighted": 0.983748 + }, + { + "accuracy": 0.984268, + "f1": 0.981548, + "f1_weighted": 0.984178 + }, + { + "accuracy": 0.981988, + "f1": 0.979388, + "f1_weighted": 0.981945 + }, + { + "accuracy": 0.987916, + "f1": 0.986171, + "f1_weighted": 0.987894 + }, + { + "accuracy": 0.986092, + "f1": 0.983737, + "f1_weighted": 0.986085 + }, + { + "accuracy": 0.97948, + "f1": 0.977344, + "f1_weighted": 0.979495 + }, + { + "accuracy": 0.984724, + "f1": 0.98238, + "f1_weighted": 0.984669 + } + ], + "main_score": 0.984086, + "hf_subset": "en", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 42.632185220718384, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveIntentClassification.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveIntentClassification.json new file mode 100644 index 0000000000..6476922853 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveIntentClassification.json @@ -0,0 +1,73 @@ +{ + "dataset_revision": "4672e20407010da34463acc759c162ca9734bca6", + "task_name": "MassiveIntentClassification", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "accuracy": 0.855447, + "f1": 0.829259, + "f1_weighted": 0.849901, + "scores_per_experiment": [ + { + "accuracy": 0.852051, + "f1": 0.83253, + "f1_weighted": 0.848228 + }, + { + "accuracy": 0.848016, + "f1": 0.82379, + "f1_weighted": 0.841601 + }, + { + "accuracy": 0.843645, + "f1": 0.814109, + "f1_weighted": 0.83607 + }, + { + "accuracy": 0.863147, + "f1": 0.830559, + "f1_weighted": 0.857771 + }, + { + "accuracy": 0.85037, + "f1": 0.824199, + "f1_weighted": 0.843058 + }, + { + "accuracy": 0.854069, + "f1": 0.826279, + "f1_weighted": 0.849188 + }, + { + "accuracy": 0.856759, + "f1": 0.832055, + "f1_weighted": 0.850073 + }, + { + "accuracy": 0.864156, + "f1": 0.839417, + "f1_weighted": 0.858056 + }, + { + "accuracy": 0.860794, + "f1": 0.836607, + "f1_weighted": 0.85758 + }, + { + "accuracy": 0.861466, + "f1": 0.833042, + "f1_weighted": 0.857385 + } + ], + "main_score": 0.855447, + "hf_subset": "en", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 69.49780416488647, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveScenarioClassification.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveScenarioClassification.json new file mode 100644 index 0000000000..4cbeb81c53 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveScenarioClassification.json @@ -0,0 +1,73 @@ +{ + "dataset_revision": "fad2c6e8459f9e1c45d9315f4953d921437d70f8", + "task_name": "MassiveScenarioClassification", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "accuracy": 0.903968, + "f1": 0.894734, + "f1_weighted": 0.901256, + "scores_per_experiment": [ + { + "accuracy": 0.904506, + "f1": 0.896596, + "f1_weighted": 0.902218 + }, + { + "accuracy": 0.906187, + "f1": 0.896956, + "f1_weighted": 0.903729 + }, + { + "accuracy": 0.906187, + "f1": 0.896458, + "f1_weighted": 0.90364 + }, + { + "accuracy": 0.901816, + "f1": 0.890091, + "f1_weighted": 0.897697 + }, + { + "accuracy": 0.904169, + "f1": 0.894311, + "f1_weighted": 0.900852 + }, + { + "accuracy": 0.899462, + "f1": 0.888602, + "f1_weighted": 0.895651 + }, + { + "accuracy": 0.905514, + "f1": 0.898568, + "f1_weighted": 0.903853 + }, + { + "accuracy": 0.895427, + "f1": 0.886345, + "f1_weighted": 0.893036 + }, + { + "accuracy": 0.909549, + "f1": 0.902018, + "f1_weighted": 0.907102 + }, + { + "accuracy": 0.906859, + "f1": 0.897391, + "f1_weighted": 0.904782 + } + ], + "main_score": 0.903968, + "hf_subset": "en", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 41.71501660346985, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringP2P.v2.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringP2P.v2.json new file mode 100644 index 0000000000..08e35886ea --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringP2P.v2.json @@ -0,0 +1,34 @@ +{ + "dataset_revision": "e7a26af6f3ae46b30dde8737f02c07b1505bcc73", + "task_name": "MedrxivClusteringP2P.v2", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "v_measures": { + "Level 0": [ + 0.469436, + 0.472021, + 0.470121, + 0.461723, + 0.455868, + 0.442909, + 0.463523, + 0.454351, + 0.459568, + 0.451916 + ] + }, + "v_measure": 0.460144, + "v_measure_std": 0.008705, + "main_score": 0.460144, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 72.25293397903442, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringS2S.v2.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringS2S.v2.json new file mode 100644 index 0000000000..2395c50ff3 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringS2S.v2.json @@ -0,0 +1,34 @@ +{ + "dataset_revision": "35191c8c0dca72d8ff3efcd72aa802307d469663", + "task_name": "MedrxivClusteringS2S.v2", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "v_measures": { + "Level 0": [ + 0.478179, + 0.466183, + 0.486015, + 0.479123, + 0.471071, + 0.454717, + 0.477185, + 0.47782, + 0.48444, + 0.499146 + ] + }, + "v_measure": 0.477388, + "v_measure_std": 0.011318, + "main_score": 0.477388, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 22.133918523788452, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MindSmallReranking.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MindSmallReranking.json new file mode 100644 index 0000000000..3cc22c3e43 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MindSmallReranking.json @@ -0,0 +1,26 @@ +{ + "dataset_revision": "59042f120c80e8afa9cdbb224f67076cec0fc9a7", + "task_name": "MindSmallReranking", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "map": 0.320956, + "mrr": 0.332124, + "nAUC_map_max": -0.199141, + "nAUC_map_std": -0.070284, + "nAUC_map_diff1": 0.104873, + "nAUC_mrr_max": -0.148112, + "nAUC_mrr_std": -0.041584, + "nAUC_mrr_diff1": 0.100313, + "main_score": 0.320956, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 414.89182472229004, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SCIDOCS.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SCIDOCS.json new file mode 100644 index 0000000000..7c47db7023 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SCIDOCS.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "f8c2fcf00f625baaa80f62ec5bd9e1fff3b8ae88", + "task_name": "SCIDOCS", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "ndcg_at_1": 0.252, + "ndcg_at_3": 0.20703, + "ndcg_at_5": 0.18106, + "ndcg_at_10": 0.22138, + "ndcg_at_20": 0.25208, + "ndcg_at_100": 0.30323, + "ndcg_at_1000": 0.35633, + "map_at_1": 0.05128, + "map_at_3": 0.09311, + "map_at_5": 0.11204, + "map_at_10": 0.13322, + "map_at_20": 0.1453, + "map_at_100": 0.1559, + "map_at_1000": 0.15905, + "recall_at_1": 0.05128, + "recall_at_3": 0.11818, + "recall_at_5": 0.16183, + "recall_at_10": 0.23557, + "recall_at_20": 0.30777, + "recall_at_100": 0.4744, + "recall_at_1000": 0.73357, + "precision_at_1": 0.252, + "precision_at_3": 0.19467, + "precision_at_5": 0.1598, + "precision_at_10": 0.1163, + "precision_at_20": 0.0759, + "precision_at_100": 0.02337, + "precision_at_1000": 0.00361, + "mrr_at_1": 0.252, + "mrr_at_3": 0.332333, + "mrr_at_5": 0.351733, + "mrr_at_10": 0.367038, + "mrr_at_20": 0.37311, + "mrr_at_100": 0.376832, + "mrr_at_1000": 0.377298, + "nauc_ndcg_at_1_max": 0.132314, + "nauc_ndcg_at_1_std": 0.022706, + "nauc_ndcg_at_1_diff1": 0.255776, + "nauc_ndcg_at_3_max": 0.184053, + "nauc_ndcg_at_3_std": 0.068617, + "nauc_ndcg_at_3_diff1": 0.179658, + "nauc_ndcg_at_5_max": 0.203945, + "nauc_ndcg_at_5_std": 0.085633, + "nauc_ndcg_at_5_diff1": 0.17273, + "nauc_ndcg_at_10_max": 0.237992, + "nauc_ndcg_at_10_std": 0.121169, + "nauc_ndcg_at_10_diff1": 0.161917, + "nauc_ndcg_at_20_max": 0.250473, + "nauc_ndcg_at_20_std": 0.151667, + "nauc_ndcg_at_20_diff1": 0.145642, + "nauc_ndcg_at_100_max": 0.256369, + "nauc_ndcg_at_100_std": 0.195937, + "nauc_ndcg_at_100_diff1": 0.128448, + "nauc_ndcg_at_1000_max": 0.252292, + "nauc_ndcg_at_1000_std": 0.207183, + "nauc_ndcg_at_1000_diff1": 0.146064, + "nauc_map_at_1_max": 0.130941, + "nauc_map_at_1_std": 0.023359, + "nauc_map_at_1_diff1": 0.255424, + "nauc_map_at_3_max": 0.167615, + "nauc_map_at_3_std": 0.054483, + "nauc_map_at_3_diff1": 0.171017, + "nauc_map_at_5_max": 0.191323, + "nauc_map_at_5_std": 0.065661, + "nauc_map_at_5_diff1": 0.165351, + "nauc_map_at_10_max": 0.224021, + "nauc_map_at_10_std": 0.094602, + "nauc_map_at_10_diff1": 0.153811, + "nauc_map_at_20_max": 0.234419, + "nauc_map_at_20_std": 0.115929, + "nauc_map_at_20_diff1": 0.146083, + "nauc_map_at_100_max": 0.236741, + "nauc_map_at_100_std": 0.130266, + "nauc_map_at_100_diff1": 0.139052, + "nauc_map_at_1000_max": 0.236901, + "nauc_map_at_1000_std": 0.132993, + "nauc_map_at_1000_diff1": 0.140176, + "nauc_recall_at_1_max": 0.130941, + "nauc_recall_at_1_std": 0.023359, + "nauc_recall_at_1_diff1": 0.255424, + "nauc_recall_at_3_max": 0.197812, + "nauc_recall_at_3_std": 0.088512, + "nauc_recall_at_3_diff1": 0.151812, + "nauc_recall_at_5_max": 0.217625, + "nauc_recall_at_5_std": 0.108726, + "nauc_recall_at_5_diff1": 0.133694, + "nauc_recall_at_10_max": 0.268037, + "nauc_recall_at_10_std": 0.166894, + "nauc_recall_at_10_diff1": 0.113644, + "nauc_recall_at_20_max": 0.275478, + "nauc_recall_at_20_std": 0.21798, + "nauc_recall_at_20_diff1": 0.076262, + "nauc_recall_at_100_max": 0.259318, + "nauc_recall_at_100_std": 0.312458, + "nauc_recall_at_100_diff1": 0.023778, + "nauc_recall_at_1000_max": 0.218477, + "nauc_recall_at_1000_std": 0.374135, + "nauc_recall_at_1000_diff1": 0.068429, + "nauc_precision_at_1_max": 0.132314, + "nauc_precision_at_1_std": 0.022706, + "nauc_precision_at_1_diff1": 0.255776, + "nauc_precision_at_3_max": 0.198521, + "nauc_precision_at_3_std": 0.08748, + "nauc_precision_at_3_diff1": 0.14995, + "nauc_precision_at_5_max": 0.219281, + "nauc_precision_at_5_std": 0.108113, + "nauc_precision_at_5_diff1": 0.131082, + "nauc_precision_at_10_max": 0.269814, + "nauc_precision_at_10_std": 0.166158, + "nauc_precision_at_10_diff1": 0.113115, + "nauc_precision_at_20_max": 0.277509, + "nauc_precision_at_20_std": 0.215859, + "nauc_precision_at_20_diff1": 0.076274, + "nauc_precision_at_100_max": 0.260412, + "nauc_precision_at_100_std": 0.305294, + "nauc_precision_at_100_diff1": 0.024876, + "nauc_precision_at_1000_max": 0.214205, + "nauc_precision_at_1000_std": 0.350027, + "nauc_precision_at_1000_diff1": 0.064729, + "nauc_mrr_at_1_max": 0.132314, + "nauc_mrr_at_1_std": 0.022706, + "nauc_mrr_at_1_diff1": 0.255776, + "nauc_mrr_at_3_max": 0.180223, + "nauc_mrr_at_3_std": 0.063513, + "nauc_mrr_at_3_diff1": 0.222355, + "nauc_mrr_at_5_max": 0.183784, + "nauc_mrr_at_5_std": 0.071545, + "nauc_mrr_at_5_diff1": 0.219069, + "nauc_mrr_at_10_max": 0.185163, + "nauc_mrr_at_10_std": 0.07802, + "nauc_mrr_at_10_diff1": 0.220358, + "nauc_mrr_at_20_max": 0.184588, + "nauc_mrr_at_20_std": 0.079026, + "nauc_mrr_at_20_diff1": 0.217033, + "nauc_mrr_at_100_max": 0.185497, + "nauc_mrr_at_100_std": 0.080738, + "nauc_mrr_at_100_diff1": 0.217726, + "nauc_mrr_at_1000_max": 0.185253, + "nauc_mrr_at_1000_std": 0.080226, + "nauc_mrr_at_1000_diff1": 0.217972, + "main_score": 0.22138, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 625.1164221763611, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SICK-R.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SICK-R.json new file mode 100644 index 0000000000..b71a42073a --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SICK-R.json @@ -0,0 +1,26 @@ +{ + "dataset_revision": "20a6d6f312dd54037fe07a32d58e5e168867909d", + "task_name": "SICK-R", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "pearson": 0.844918, + "spearman": 0.800756, + "cosine_pearson": 0.844918, + "cosine_spearman": 0.800756, + "manhattan_pearson": 0.814647, + "manhattan_spearman": 0.800268, + "euclidean_pearson": 0.815444, + "euclidean_spearman": 0.800756, + "main_score": 0.800756, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 131.42402482032776, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS12.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS12.json new file mode 100644 index 0000000000..0bbfa10efc --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS12.json @@ -0,0 +1,26 @@ +{ + "dataset_revision": "a0d554a64d88156834ff5ae9920b964011b16384", + "task_name": "STS12", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "pearson": 0.78451, + "spearman": 0.678271, + "cosine_pearson": 0.78451, + "cosine_spearman": 0.678271, + "manhattan_pearson": 0.731829, + "manhattan_spearman": 0.679834, + "euclidean_pearson": 0.73113, + "euclidean_spearman": 0.678271, + "main_score": 0.678271, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 42.83954191207886, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS13.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS13.json new file mode 100644 index 0000000000..4aa85bca75 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS13.json @@ -0,0 +1,26 @@ +{ + "dataset_revision": "7e90230a92c190f1bf69ae9002b8cea547a64cca", + "task_name": "STS13", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "pearson": 0.826875, + "spearman": 0.822776, + "cosine_pearson": 0.826875, + "cosine_spearman": 0.822776, + "manhattan_pearson": 0.817308, + "manhattan_spearman": 0.823147, + "euclidean_pearson": 0.817555, + "euclidean_spearman": 0.822776, + "main_score": 0.822776, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 20.056350708007812, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS14.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS14.json new file mode 100644 index 0000000000..9d3772c642 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS14.json @@ -0,0 +1,26 @@ +{ + "dataset_revision": "6031580fec1f6af667f0bd2da0a551cf4f0b2375", + "task_name": "STS14", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "pearson": 0.822621, + "spearman": 0.782074, + "cosine_pearson": 0.822621, + "cosine_spearman": 0.782074, + "manhattan_pearson": 0.801011, + "manhattan_spearman": 0.781978, + "euclidean_pearson": 0.801609, + "euclidean_spearman": 0.782074, + "main_score": 0.782074, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 50.56356692314148, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS15.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS15.json new file mode 100644 index 0000000000..640326a0e5 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS15.json @@ -0,0 +1,26 @@ +{ + "dataset_revision": "ae752c7c21bf194d8b67fd573edf7ae58183cbe3", + "task_name": "STS15", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "pearson": 0.868949, + "spearman": 0.870581, + "cosine_pearson": 0.868949, + "cosine_spearman": 0.870581, + "manhattan_pearson": 0.859647, + "manhattan_spearman": 0.870117, + "euclidean_pearson": 0.860584, + "euclidean_spearman": 0.870581, + "main_score": 0.870581, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 40.752835512161255, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS17.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS17.json new file mode 100644 index 0000000000..e85fa49cf8 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS17.json @@ -0,0 +1,26 @@ +{ + "dataset_revision": "faeb762787bd10488a50c8b5be4a3b82e411949c", + "task_name": "STS17", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "pearson": 0.907977, + "spearman": 0.898873, + "cosine_pearson": 0.907977, + "cosine_spearman": 0.898873, + "manhattan_pearson": 0.901723, + "manhattan_spearman": 0.896652, + "euclidean_pearson": 0.90312, + "euclidean_spearman": 0.898873, + "main_score": 0.898873, + "hf_subset": "en-en", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 6.136534690856934, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS22.v2.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS22.v2.json new file mode 100644 index 0000000000..2c7d8feda7 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS22.v2.json @@ -0,0 +1,26 @@ +{ + "dataset_revision": "d31f33a128469b20e357535c39b82fb3c3f6f2bd", + "task_name": "STS22.v2", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "pearson": 0.723773, + "spearman": 0.717025, + "cosine_pearson": 0.723773, + "cosine_spearman": 0.717025, + "manhattan_pearson": 0.725783, + "manhattan_spearman": 0.715156, + "euclidean_pearson": 0.725993, + "euclidean_spearman": 0.717025, + "main_score": 0.717025, + "hf_subset": "en", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 44.09387993812561, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STSBenchmark.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STSBenchmark.json new file mode 100644 index 0000000000..f0874d6993 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STSBenchmark.json @@ -0,0 +1,26 @@ +{ + "dataset_revision": "b0fddb56ed78048fa8b90373c8a3cfc37b684831", + "task_name": "STSBenchmark", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "pearson": 0.846459, + "spearman": 0.840544, + "cosine_pearson": 0.846459, + "cosine_spearman": 0.840544, + "manhattan_pearson": 0.840583, + "manhattan_spearman": 0.841876, + "euclidean_pearson": 0.839721, + "euclidean_spearman": 0.840544, + "main_score": 0.840544, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 18.82219958305359, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SprintDuplicateQuestions.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SprintDuplicateQuestions.json new file mode 100644 index 0000000000..7f1f6ab8d5 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SprintDuplicateQuestions.json @@ -0,0 +1,58 @@ +{ + "dataset_revision": "d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46", + "task_name": "SprintDuplicateQuestions", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "similarity_accuracy": 0.997366, + "similarity_accuracy_threshold": 0.716425, + "similarity_f1": 0.863231, + "similarity_f1_threshold": 0.69047, + "similarity_precision": 0.855599, + "similarity_recall": 0.871, + "similarity_ap": 0.935118, + "cosine_accuracy": 0.997366, + "cosine_accuracy_threshold": 0.716425, + "cosine_f1": 0.863231, + "cosine_f1_threshold": 0.69047, + "cosine_precision": 0.855599, + "cosine_recall": 0.871, + "cosine_ap": 0.935118, + "manhattan_accuracy": 0.997406, + "manhattan_accuracy_threshold": 37.778702, + "manhattan_f1": 0.863825, + "manhattan_f1_threshold": 37.814972, + "manhattan_precision": 0.899351, + "manhattan_recall": 0.831, + "manhattan_ap": 0.935964, + "euclidean_accuracy": 0.997366, + "euclidean_accuracy_threshold": 0.753093, + "euclidean_f1": 0.863231, + "euclidean_f1_threshold": 0.786804, + "euclidean_precision": 0.855599, + "euclidean_recall": 0.871, + "euclidean_ap": 0.935118, + "dot_accuracy": 0.997366, + "dot_accuracy_threshold": 0.716425, + "dot_f1": 0.863231, + "dot_f1_threshold": 0.690469, + "dot_precision": 0.855599, + "dot_recall": 0.871, + "dot_ap": 0.935118, + "max_accuracy": 0.997406, + "max_f1": 0.863825, + "max_precision": 0.899351, + "max_recall": 0.871, + "max_ap": 0.935964, + "main_score": 0.935964, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 76.13990664482117, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClustering.v2.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClustering.v2.json new file mode 100644 index 0000000000..58ff13c38e --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClustering.v2.json @@ -0,0 +1,34 @@ +{ + "dataset_revision": "6cbc1f7b2bc0622f2e39d2c77fa502909748c259", + "task_name": "StackExchangeClustering.v2", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "v_measures": { + "Level 0": [ + 0.540841, + 0.536082, + 0.524139, + 0.528722, + 0.543624, + 0.542468, + 0.548276, + 0.530183, + 0.543604, + 0.547097 + ] + }, + "v_measure": 0.538504, + "v_measure_std": 0.007873, + "main_score": 0.538504, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 26.84200930595398, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClusteringP2P.v2.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClusteringP2P.v2.json new file mode 100644 index 0000000000..81db70ade2 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClusteringP2P.v2.json @@ -0,0 +1,34 @@ +{ + "dataset_revision": "815ca46b2622cec33ccafc3735d572c266efdb44", + "task_name": "StackExchangeClusteringP2P.v2", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "v_measures": { + "Level 0": [ + 0.399592, + 0.405015, + 0.406507, + 0.406, + 0.394811, + 0.406524, + 0.401406, + 0.402619, + 0.407485, + 0.404686 + ] + }, + "v_measure": 0.403465, + "v_measure_std": 0.003741, + "main_score": 0.403465, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 107.24846076965332, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SummEvalSummarization.v2.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SummEvalSummarization.v2.json new file mode 100644 index 0000000000..97c77715c5 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SummEvalSummarization.v2.json @@ -0,0 +1,24 @@ +{ + "dataset_revision": "cda12ad7615edc362dbf25a00fdd61d3b1eaf93c", + "task_name": "SummEvalSummarization.v2", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "pearson": 0.366147, + "spearman": 0.304292, + "cosine_spearman": 0.304292, + "cosine_pearson": 0.366147, + "dot_spearman": 0.304292, + "dot_pearson": 0.366146, + "main_score": 0.304292, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 28.100568771362305, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TRECCOVID.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TRECCOVID.json new file mode 100644 index 0000000000..b57d6b24c6 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TRECCOVID.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "bb9466bac8153a0349341eb1b22e06409e78ef4e", + "task_name": "TRECCOVID", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "ndcg_at_1": 0.87, + "ndcg_at_3": 0.81989, + "ndcg_at_5": 0.80489, + "ndcg_at_10": 0.77264, + "ndcg_at_20": 0.74387, + "ndcg_at_100": 0.61169, + "ndcg_at_1000": 0.5401, + "map_at_1": 0.00239, + "map_at_3": 0.0067, + "map_at_5": 0.01054, + "map_at_10": 0.01958, + "map_at_20": 0.03535, + "map_at_100": 0.12127, + "map_at_1000": 0.28454, + "recall_at_1": 0.00239, + "recall_at_3": 0.00693, + "recall_at_5": 0.0111, + "recall_at_10": 0.0213, + "recall_at_20": 0.04006, + "recall_at_100": 0.15432, + "recall_at_1000": 0.50872, + "precision_at_1": 0.9, + "precision_at_3": 0.86667, + "precision_at_5": 0.844, + "precision_at_10": 0.808, + "precision_at_20": 0.78, + "precision_at_100": 0.63, + "precision_at_1000": 0.23364, + "mrr_at_1": 0.9, + "mrr_at_3": 0.933333, + "mrr_at_5": 0.933333, + "mrr_at_10": 0.933333, + "mrr_at_20": 0.934762, + "mrr_at_100": 0.934762, + "mrr_at_1000": 0.934762, + "nauc_ndcg_at_1_max": 0.319966, + "nauc_ndcg_at_1_std": 0.340332, + "nauc_ndcg_at_1_diff1": -0.172443, + "nauc_ndcg_at_3_max": 0.277444, + "nauc_ndcg_at_3_std": 0.348258, + "nauc_ndcg_at_3_diff1": -0.083164, + "nauc_ndcg_at_5_max": 0.31727, + "nauc_ndcg_at_5_std": 0.41698, + "nauc_ndcg_at_5_diff1": -0.076063, + "nauc_ndcg_at_10_max": 0.313728, + "nauc_ndcg_at_10_std": 0.515152, + "nauc_ndcg_at_10_diff1": -0.134271, + "nauc_ndcg_at_20_max": 0.393279, + "nauc_ndcg_at_20_std": 0.630115, + "nauc_ndcg_at_20_diff1": -0.121746, + "nauc_ndcg_at_100_max": 0.433678, + "nauc_ndcg_at_100_std": 0.723514, + "nauc_ndcg_at_100_diff1": -0.165524, + "nauc_ndcg_at_1000_max": 0.432502, + "nauc_ndcg_at_1000_std": 0.654705, + "nauc_ndcg_at_1000_diff1": 0.035785, + "nauc_map_at_1_max": -0.040436, + "nauc_map_at_1_std": -0.051995, + "nauc_map_at_1_diff1": 0.195967, + "nauc_map_at_3_max": -0.002203, + "nauc_map_at_3_std": 0.008209, + "nauc_map_at_3_diff1": 0.253341, + "nauc_map_at_5_max": 0.053072, + "nauc_map_at_5_std": 0.070807, + "nauc_map_at_5_diff1": 0.29235, + "nauc_map_at_10_max": 0.084483, + "nauc_map_at_10_std": 0.13676, + "nauc_map_at_10_diff1": 0.298854, + "nauc_map_at_20_max": 0.205355, + "nauc_map_at_20_std": 0.282301, + "nauc_map_at_20_diff1": 0.275912, + "nauc_map_at_100_max": 0.368922, + "nauc_map_at_100_std": 0.540407, + "nauc_map_at_100_diff1": 0.156115, + "nauc_map_at_1000_max": 0.52721, + "nauc_map_at_1000_std": 0.743527, + "nauc_map_at_1000_diff1": 0.002973, + "nauc_recall_at_1_max": -0.040436, + "nauc_recall_at_1_std": -0.051995, + "nauc_recall_at_1_diff1": 0.195967, + "nauc_recall_at_3_max": -0.016786, + "nauc_recall_at_3_std": -0.00765, + "nauc_recall_at_3_diff1": 0.270141, + "nauc_recall_at_5_max": 0.041662, + "nauc_recall_at_5_std": 0.046073, + "nauc_recall_at_5_diff1": 0.329549, + "nauc_recall_at_10_max": 0.053906, + "nauc_recall_at_10_std": 0.087569, + "nauc_recall_at_10_diff1": 0.320571, + "nauc_recall_at_20_max": 0.154484, + "nauc_recall_at_20_std": 0.200305, + "nauc_recall_at_20_diff1": 0.313207, + "nauc_recall_at_100_max": 0.249311, + "nauc_recall_at_100_std": 0.378119, + "nauc_recall_at_100_diff1": 0.207397, + "nauc_recall_at_1000_max": 0.389597, + "nauc_recall_at_1000_std": 0.522329, + "nauc_recall_at_1000_diff1": 0.086765, + "nauc_precision_at_1_max": 0.312512, + "nauc_precision_at_1_std": 0.35733, + "nauc_precision_at_1_diff1": -0.327918, + "nauc_precision_at_3_max": 0.297858, + "nauc_precision_at_3_std": 0.428622, + "nauc_precision_at_3_diff1": -0.114638, + "nauc_precision_at_5_max": 0.38742, + "nauc_precision_at_5_std": 0.491801, + "nauc_precision_at_5_diff1": -0.023161, + "nauc_precision_at_10_max": 0.357688, + "nauc_precision_at_10_std": 0.580349, + "nauc_precision_at_10_diff1": -0.066072, + "nauc_precision_at_20_max": 0.481647, + "nauc_precision_at_20_std": 0.723752, + "nauc_precision_at_20_diff1": -0.033191, + "nauc_precision_at_100_max": 0.466879, + "nauc_precision_at_100_std": 0.752783, + "nauc_precision_at_100_diff1": -0.180692, + "nauc_precision_at_1000_max": 0.350212, + "nauc_precision_at_1000_std": 0.452383, + "nauc_precision_at_1000_diff1": -0.234271, + "nauc_mrr_at_1_max": 0.312512, + "nauc_mrr_at_1_std": 0.35733, + "nauc_mrr_at_1_diff1": -0.327918, + "nauc_mrr_at_3_max": 0.433987, + "nauc_mrr_at_3_std": 0.395962, + "nauc_mrr_at_3_diff1": -0.285761, + "nauc_mrr_at_5_max": 0.433987, + "nauc_mrr_at_5_std": 0.395962, + "nauc_mrr_at_5_diff1": -0.285761, + "nauc_mrr_at_10_max": 0.433987, + "nauc_mrr_at_10_std": 0.395962, + "nauc_mrr_at_10_diff1": -0.285761, + "nauc_mrr_at_20_max": 0.421592, + "nauc_mrr_at_20_std": 0.382735, + "nauc_mrr_at_20_diff1": -0.288267, + "nauc_mrr_at_100_max": 0.421592, + "nauc_mrr_at_100_std": 0.382735, + "nauc_mrr_at_100_diff1": -0.288267, + "nauc_mrr_at_1000_max": 0.421592, + "nauc_mrr_at_1000_std": 0.382735, + "nauc_mrr_at_1000_diff1": -0.288267, + "main_score": 0.77264, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 4529.545472621918, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Touche2020Retrieval.v3.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Touche2020Retrieval.v3.json new file mode 100644 index 0000000000..2297a2a37b --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Touche2020Retrieval.v3.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "431886eaecc48f067a3975b70d0949ea2862463c", + "task_name": "Touche2020Retrieval.v3", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "ndcg_at_1": 0.59184, + "ndcg_at_3": 0.59412, + "ndcg_at_5": 0.59416, + "ndcg_at_10": 0.54375, + "ndcg_at_20": 0.4847, + "ndcg_at_100": 0.57097, + "ndcg_at_1000": 0.6953, + "map_at_1": 0.02513, + "map_at_3": 0.06282, + "map_at_5": 0.0986, + "map_at_10": 0.16275, + "map_at_20": 0.23549, + "map_at_100": 0.33525, + "map_at_1000": 0.36911, + "recall_at_1": 0.02513, + "recall_at_3": 0.07247, + "recall_at_5": 0.11754, + "recall_at_10": 0.20727, + "recall_at_20": 0.32399, + "recall_at_100": 0.60415, + "recall_at_1000": 0.91864, + "precision_at_1": 0.63265, + "precision_at_3": 0.65306, + "precision_at_5": 0.64898, + "precision_at_10": 0.56735, + "precision_at_20": 0.44898, + "precision_at_100": 0.19102, + "precision_at_1000": 0.03171, + "mrr_at_1": 0.632653, + "mrr_at_3": 0.792517, + "mrr_at_5": 0.792517, + "mrr_at_10": 0.794558, + "mrr_at_20": 0.794558, + "mrr_at_100": 0.794558, + "mrr_at_1000": 0.794558, + "nauc_ndcg_at_1_max": 0.0031, + "nauc_ndcg_at_1_std": -0.060354, + "nauc_ndcg_at_1_diff1": 0.223465, + "nauc_ndcg_at_3_max": -0.014339, + "nauc_ndcg_at_3_std": 0.152748, + "nauc_ndcg_at_3_diff1": 0.042194, + "nauc_ndcg_at_5_max": 0.037394, + "nauc_ndcg_at_5_std": 0.14457, + "nauc_ndcg_at_5_diff1": 0.063942, + "nauc_ndcg_at_10_max": 0.087768, + "nauc_ndcg_at_10_std": 0.078527, + "nauc_ndcg_at_10_diff1": 0.130966, + "nauc_ndcg_at_20_max": 0.012104, + "nauc_ndcg_at_20_std": -0.000209, + "nauc_ndcg_at_20_diff1": 0.19872, + "nauc_ndcg_at_100_max": -0.020205, + "nauc_ndcg_at_100_std": 0.216292, + "nauc_ndcg_at_100_diff1": 0.122909, + "nauc_ndcg_at_1000_max": 0.09298, + "nauc_ndcg_at_1000_std": 0.345245, + "nauc_ndcg_at_1000_diff1": 0.064977, + "nauc_map_at_1_max": -0.152542, + "nauc_map_at_1_std": -0.273752, + "nauc_map_at_1_diff1": 0.272483, + "nauc_map_at_3_max": -0.162742, + "nauc_map_at_3_std": -0.199249, + "nauc_map_at_3_diff1": 0.145412, + "nauc_map_at_5_max": -0.082431, + "nauc_map_at_5_std": -0.163146, + "nauc_map_at_5_diff1": 0.145597, + "nauc_map_at_10_max": -0.019242, + "nauc_map_at_10_std": -0.201441, + "nauc_map_at_10_diff1": 0.163093, + "nauc_map_at_20_max": -0.022388, + "nauc_map_at_20_std": -0.168136, + "nauc_map_at_20_diff1": 0.203174, + "nauc_map_at_100_max": -0.010976, + "nauc_map_at_100_std": 0.033828, + "nauc_map_at_100_diff1": 0.149118, + "nauc_map_at_1000_max": 0.028871, + "nauc_map_at_1000_std": 0.080067, + "nauc_map_at_1000_diff1": 0.13007, + "nauc_recall_at_1_max": -0.152542, + "nauc_recall_at_1_std": -0.273752, + "nauc_recall_at_1_diff1": 0.272483, + "nauc_recall_at_3_max": -0.207296, + "nauc_recall_at_3_std": -0.217187, + "nauc_recall_at_3_diff1": 0.10113, + "nauc_recall_at_5_max": -0.114634, + "nauc_recall_at_5_std": -0.178391, + "nauc_recall_at_5_diff1": 0.152127, + "nauc_recall_at_10_max": -0.076332, + "nauc_recall_at_10_std": -0.22394, + "nauc_recall_at_10_diff1": 0.168213, + "nauc_recall_at_20_max": -0.092197, + "nauc_recall_at_20_std": -0.168148, + "nauc_recall_at_20_diff1": 0.21108, + "nauc_recall_at_100_max": -0.054952, + "nauc_recall_at_100_std": 0.305578, + "nauc_recall_at_100_diff1": 0.07506, + "nauc_recall_at_1000_max": 0.090864, + "nauc_recall_at_1000_std": 0.721191, + "nauc_recall_at_1000_diff1": -0.160742, + "nauc_precision_at_1_max": -0.001185, + "nauc_precision_at_1_std": -0.151142, + "nauc_precision_at_1_diff1": 0.315946, + "nauc_precision_at_3_max": 0.064279, + "nauc_precision_at_3_std": 0.181232, + "nauc_precision_at_3_diff1": -0.060525, + "nauc_precision_at_5_max": 0.114323, + "nauc_precision_at_5_std": 0.129756, + "nauc_precision_at_5_diff1": 0.049106, + "nauc_precision_at_10_max": 0.179161, + "nauc_precision_at_10_std": 0.042662, + "nauc_precision_at_10_diff1": 0.115144, + "nauc_precision_at_20_max": 0.127826, + "nauc_precision_at_20_std": 0.113739, + "nauc_precision_at_20_diff1": 0.17017, + "nauc_precision_at_100_max": 0.296953, + "nauc_precision_at_100_std": 0.700302, + "nauc_precision_at_100_diff1": -0.237039, + "nauc_precision_at_1000_max": 0.36318, + "nauc_precision_at_1000_std": 0.36211, + "nauc_precision_at_1000_diff1": -0.258314, + "nauc_mrr_at_1_max": -0.001185, + "nauc_mrr_at_1_std": -0.151142, + "nauc_mrr_at_1_diff1": 0.315946, + "nauc_mrr_at_3_max": -0.023875, + "nauc_mrr_at_3_std": -0.090574, + "nauc_mrr_at_3_diff1": 0.371284, + "nauc_mrr_at_5_max": -0.023875, + "nauc_mrr_at_5_std": -0.090574, + "nauc_mrr_at_5_diff1": 0.371284, + "nauc_mrr_at_10_max": -0.022177, + "nauc_mrr_at_10_std": -0.102039, + "nauc_mrr_at_10_diff1": 0.367104, + "nauc_mrr_at_20_max": -0.022177, + "nauc_mrr_at_20_std": -0.102039, + "nauc_mrr_at_20_diff1": 0.367104, + "nauc_mrr_at_100_max": -0.022177, + "nauc_mrr_at_100_std": -0.102039, + "nauc_mrr_at_100_diff1": 0.367104, + "nauc_mrr_at_1000_max": -0.022177, + "nauc_mrr_at_1000_std": -0.102039, + "nauc_mrr_at_1000_diff1": 0.367104, + "main_score": 0.54375, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 9911.160651683807, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ToxicConversationsClassification.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ToxicConversationsClassification.json new file mode 100644 index 0000000000..1c67e4acca --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ToxicConversationsClassification.json @@ -0,0 +1,95 @@ +{ + "dataset_revision": "edfaf9da55d3dd50d43143d90c1ac476895ae6de", + "task_name": "ToxicConversationsClassification", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "accuracy": 0.845605, + "f1": 0.679157, + "f1_weighted": 0.873217, + "ap": 0.263654, + "ap_weighted": 0.263654, + "scores_per_experiment": [ + { + "accuracy": 0.860352, + "f1": 0.692064, + "f1_weighted": 0.883694, + "ap": 0.273699, + "ap_weighted": 0.273699 + }, + { + "accuracy": 0.841797, + "f1": 0.661844, + "f1_weighted": 0.8695, + "ap": 0.231204, + "ap_weighted": 0.231204 + }, + { + "accuracy": 0.901855, + "f1": 0.740462, + "f1_weighted": 0.912748, + "ap": 0.328962, + "ap_weighted": 0.328962 + }, + { + "accuracy": 0.907715, + "f1": 0.74982, + "f1_weighted": 0.917128, + "ap": 0.343163, + "ap_weighted": 0.343163 + }, + { + "accuracy": 0.869141, + "f1": 0.696822, + "f1_weighted": 0.88923, + "ap": 0.272802, + "ap_weighted": 0.272802 + }, + { + "accuracy": 0.810547, + "f1": 0.645241, + "f1_weighted": 0.849095, + "ap": 0.232211, + "ap_weighted": 0.232211 + }, + { + "accuracy": 0.89209, + "f1": 0.731933, + "f1_weighted": 0.906355, + "ap": 0.322754, + "ap_weighted": 0.322754 + }, + { + "accuracy": 0.772461, + "f1": 0.613257, + "f1_weighted": 0.822136, + "ap": 0.207447, + "ap_weighted": 0.207447 + }, + { + "accuracy": 0.791992, + "f1": 0.622631, + "f1_weighted": 0.835444, + "ap": 0.205714, + "ap_weighted": 0.205714 + }, + { + "accuracy": 0.808105, + "f1": 0.637494, + "f1_weighted": 0.846842, + "ap": 0.218587, + "ap_weighted": 0.218587 + } + ], + "main_score": 0.845605, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 54.98096585273743, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TweetSentimentExtractionClassification.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TweetSentimentExtractionClassification.json new file mode 100644 index 0000000000..7ca393bb99 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TweetSentimentExtractionClassification.json @@ -0,0 +1,73 @@ +{ + "dataset_revision": "d604517c81ca91fe16a244d1248fc021f9ecee7a", + "task_name": "TweetSentimentExtractionClassification", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "accuracy": 0.772269, + "f1": 0.775876, + "f1_weighted": 0.770664, + "scores_per_experiment": [ + { + "accuracy": 0.765139, + "f1": 0.767993, + "f1_weighted": 0.762139 + }, + { + "accuracy": 0.768251, + "f1": 0.772352, + "f1_weighted": 0.767225 + }, + { + "accuracy": 0.777589, + "f1": 0.781435, + "f1_weighted": 0.776624 + }, + { + "accuracy": 0.766271, + "f1": 0.770053, + "f1_weighted": 0.764319 + }, + { + "accuracy": 0.764007, + "f1": 0.766902, + "f1_weighted": 0.760922 + }, + { + "accuracy": 0.77957, + "f1": 0.783426, + "f1_weighted": 0.778862 + }, + { + "accuracy": 0.77957, + "f1": 0.782867, + "f1_weighted": 0.778739 + }, + { + "accuracy": 0.779853, + "f1": 0.783523, + "f1_weighted": 0.778207 + }, + { + "accuracy": 0.769949, + "f1": 0.773471, + "f1_weighted": 0.767648 + }, + { + "accuracy": 0.772496, + "f1": 0.776735, + "f1_weighted": 0.771952 + } + ], + "main_score": 0.772269, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 39.811750411987305, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwentyNewsgroupsClustering.v2.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwentyNewsgroupsClustering.v2.json new file mode 100644 index 0000000000..624ecfc5ed --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwentyNewsgroupsClustering.v2.json @@ -0,0 +1,34 @@ +{ + "dataset_revision": "6125ec4e24fa026cec8a478383ee943acfbd5449", + "task_name": "TwentyNewsgroupsClustering.v2", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "v_measures": { + "Level 0": [ + 0.866166, + 0.88221, + 0.862244, + 0.885769, + 0.881305, + 0.869038, + 0.894773, + 0.868096, + 0.873039, + 0.875453 + ] + }, + "v_measure": 0.875809, + "v_measure_std": 0.009593, + "main_score": 0.875809, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 23.761215925216675, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterSemEval2015.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterSemEval2015.json new file mode 100644 index 0000000000..8afbfcb66d --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterSemEval2015.json @@ -0,0 +1,58 @@ +{ + "dataset_revision": "70970daeab8776df92f5ea462b6173c0b46fd2d1", + "task_name": "TwitterSemEval2015", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "similarity_accuracy": 0.844132, + "similarity_accuracy_threshold": 0.74313, + "similarity_f1": 0.633822, + "similarity_f1_threshold": 0.658244, + "similarity_precision": 0.560146, + "similarity_recall": 0.729815, + "similarity_ap": 0.679631, + "cosine_accuracy": 0.844132, + "cosine_accuracy_threshold": 0.74313, + "cosine_f1": 0.633822, + "cosine_f1_threshold": 0.658244, + "cosine_precision": 0.560146, + "cosine_recall": 0.729815, + "cosine_ap": 0.679631, + "manhattan_accuracy": 0.843893, + "manhattan_accuracy_threshold": 35.718842, + "manhattan_f1": 0.632309, + "manhattan_f1_threshold": 41.56741, + "manhattan_precision": 0.5558, + "manhattan_recall": 0.733245, + "manhattan_ap": 0.677971, + "euclidean_accuracy": 0.844132, + "euclidean_accuracy_threshold": 0.716756, + "euclidean_f1": 0.633822, + "euclidean_f1_threshold": 0.826748, + "euclidean_precision": 0.560146, + "euclidean_recall": 0.729815, + "euclidean_ap": 0.679632, + "dot_accuracy": 0.844132, + "dot_accuracy_threshold": 0.74313, + "dot_f1": 0.633822, + "dot_f1_threshold": 0.658244, + "dot_precision": 0.560146, + "dot_recall": 0.729815, + "dot_ap": 0.67963, + "max_accuracy": 0.844132, + "max_f1": 0.633822, + "max_precision": 0.560146, + "max_recall": 0.733245, + "max_ap": 0.679632, + "main_score": 0.679632, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 125.90312480926514, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterURLCorpus.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterURLCorpus.json new file mode 100644 index 0000000000..2dfee6473d --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterURLCorpus.json @@ -0,0 +1,58 @@ +{ + "dataset_revision": "8b6510b0b1fa4e4c4f879467980e9be563ec1cdf", + "task_name": "TwitterURLCorpus", + "mteb_version": "1.31.2", + "scores": { + "test": [ + { + "similarity_accuracy": 0.890771, + "similarity_accuracy_threshold": 0.648158, + "similarity_f1": 0.785168, + "similarity_f1_threshold": 0.618626, + "similarity_precision": 0.755209, + "similarity_recall": 0.817601, + "similarity_ap": 0.859808, + "cosine_accuracy": 0.890771, + "cosine_accuracy_threshold": 0.648158, + "cosine_f1": 0.785168, + "cosine_f1_threshold": 0.618626, + "cosine_precision": 0.755209, + "cosine_recall": 0.817601, + "cosine_ap": 0.859808, + "manhattan_accuracy": 0.890694, + "manhattan_accuracy_threshold": 42.312912, + "manhattan_f1": 0.784835, + "manhattan_f1_threshold": 43.315178, + "manhattan_precision": 0.767922, + "manhattan_recall": 0.80251, + "manhattan_ap": 0.859548, + "euclidean_accuracy": 0.890771, + "euclidean_accuracy_threshold": 0.838859, + "euclidean_f1": 0.785168, + "euclidean_f1_threshold": 0.873355, + "euclidean_precision": 0.755209, + "euclidean_recall": 0.817601, + "euclidean_ap": 0.859808, + "dot_accuracy": 0.890771, + "dot_accuracy_threshold": 0.648158, + "dot_f1": 0.785168, + "dot_f1_threshold": 0.618625, + "dot_precision": 0.755209, + "dot_recall": 0.817601, + "dot_ap": 0.859808, + "max_accuracy": 0.890771, + "max_f1": 0.785168, + "max_precision": 0.767922, + "max_recall": 0.817601, + "max_ap": 0.859808, + "main_score": 0.859808, + "hf_subset": "default", + "languages": [ + "eng-Latn" + ] + } + ] + }, + "evaluation_time": 353.27235865592957, + "kg_co2_emissions": null +} \ No newline at end of file diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/model_meta.json b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/model_meta.json new file mode 100644 index 0000000000..906c9033e7 --- /dev/null +++ b/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/model_meta.json @@ -0,0 +1 @@ +{"name": "GeoGPT-Research-Project/GeoEmbedding", "revision": "29803c28ea7ef6871194a8ebc85ad7bfe174928e", "release_date": "2025-04-22", "languages": ["eng-Latn"], "n_parameters": 7241732096, "memory_usage_mb": 27625.0, "max_tokens": 32768.0, "embed_dim": 4096, "license": "apache-2.0", "open_weights": true, "public_training_code": "", "public_training_data": "", "framework": ["Sentence Transformers", "PyTorch"], "reference": "https://huggingface.co/GeoGPT-Research-Project/GeoEmbedding", "similarity_fn_name": "cosine", "use_instructions": true, "training_datasets": {"ArguAna": ["test"], "FEVER": ["train"], "MSMARCO": ["train"], "FiQA2018": ["train"], "HotpotQA": ["train"], "NFCorpus": ["train"], "SciFact": ["train"], "AmazonCounterfactualClassification": ["train"], "AmazonPolarityClassification": ["train"], "AmazonReviewsClassification": ["train"], "Banking77Classification": ["train"], "EmotionClassification": ["train"], "MassiveIntentClassification": ["train"], "MTOPDomainClassification": ["train"], "MTOPIntentClassification": ["train"], "ToxicConversationsClassification": ["train"], "TweetSentimentExtractionClassification": ["train"], "ArxivClusteringS2S": ["test"], "ArxivClusteringP2P": ["test"], "MedrixvClusteringS2S": ["test"], "MedrixvClusteringP2P": ["test"], "BiorxivClusteringS2S": ["test"], "BiorxivClusteringP2P": ["test"], "STS12": ["train"], "STS22": ["train"], "STSBenchmark": ["train"], "StackOverflowDupQuestions": ["train"]}, "adapted_from": null, "superseded_by": null, "is_cross_encoder": null, "modalities": ["text"], "loader": "sentence_transformers_loader"} \ No newline at end of file From 0c7869db13eaa96e66d0b24a0dd4f9f9988e670d Mon Sep 17 00:00:00 2001 From: zhangzeqing Date: Fri, 6 Jun 2025 14:37:05 +0800 Subject: [PATCH 2/3] rename geoembedding result file --- .../AmazonCounterfactualClassification.json | 0 .../ArXivHierarchicalClusteringP2P.json | 0 .../ArXivHierarchicalClusteringS2S.json | 0 .../29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArguAna.json | 0 .../AskUbuntuDupQuestions.json | 0 .../29803c28ea7ef6871194a8ebc85ad7bfe174928e/BIOSSES.json | 0 .../Banking77Classification.json | 0 .../BiorxivClusteringP2P.v2.json | 0 .../CQADupstackGamingRetrieval.json | 0 .../CQADupstackUnixRetrieval.json | 0 .../ClimateFEVERHardNegatives.json | 0 .../FEVERHardNegatives.json | 0 .../29803c28ea7ef6871194a8ebc85ad7bfe174928e/FiQA2018.json | 0 .../HotpotQAHardNegatives.json | 0 .../ImdbClassification.json | 0 .../MTOPDomainClassification.json | 0 .../MassiveIntentClassification.json | 0 .../MassiveScenarioClassification.json | 0 .../MedrxivClusteringP2P.v2.json | 0 .../MedrxivClusteringS2S.v2.json | 0 .../MindSmallReranking.json | 0 .../29803c28ea7ef6871194a8ebc85ad7bfe174928e/SCIDOCS.json | 0 .../29803c28ea7ef6871194a8ebc85ad7bfe174928e/SICK-R.json | 0 .../29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS12.json | 0 .../29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS13.json | 0 .../29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS14.json | 0 .../29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS15.json | 0 .../29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS17.json | 0 .../29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS22.v2.json | 0 .../29803c28ea7ef6871194a8ebc85ad7bfe174928e/STSBenchmark.json | 0 .../SprintDuplicateQuestions.json | 0 .../StackExchangeClustering.v2.json | 0 .../StackExchangeClusteringP2P.v2.json | 0 .../SummEvalSummarization.v2.json | 0 .../29803c28ea7ef6871194a8ebc85ad7bfe174928e/TRECCOVID.json | 0 .../Touche2020Retrieval.v3.json | 0 .../ToxicConversationsClassification.json | 0 .../TweetSentimentExtractionClassification.json | 0 .../TwentyNewsgroupsClustering.v2.json | 0 .../TwitterSemEval2015.json | 0 .../TwitterURLCorpus.json | 0 .../29803c28ea7ef6871194a8ebc85ad7bfe174928e/model_meta.json | 0 42 files changed, 0 insertions(+), 0 deletions(-) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AmazonCounterfactualClassification.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringP2P.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringS2S.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArguAna.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AskUbuntuDupQuestions.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BIOSSES.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Banking77Classification.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BiorxivClusteringP2P.v2.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackGamingRetrieval.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackUnixRetrieval.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ClimateFEVERHardNegatives.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FEVERHardNegatives.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FiQA2018.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/HotpotQAHardNegatives.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ImdbClassification.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MTOPDomainClassification.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveIntentClassification.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveScenarioClassification.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringP2P.v2.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringS2S.v2.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MindSmallReranking.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SCIDOCS.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SICK-R.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS12.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS13.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS14.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS15.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS17.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS22.v2.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STSBenchmark.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SprintDuplicateQuestions.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClustering.v2.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClusteringP2P.v2.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SummEvalSummarization.v2.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TRECCOVID.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Touche2020Retrieval.v3.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ToxicConversationsClassification.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TweetSentimentExtractionClassification.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwentyNewsgroupsClustering.v2.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterSemEval2015.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterURLCorpus.json (100%) rename results/{geogpt-research-project__geoembedding => GeoGPT-Research-Project__GeoEmbedding}/29803c28ea7ef6871194a8ebc85ad7bfe174928e/model_meta.json (100%) diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AmazonCounterfactualClassification.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AmazonCounterfactualClassification.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AmazonCounterfactualClassification.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AmazonCounterfactualClassification.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringP2P.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringP2P.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringP2P.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringP2P.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringS2S.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringS2S.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringS2S.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArXivHierarchicalClusteringS2S.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArguAna.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArguAna.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArguAna.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ArguAna.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AskUbuntuDupQuestions.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AskUbuntuDupQuestions.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AskUbuntuDupQuestions.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/AskUbuntuDupQuestions.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BIOSSES.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BIOSSES.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BIOSSES.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BIOSSES.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Banking77Classification.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Banking77Classification.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Banking77Classification.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Banking77Classification.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BiorxivClusteringP2P.v2.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BiorxivClusteringP2P.v2.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BiorxivClusteringP2P.v2.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/BiorxivClusteringP2P.v2.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackGamingRetrieval.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackGamingRetrieval.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackGamingRetrieval.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackGamingRetrieval.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackUnixRetrieval.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackUnixRetrieval.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackUnixRetrieval.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/CQADupstackUnixRetrieval.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ClimateFEVERHardNegatives.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ClimateFEVERHardNegatives.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ClimateFEVERHardNegatives.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ClimateFEVERHardNegatives.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FEVERHardNegatives.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FEVERHardNegatives.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FEVERHardNegatives.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FEVERHardNegatives.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FiQA2018.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FiQA2018.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FiQA2018.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/FiQA2018.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/HotpotQAHardNegatives.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/HotpotQAHardNegatives.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/HotpotQAHardNegatives.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/HotpotQAHardNegatives.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ImdbClassification.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ImdbClassification.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ImdbClassification.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ImdbClassification.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MTOPDomainClassification.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MTOPDomainClassification.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MTOPDomainClassification.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MTOPDomainClassification.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveIntentClassification.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveIntentClassification.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveIntentClassification.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveIntentClassification.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveScenarioClassification.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveScenarioClassification.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveScenarioClassification.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MassiveScenarioClassification.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringP2P.v2.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringP2P.v2.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringP2P.v2.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringP2P.v2.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringS2S.v2.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringS2S.v2.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringS2S.v2.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MedrxivClusteringS2S.v2.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MindSmallReranking.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MindSmallReranking.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MindSmallReranking.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/MindSmallReranking.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SCIDOCS.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SCIDOCS.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SCIDOCS.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SCIDOCS.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SICK-R.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SICK-R.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SICK-R.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SICK-R.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS12.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS12.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS12.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS12.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS13.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS13.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS13.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS13.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS14.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS14.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS14.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS14.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS15.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS15.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS15.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS15.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS17.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS17.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS17.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS17.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS22.v2.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS22.v2.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS22.v2.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STS22.v2.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STSBenchmark.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STSBenchmark.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STSBenchmark.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/STSBenchmark.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SprintDuplicateQuestions.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SprintDuplicateQuestions.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SprintDuplicateQuestions.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SprintDuplicateQuestions.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClustering.v2.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClustering.v2.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClustering.v2.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClustering.v2.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClusteringP2P.v2.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClusteringP2P.v2.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClusteringP2P.v2.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/StackExchangeClusteringP2P.v2.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SummEvalSummarization.v2.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SummEvalSummarization.v2.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SummEvalSummarization.v2.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/SummEvalSummarization.v2.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TRECCOVID.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TRECCOVID.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TRECCOVID.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TRECCOVID.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Touche2020Retrieval.v3.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Touche2020Retrieval.v3.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Touche2020Retrieval.v3.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/Touche2020Retrieval.v3.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ToxicConversationsClassification.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ToxicConversationsClassification.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ToxicConversationsClassification.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/ToxicConversationsClassification.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TweetSentimentExtractionClassification.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TweetSentimentExtractionClassification.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TweetSentimentExtractionClassification.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TweetSentimentExtractionClassification.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwentyNewsgroupsClustering.v2.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwentyNewsgroupsClustering.v2.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwentyNewsgroupsClustering.v2.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwentyNewsgroupsClustering.v2.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterSemEval2015.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterSemEval2015.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterSemEval2015.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterSemEval2015.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterURLCorpus.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterURLCorpus.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterURLCorpus.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/TwitterURLCorpus.json diff --git a/results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/model_meta.json b/results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/model_meta.json similarity index 100% rename from results/geogpt-research-project__geoembedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/model_meta.json rename to results/GeoGPT-Research-Project__GeoEmbedding/29803c28ea7ef6871194a8ebc85ad7bfe174928e/model_meta.json From 5c55f9ae9c4cdb4e90de9c4e92c46e4743fc25d7 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Mon, 9 Jun 2025 12:15:45 +0200 Subject: [PATCH 3/3] minor fix --- scripts/create_pr_results_comment.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/create_pr_results_comment.py b/scripts/create_pr_results_comment.py index 3a838b557b..9bd2d2de01 100644 --- a/scripts/create_pr_results_comment.py +++ b/scripts/create_pr_results_comment.py @@ -3,7 +3,7 @@ Usage: gh pr checkout {pr-number} - python scripts/create_results_pr_comment.py [--models MODEL1 MODEL2 ...] + scripts/create_pr_results_comment.py [--models MODEL1 MODEL2 ...] Description: - Compares new model results (added in the current PR) against reference models. @@ -15,7 +15,7 @@ --models: List of reference models to compare against (default: intfloat/multilingual-e5-large google/gemini-embedding-001) Example: - python scripts/create_results_pr_comment.py --models intfloat/multilingual-e5-large myorg/my-new-model + scripts/create_pr_results_comment.py --models intfloat/multilingual-e5-large myorg/my-new-model """ from __future__ import annotations @@ -54,9 +54,9 @@ def get_diff_from_main() -> list[str]: text=True, ).stdout.splitlines() - if current_rev == origin_rev: + if current_rev != origin_rev: raise ValueError( - "Your main branch is not up-to-date, please run `git fetch origin main`" + f"Your main branch is not up-to-date ({current_rev} != {origin_rev}), please run `git fetch origin main`" ) differences = subprocess.run(