diff --git a/scripts/mmteb_create_author_list.ipynb b/scripts/mmteb_create_author_list.ipynb deleted file mode 100644 index b2acd782e6..0000000000 --- a/scripts/mmteb_create_author_list.ipynb +++ /dev/null @@ -1,1463 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Create points table and author list" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from __future__ import annotations\n", - "\n", - "import os\n", - "import sys\n", - "from pathlib import Path\n", - "\n", - "project_path = Path(os.getcwd()) / \"..\"\n", - "\n", - "sys.path.append(str(project_path / \"docs\" / \"mmteb\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from create_points_table import load_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Point table" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "df = load_data()\n", - "df = df.groupby(\"GitHub\").sum().astype(int)\n", - "# create a new column with the sum of the points\n", - "df[\"Total\"] = df.sum(axis=1)\n", - "df = df.sort_values(\"Total\", ascending=False)\n", - "# total as first column\n", - "df = df[[\"Total\"] + [col for col in df.columns if col != \"Total\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
| \n", - " | Total | \n", - "Bug fixes | \n", - "Review PR | \n", - "New dataset | \n", - "Dataset annotations | \n", - "Paper writing | \n", - "Coordination | \n", - "New task | \n", - "Running Models | \n", - "
|---|---|---|---|---|---|---|---|---|---|
| GitHub | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
| KennethEnevoldsen | \n", - "597 | \n", - "87 | \n", - "326 | \n", - "68 | \n", - "35 | \n", - "0 | \n", - "81 | \n", - "0 | \n", - "0 | \n", - "
| isaac-chung | \n", - "433 | \n", - "50 | \n", - "194 | \n", - "120 | \n", - "1 | \n", - "12 | \n", - "54 | \n", - "2 | \n", - "0 | \n", - "
| imenelydiaker | \n", - "358 | \n", - "24 | \n", - "144 | \n", - "120 | \n", - "0 | \n", - "0 | \n", - "70 | \n", - "0 | \n", - "0 | \n", - "
| awinml | \n", - "302 | \n", - "0 | \n", - "2 | \n", - "300 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "
| x-tabdeveloping | \n", - "239 | \n", - "10 | \n", - "32 | \n", - "144 | \n", - "0 | \n", - "0 | \n", - "41 | \n", - "12 | \n", - "0 | \n", - "
| ... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "
| PhilipMay | \n", - "2 | \n", - "0 | \n", - "2 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "
| achibb | \n", - "2 | \n", - "0 | \n", - "0 | \n", - "2 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "
| antoniolanza1996 | \n", - "2 | \n", - "2 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "
| cslizc | \n", - "2 | \n", - "0 | \n", - "0 | \n", - "2 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "
| hanhainebula | \n", - "2 | \n", - "0 | \n", - "0 | \n", - "2 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "
98 rows \u00d7 9 columns
\n", - "| \n", - " | GitHub | \n", - "First name | \n", - "Last name | \n", - "User on openreview | \n", - "Affiliations | \n", - "|
|---|---|---|---|---|---|---|
| 0 | \n", - "KennethEnevoldsen | \n", - "Kenneth | \n", - "Enevoldsen | \n", - "kennethcenevoldsen@gmail.com | \n", - "~Kenneth_Enevoldsen1 | \n", - "Aarhus University | \n", - "
| 1 | \n", - "x-tabdeveloping | \n", - "M\u00e1rton | \n", - "Kardos | \n", - "martonkardos@cas.au.dk | \n", - "~M\u00e1rton_Kardos1 | \n", - "Aarhus University | \n", - "
| 2 | \n", - "imenelydiaker | \n", - "Imene | \n", - "Kerboua | \n", - "\n", - " | ~Imene_Kerboua1 | \n", - "INSA Lyon, LIRIS | \n", - "
| 3 | \n", - "wissam-sib | \n", - "Wissam | \n", - "Siblini | \n", - "wissam.siblini92@gmail.com | \n", - "~Wissam_Siblini1 | \n", - "Individual Contributor | \n", - "
| 4 | \n", - "GabrielSequeira | \n", - "Gabriel | \n", - "Sequeira | \n", - "\n", - " | \n", - " | Individual Contributor | \n", - "
| ... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "
| 83 | \n", - "sarahooker | \n", - "Sara | \n", - "Hooker | \n", - "\n", - " | ~Sara_Hooker2 | \n", - "Cohere For AI | \n", - "
| 84 | \n", - "kwojtasi | \n", - "Konrad | \n", - "Wojtasik | \n", - "\n", - " | ~Konrad_Wojtasik1 | \n", - "Wroc\u0142aw University of Science and Technology | \n", - "
| 85 | \n", - "tmp_handle | \n", - "Jimmy | \n", - "Lin | \n", - "\n", - " | ~Jimmy_Lin2 | \n", - "University of Waterloo | \n", - "
| 86 | \n", - "hongjin-su | \n", - "Hongjin | \n", - "Su | \n", - "\n", - " | ~Hongjin_SU1 | \n", - "University of Hong Kong | \n", - "
| 87 | \n", - "howard-yen | \n", - "Howard | \n", - "Yen | \n", - "\n", - " | ~Howard_Yen1 | \n", - "Princeton University | \n", - "
88 rows \u00d7 6 columns
\n", - "| \n", - " | model | \n", - "revision | \n", - "BelebeleRetrieval | \n", - "BengaliSentimentAnalysis | \n", - "GujaratiNewsClassification | \n", - "HindiDiscourseClassification | \n", - "IN22ConvBitextMining | \n", - "IN22GenBitextMining | \n", - "IndicCrosslingualSTS | \n", - "MTOPIntentClassification | \n", - "... | \n", - "Mean | \n", - "Mean BitextMining | \n", - "Mean PairClassification | \n", - "Mean Classification | \n", - "Mean STS | \n", - "Mean Retrieval | \n", - "Mean MultilabelClassification | \n", - "Mean Clustering | \n", - "Mean Reranking | \n", - "mean pr. task type | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4 | \n", - "intfloat/multilingual-e5-large-instruct | \n", - "baa7be480a7de1539afce709c8f13f833a510e0a | \n", - "73.754577 | \n", - "83.984927 | \n", - "87.518968 | \n", - "35.234375 | \n", - "71.873549 | \n", - "88.875417 | \n", - "53.688533 | \n", - "62.952996 | \n", - "... | \n", - "70.186085 | \n", - "80.374483 | \n", - "76.314311 | \n", - "67.009249 | \n", - "53.688533 | \n", - "84.862788 | \n", - "NaN | \n", - "51.671254 | \n", - "87.462063 | \n", - "71.626097 | \n", - "
| 3 | \n", - "intfloat/multilingual-e5-large | \n", - "4dc6d853a804b9c8886ede6dda8a073b7dc08a81 | \n", - "68.199269 | \n", - "83.070706 | \n", - "76.737481 | \n", - "38.740234 | \n", - "67.784590 | \n", - "87.696664 | \n", - "43.866162 | \n", - "59.198891 | \n", - "... | \n", - "66.354869 | \n", - "77.740627 | \n", - "75.057575 | \n", - "64.659155 | \n", - "43.866162 | \n", - "82.604635 | \n", - "NaN | \n", - "25.602675 | \n", - "85.970595 | \n", - "65.071632 | \n", - "
| 2 | \n", - "intfloat/multilingual-e5-base | \n", - "d13f1b27baf31030b7fd040960d60d909913633f | \n", - "60.371654 | \n", - "79.643111 | \n", - "74.908953 | \n", - "39.038086 | \n", - "63.130747 | \n", - "85.290012 | \n", - "41.113890 | \n", - "54.049160 | \n", - "... | \n", - "64.571532 | \n", - "74.210379 | \n", - "72.795771 | \n", - "63.753851 | \n", - "41.113890 | \n", - "77.842327 | \n", - "NaN | \n", - "24.607904 | \n", - "83.761455 | \n", - "62.583654 | \n", - "
| 5 | \n", - "intfloat/multilingual-e5-small | \n", - "e4ce9877abf3edfe10b0d82785e83bdcb973e22e | \n", - "58.193538 | \n", - "83.430444 | \n", - "74.393020 | \n", - "39.335938 | \n", - "62.739174 | \n", - "84.663212 | \n", - "40.761080 | \n", - "52.118503 | \n", - "... | \n", - "64.720156 | \n", - "73.701193 | \n", - "73.795104 | \n", - "63.782196 | \n", - "40.761080 | \n", - "76.817269 | \n", - "NaN | \n", - "29.054088 | \n", - "84.369577 | \n", - "63.182930 | \n", - "
| 0 | \n", - "GritLM/GritLM-7B | \n", - "13f00a0e36500c80ce12870ea513846a066004af | \n", - "70.063654 | \n", - "72.100459 | \n", - "69.855842 | \n", - "37.089844 | \n", - "42.137574 | \n", - "74.667530 | \n", - "27.245620 | \n", - "63.660123 | \n", - "... | \n", - "60.204173 | \n", - "58.402552 | \n", - "67.838170 | \n", - "60.043953 | \n", - "27.245620 | \n", - "79.496827 | \n", - "NaN | \n", - "27.978331 | \n", - "84.695146 | \n", - "57.957228 | \n", - "
| 1 | \n", - "intfloat/e5-mistral-7b-instruct | \n", - "07163b72af1488142a360786df853f237b1a3ca1 | \n", - "66.289423 | \n", - "72.074816 | \n", - "73.088012 | \n", - "32.006836 | \n", - "44.302781 | \n", - "73.799887 | \n", - "22.981297 | \n", - "59.232093 | \n", - "... | \n", - "60.022827 | \n", - "59.051334 | \n", - "72.951099 | \n", - "59.563869 | \n", - "22.981297 | \n", - "77.266212 | \n", - "NaN | \n", - "32.702546 | \n", - "84.420079 | \n", - "58.419491 | \n", - "
| 6 | \n", - "sentence-transformers/LaBSE | \n", - "e34fab64a3011d2176c99545a93d5cbddc9a91b7 | \n", - "47.515538 | \n", - "80.415418 | \n", - "76.358118 | \n", - "38.398438 | \n", - "63.459252 | \n", - "84.668840 | \n", - "52.758018 | \n", - "62.863971 | \n", - "... | \n", - "61.855117 | \n", - "74.064046 | \n", - "64.583366 | \n", - "61.906474 | \n", - "52.758018 | \n", - "64.334769 | \n", - "NaN | \n", - "21.105169 | \n", - "78.980476 | \n", - "59.676046 | \n", - "
| 11 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "79f2382ceacceacdf38563d7c5d16b9ff8d725d6 | \n", - "36.101692 | \n", - "74.882594 | \n", - "81.934750 | \n", - "38.691406 | \n", - "33.588414 | \n", - "54.802443 | \n", - "34.096874 | \n", - "61.699834 | \n", - "... | \n", - "58.502887 | \n", - "44.195428 | \n", - "82.036142 | \n", - "61.943266 | \n", - "34.096874 | \n", - "57.910346 | \n", - "NaN | \n", - "32.061697 | \n", - "74.332275 | \n", - "55.225147 | \n", - "
| 10 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "bf3bf13ab40c3157080a7ab344c831b9ad18b5eb | \n", - "19.395077 | \n", - "60.613522 | \n", - "76.874052 | \n", - "37.431641 | \n", - "11.867235 | \n", - "18.709649 | \n", - "19.788971 | \n", - "59.196945 | \n", - "... | \n", - "49.672632 | \n", - "15.288442 | \n", - "77.849520 | \n", - "57.645424 | \n", - "19.788971 | \n", - "48.779038 | \n", - "NaN | \n", - "16.675403 | \n", - "59.258690 | \n", - "42.183641 | \n", - "
| 9 | \n", - "sentence-transformers/all-mpnet-base-v2 | \n", - "84f2bcc00d77236f9e89c8a360a00fb1139bf47d | \n", - "9.627615 | \n", - "54.188273 | \n", - "44.104704 | \n", - "34.316406 | \n", - "2.110360 | \n", - "5.353099 | \n", - "-2.509332 | \n", - "18.150352 | \n", - "... | \n", - "33.630293 | \n", - "3.731730 | \n", - "52.634533 | \n", - "45.224600 | \n", - "-2.509332 | \n", - "12.853808 | \n", - "NaN | \n", - "4.012625 | \n", - "42.601772 | \n", - "22.649962 | \n", - "
| 7 | \n", - "sentence-transformers/all-MiniLM-L12-v2 | \n", - "a05860a77cef7b37e0048a7864658139bc18a854 | \n", - "9.927654 | \n", - "58.975982 | \n", - "41.638847 | \n", - "28.911133 | \n", - "1.922509 | \n", - "4.993733 | \n", - "-5.337116 | \n", - "17.615309 | \n", - "... | \n", - "33.121629 | \n", - "3.458121 | \n", - "55.034299 | \n", - "43.891361 | \n", - "-5.337116 | \n", - "13.923827 | \n", - "NaN | \n", - "3.687981 | \n", - "47.587196 | \n", - "23.177952 | \n", - "
| 8 | \n", - "sentence-transformers/all-MiniLM-L6-v2 | \n", - "8b3219a92973c328a8e22fadcfa821b5dc75636a | \n", - "7.661654 | \n", - "58.541631 | \n", - "42.579666 | \n", - "32.036133 | \n", - "1.415965 | \n", - "3.573556 | \n", - "-6.275768 | \n", - "18.495051 | \n", - "... | \n", - "31.842342 | \n", - "2.494760 | \n", - "53.667487 | \n", - "44.145491 | \n", - "-6.275768 | \n", - "6.217327 | \n", - "NaN | \n", - "3.103278 | \n", - "39.181786 | \n", - "20.362052 | \n", - "
12 rows \u00d7 34 columns
\n", - "| \n", - " | model | \n", - "Borda str | \n", - "Mean | \n", - "mean pr. task type | \n", - "Mean BitextMining | \n", - "Mean PairClassification | \n", - "Mean Classification | \n", - "Mean STS | \n", - "Mean Retrieval | \n", - "Mean MultilabelClassification | \n", - "Mean Clustering | \n", - "Mean Reranking | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4 | \n", - "intfloat/multilingual-e5-large-instruct | \n", - "1 (209) | \n", - "70.186085 | \n", - "71.626097 | \n", - "80.374483 | \n", - "76.314311 | \n", - "67.009249 | \n", - "53.688533 | \n", - "84.862788 | \n", - "NaN | \n", - "51.671254 | \n", - "87.462063 | \n", - "
| 3 | \n", - "intfloat/multilingual-e5-large | \n", - "2 (188) | \n", - "66.354869 | \n", - "65.071632 | \n", - "77.740627 | \n", - "75.057575 | \n", - "64.659155 | \n", - "43.866162 | \n", - "82.604635 | \n", - "NaN | \n", - "25.602675 | \n", - "85.970595 | \n", - "
| 2 | \n", - "intfloat/multilingual-e5-base | \n", - "3 (173) | \n", - "64.571532 | \n", - "62.583654 | \n", - "74.210379 | \n", - "72.795771 | \n", - "63.753851 | \n", - "41.113890 | \n", - "77.842327 | \n", - "NaN | \n", - "24.607904 | \n", - "83.761455 | \n", - "
| 5 | \n", - "intfloat/multilingual-e5-small | \n", - "4 (164) | \n", - "64.720156 | \n", - "63.182930 | \n", - "73.701193 | \n", - "73.795104 | \n", - "63.782196 | \n", - "40.761080 | \n", - "76.817269 | \n", - "NaN | \n", - "29.054088 | \n", - "84.369577 | \n", - "
| 0 | \n", - "GritLM/GritLM-7B | \n", - "5 (151) | \n", - "60.204173 | \n", - "57.957228 | \n", - "58.402552 | \n", - "67.838170 | \n", - "60.043953 | \n", - "27.245620 | \n", - "79.496827 | \n", - "NaN | \n", - "27.978331 | \n", - "84.695146 | \n", - "
| 1 | \n", - "intfloat/e5-mistral-7b-instruct | \n", - "6 (144) | \n", - "60.022827 | \n", - "58.419491 | \n", - "59.051334 | \n", - "72.951099 | \n", - "59.563869 | \n", - "22.981297 | \n", - "77.266212 | \n", - "NaN | \n", - "32.702546 | \n", - "84.420079 | \n", - "
| 6 | \n", - "sentence-transformers/LaBSE | \n", - "7 (139) | \n", - "61.855117 | \n", - "59.676046 | \n", - "74.064046 | \n", - "64.583366 | \n", - "61.906474 | \n", - "52.758018 | \n", - "64.334769 | \n", - "NaN | \n", - "21.105169 | \n", - "78.980476 | \n", - "
| 11 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "8 (137) | \n", - "58.502887 | \n", - "55.225147 | \n", - "44.195428 | \n", - "82.036142 | \n", - "61.943266 | \n", - "34.096874 | \n", - "57.910346 | \n", - "NaN | \n", - "32.061697 | \n", - "74.332275 | \n", - "
| 10 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "9 (98) | \n", - "49.672632 | \n", - "42.183641 | \n", - "15.288442 | \n", - "77.849520 | \n", - "57.645424 | \n", - "19.788971 | \n", - "48.779038 | \n", - "NaN | \n", - "16.675403 | \n", - "59.258690 | \n", - "
| 9 | \n", - "sentence-transformers/all-mpnet-base-v2 | \n", - "10 (68) | \n", - "33.630293 | \n", - "22.649962 | \n", - "3.731730 | \n", - "52.634533 | \n", - "45.224600 | \n", - "-2.509332 | \n", - "12.853808 | \n", - "NaN | \n", - "4.012625 | \n", - "42.601772 | \n", - "
| 7 | \n", - "sentence-transformers/all-MiniLM-L12-v2 | \n", - "11 (49) | \n", - "33.121629 | \n", - "23.177952 | \n", - "3.458121 | \n", - "55.034299 | \n", - "43.891361 | \n", - "-5.337116 | \n", - "13.923827 | \n", - "NaN | \n", - "3.687981 | \n", - "47.587196 | \n", - "
| 8 | \n", - "sentence-transformers/all-MiniLM-L6-v2 | \n", - "12 (40) | \n", - "31.842342 | \n", - "20.362052 | \n", - "2.494760 | \n", - "53.667487 | \n", - "44.145491 | \n", - "-6.275768 | \n", - "6.217327 | \n", - "NaN | \n", - "3.103278 | \n", - "39.181786 | \n", - "
| \n", - " | model | \n", - "revision | \n", - "AlloProfClusteringS2S.v2 | \n", - "AlloprofReranking | \n", - "AlloprofRetrieval | \n", - "AmazonCounterfactualClassification | \n", - "ArguAna | \n", - "BUCC.v2 | \n", - "BelebeleRetrieval | \n", - "BibleNLPBitextMining | \n", - "... | \n", - "Mean | \n", - "Mean BitextMining | \n", - "Mean PairClassification | \n", - "Mean Classification | \n", - "Mean STS | \n", - "Mean Retrieval | \n", - "Mean MultilabelClassification | \n", - "Mean Clustering | \n", - "Mean Reranking | \n", - "mean pr. task type | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", - "GritLM/GritLM-7B | \n", - "13f00a0e36500c80ce12870ea513846a066004af | \n", - "56.411821 | \n", - "77.926160 | \n", - "55.422 | \n", - "78.452787 | \n", - "63.171 | \n", - "99.502429 | \n", - "91.393110 | \n", - "97.337705 | \n", - "... | \n", - "62.970819 | \n", - "90.420705 | \n", - "89.939440 | \n", - "64.736896 | \n", - "76.050247 | \n", - "57.105263 | \n", - "17.551200 | \n", - "45.281105 | \n", - "60.269644 | \n", - "62.669312 | \n", - "
| 4 | \n", - "intfloat/multilingual-e5-large-instruct | \n", - "baa7be480a7de1539afce709c8f13f833a510e0a | \n", - "56.465655 | \n", - "74.677730 | \n", - "52.118 | \n", - "67.619802 | \n", - "58.476 | \n", - "99.473836 | \n", - "92.240123 | \n", - "97.916667 | \n", - "... | \n", - "62.192929 | \n", - "90.383846 | \n", - "89.985879 | \n", - "63.241319 | \n", - "77.428657 | \n", - "54.802566 | \n", - "17.265889 | \n", - "46.895583 | \n", - "58.420108 | \n", - "62.302981 | \n", - "
| 1 | \n", - "intfloat/e5-mistral-7b-instruct | \n", - "07163b72af1488142a360786df853f237b1a3ca1 | \n", - "57.112000 | \n", - "78.317664 | \n", - "54.619 | \n", - "73.901466 | \n", - "61.653 | \n", - "99.384579 | \n", - "88.393877 | \n", - "96.731306 | \n", - "... | \n", - "61.729119 | \n", - "89.580187 | \n", - "91.154182 | \n", - "62.946715 | \n", - "76.481287 | \n", - "53.644095 | \n", - "15.464545 | \n", - "46.473328 | \n", - "59.815300 | \n", - "61.944955 | \n", - "
| 3 | \n", - "intfloat/multilingual-e5-large | \n", - "4dc6d853a804b9c8886ede6dda8a073b7dc08a81 | \n", - "35.150764 | \n", - "69.442880 | \n", - "39.341 | \n", - "75.116732 | \n", - "54.357 | \n", - "99.022547 | \n", - "92.728438 | \n", - "94.571508 | \n", - "... | \n", - "58.491543 | \n", - "84.456404 | \n", - "88.753477 | \n", - "60.385125 | \n", - "75.759056 | \n", - "50.814358 | \n", - "14.980040 | \n", - "38.235917 | \n", - "55.910580 | \n", - "58.661870 | \n", - "
| 2 | \n", - "intfloat/multilingual-e5-base | \n", - "d13f1b27baf31030b7fd040960d60d909913633f | \n", - "34.113190 | \n", - "65.897152 | \n", - "34.447 | \n", - "75.098079 | \n", - "44.206 | \n", - "98.699878 | \n", - "87.649863 | \n", - "94.283655 | \n", - "... | \n", - "57.186738 | \n", - "84.110742 | \n", - "87.352572 | \n", - "57.854090 | \n", - "73.669498 | \n", - "50.201567 | \n", - "14.862272 | \n", - "38.160560 | \n", - "53.854468 | \n", - "57.508221 | \n", - "
| 11 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "79f2382ceacceacdf38563d7c5d16b9ff8d725d6 | \n", - "41.806274 | \n", - "67.204322 | \n", - "30.799 | \n", - "73.983687 | \n", - "48.908 | \n", - "98.330171 | \n", - "79.738726 | \n", - "95.205078 | \n", - "... | \n", - "54.410453 | \n", - "79.468659 | \n", - "90.725512 | \n", - "56.599349 | \n", - "74.253507 | \n", - "41.160312 | \n", - "6.897841 | \n", - "35.783209 | \n", - "52.336680 | \n", - "54.653134 | \n", - "
| 5 | \n", - "intfloat/multilingual-e5-small | \n", - "e4ce9877abf3edfe10b0d82785e83bdcb973e22e | \n", - "35.393261 | \n", - "64.410023 | \n", - "27.380 | \n", - "71.746625 | \n", - "39.088 | \n", - "96.352984 | \n", - "82.592438 | \n", - "85.072037 | \n", - "... | \n", - "55.038105 | \n", - "80.948799 | \n", - "86.374052 | \n", - "56.108885 | \n", - "71.636076 | \n", - "46.072279 | \n", - "13.967448 | \n", - "36.498080 | \n", - "54.108918 | \n", - "55.714317 | \n", - "
| 6 | \n", - "sentence-transformers/LaBSE | \n", - "e34fab64a3011d2176c99545a93d5cbddc9a91b7 | \n", - "30.208930 | \n", - "55.374766 | \n", - "19.775 | \n", - "74.485455 | \n", - "34.178 | \n", - "99.189150 | \n", - "72.629699 | \n", - "97.470989 | \n", - "... | \n", - "51.843718 | \n", - "88.779284 | \n", - "85.180182 | \n", - "55.100932 | \n", - "65.683818 | \n", - "34.351709 | \n", - "16.298114 | \n", - "34.253079 | \n", - "48.660051 | \n", - "53.538396 | \n", - "
| 10 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "bf3bf13ab40c3157080a7ab344c831b9ad18b5eb | \n", - "40.451213 | \n", - "62.424382 | \n", - "26.634 | \n", - "69.772668 | \n", - "44.878 | \n", - "97.167386 | \n", - "74.561219 | \n", - "93.669550 | \n", - "... | \n", - "51.731912 | \n", - "76.988968 | \n", - "88.925092 | \n", - "52.678430 | \n", - "72.536393 | \n", - "37.598783 | \n", - "5.687953 | \n", - "34.443163 | \n", - "50.198119 | \n", - "52.382113 | \n", - "
| 9 | \n", - "sentence-transformers/all-mpnet-base-v2 | \n", - "84f2bcc00d77236f9e89c8a360a00fb1139bf47d | \n", - "35.215154 | \n", - "69.630056 | \n", - "34.270 | \n", - "62.193713 | \n", - "46.521 | \n", - "26.357644 | \n", - "39.288041 | \n", - "6.588554 | \n", - "... | \n", - "44.687410 | \n", - "29.807469 | \n", - "80.519579 | \n", - "49.249220 | \n", - "63.883592 | \n", - "37.307847 | \n", - "10.872124 | \n", - "36.190054 | \n", - "49.608487 | \n", - "44.679796 | \n", - "
| 7 | \n", - "sentence-transformers/all-MiniLM-L12-v2 | \n", - "a05860a77cef7b37e0048a7864658139bc18a854 | \n", - "31.976537 | \n", - "67.013696 | \n", - "33.196 | \n", - "63.058263 | \n", - "47.128 | \n", - "28.503819 | \n", - "38.265164 | \n", - "6.486622 | \n", - "... | \n", - "44.389138 | \n", - "32.061171 | \n", - "81.520946 | \n", - "49.244106 | \n", - "64.192402 | \n", - "36.243232 | \n", - "7.574194 | \n", - "32.513396 | \n", - "49.196304 | \n", - "44.068219 | \n", - "
| 8 | \n", - "sentence-transformers/all-MiniLM-L6-v2 | \n", - "8b3219a92973c328a8e22fadcfa821b5dc75636a | \n", - "31.106654 | \n", - "62.621726 | \n", - "28.413 | \n", - "61.660638 | \n", - "50.167 | \n", - "20.293395 | \n", - "34.484863 | \n", - "4.975517 | \n", - "... | \n", - "43.447989 | \n", - "27.244291 | \n", - "80.187412 | \n", - "47.757474 | \n", - "62.650859 | \n", - "37.346527 | \n", - "8.775719 | \n", - "33.555435 | \n", - "47.729140 | \n", - "43.155857 | \n", - "
12 rows \u00d7 87 columns
\n", - "| \n", - " | model | \n", - "Borda str | \n", - "Mean | \n", - "mean pr. task type | \n", - "Mean BitextMining | \n", - "Mean PairClassification | \n", - "Mean Classification | \n", - "Mean STS | \n", - "Mean Retrieval | \n", - "Mean MultilabelClassification | \n", - "Mean Clustering | \n", - "Mean Reranking | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", - "GritLM/GritLM-7B | \n", - "1 (757) | \n", - "62.970819 | \n", - "62.669312 | \n", - "90.420705 | \n", - "89.939440 | \n", - "64.736896 | \n", - "76.050247 | \n", - "57.105263 | \n", - "17.551200 | \n", - "45.281105 | \n", - "60.269644 | \n", - "
| 4 | \n", - "intfloat/multilingual-e5-large-instruct | \n", - "2 (732) | \n", - "62.192929 | \n", - "62.302981 | \n", - "90.383846 | \n", - "89.985879 | \n", - "63.241319 | \n", - "77.428657 | \n", - "54.802566 | \n", - "17.265889 | \n", - "46.895583 | \n", - "58.420108 | \n", - "
| 1 | \n", - "intfloat/e5-mistral-7b-instruct | \n", - "3 (725) | \n", - "61.729119 | \n", - "61.944955 | \n", - "89.580187 | \n", - "91.154182 | \n", - "62.946715 | \n", - "76.481287 | \n", - "53.644095 | \n", - "15.464545 | \n", - "46.473328 | \n", - "59.815300 | \n", - "
| 3 | \n", - "intfloat/multilingual-e5-large | \n", - "4 (586) | \n", - "58.491543 | \n", - "58.661870 | \n", - "84.456404 | \n", - "88.753477 | \n", - "60.385125 | \n", - "75.759056 | \n", - "50.814358 | \n", - "14.980040 | \n", - "38.235917 | \n", - "55.910580 | \n", - "
| 2 | \n", - "intfloat/multilingual-e5-base | \n", - "5 (499) | \n", - "57.186738 | \n", - "57.508221 | \n", - "84.110742 | \n", - "87.352572 | \n", - "57.854090 | \n", - "73.669498 | \n", - "50.201567 | \n", - "14.862272 | \n", - "38.160560 | \n", - "53.854468 | \n", - "
| 11 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "6 (463) | \n", - "54.410453 | \n", - "54.653134 | \n", - "79.468659 | \n", - "90.725512 | \n", - "56.599349 | \n", - "74.253507 | \n", - "41.160312 | \n", - "6.897841 | \n", - "35.783209 | \n", - "52.336680 | \n", - "
| 5 | \n", - "intfloat/multilingual-e5-small | \n", - "7 (399) | \n", - "55.038105 | \n", - "55.714317 | \n", - "80.948799 | \n", - "86.374052 | \n", - "56.108885 | \n", - "71.636076 | \n", - "46.072279 | \n", - "13.967448 | \n", - "36.498080 | \n", - "54.108918 | \n", - "
| 6 | \n", - "sentence-transformers/LaBSE | \n", - "8 (358) | \n", - "51.843718 | \n", - "53.538396 | \n", - "88.779284 | \n", - "85.180182 | \n", - "55.100932 | \n", - "65.683818 | \n", - "34.351709 | \n", - "16.298114 | \n", - "34.253079 | \n", - "48.660051 | \n", - "
| 10 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "9 (328) | \n", - "51.731912 | \n", - "52.382113 | \n", - "76.988968 | \n", - "88.925092 | \n", - "52.678430 | \n", - "72.536393 | \n", - "37.598783 | \n", - "5.687953 | \n", - "34.443163 | \n", - "50.198119 | \n", - "
| 9 | \n", - "sentence-transformers/all-mpnet-base-v2 | \n", - "10 (310) | \n", - "44.687410 | \n", - "44.679796 | \n", - "29.807469 | \n", - "80.519579 | \n", - "49.249220 | \n", - "63.883592 | \n", - "37.307847 | \n", - "10.872124 | \n", - "36.190054 | \n", - "49.608487 | \n", - "
| 7 | \n", - "sentence-transformers/all-MiniLM-L12-v2 | \n", - "11 (292) | \n", - "44.389138 | \n", - "44.068219 | \n", - "32.061171 | \n", - "81.520946 | \n", - "49.244106 | \n", - "64.192402 | \n", - "36.243232 | \n", - "7.574194 | \n", - "32.513396 | \n", - "49.196304 | \n", - "
| 8 | \n", - "sentence-transformers/all-MiniLM-L6-v2 | \n", - "12 (237) | \n", - "43.447989 | \n", - "43.155857 | \n", - "27.244291 | \n", - "80.187412 | \n", - "47.757474 | \n", - "62.650859 | \n", - "37.346527 | \n", - "8.775719 | \n", - "33.555435 | \n", - "47.729140 | \n", - "
| \n", - " | model | \n", - "revision | \n", - "AILAStatutes | \n", - "AfriSentiClassification | \n", - "AlloProfClusteringS2S.v2 | \n", - "AlloprofReranking | \n", - "AmazonCounterfactualClassification | \n", - "ArXivHierarchicalClusteringP2P | \n", - "ArXivHierarchicalClusteringS2S | \n", - "ArguAna | \n", - "... | \n", - "Mean | \n", - "Mean BitextMining | \n", - "Mean PairClassification | \n", - "Mean Classification | \n", - "Mean STS | \n", - "Mean Retrieval | \n", - "Mean MultilabelClassification | \n", - "Mean Clustering | \n", - "Mean Reranking | \n", - "mean pr. task type | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4 | \n", - "intfloat/multilingual-e5-large-instruct | \n", - "baa7be480a7de1539afce709c8f13f833a510e0a | \n", - "29.659 | \n", - "45.387432 | \n", - "56.465655 | \n", - "74.677730 | \n", - "68.606356 | \n", - "62.534994 | \n", - "61.284058 | \n", - "58.476 | \n", - "... | \n", - "63.227169 | \n", - "80.126470 | \n", - "80.863584 | \n", - "64.942144 | \n", - "76.814671 | \n", - "57.116686 | \n", - "22.913504 | \n", - "51.538016 | \n", - "62.613273 | \n", - "62.116044 | \n", - "
| 0 | \n", - "GritLM/GritLM-7B | \n", - "13f00a0e36500c80ce12870ea513846a066004af | \n", - "41.800 | \n", - "45.078589 | \n", - "56.411821 | \n", - "77.926160 | \n", - "79.296512 | \n", - "59.760046 | \n", - "62.283243 | \n", - "63.171 | \n", - "... | \n", - "60.930855 | \n", - "70.531726 | \n", - "79.944411 | \n", - "61.830247 | \n", - "73.327960 | \n", - "58.306711 | \n", - "22.773771 | \n", - "50.482520 | \n", - "63.778754 | \n", - "60.122013 | \n", - "
| 1 | \n", - "intfloat/e5-mistral-7b-instruct | \n", - "07163b72af1488142a360786df853f237b1a3ca1 | \n", - "34.535 | \n", - "44.476335 | \n", - "57.112000 | \n", - "78.317664 | \n", - "73.555839 | \n", - "65.283735 | \n", - "61.278123 | \n", - "61.653 | \n", - "... | \n", - "60.279746 | \n", - "70.579905 | \n", - "81.122211 | \n", - "60.314295 | \n", - "74.021600 | \n", - "55.750101 | \n", - "22.196821 | \n", - "51.390095 | \n", - "63.819183 | \n", - "59.899276 | \n", - "
| 3 | \n", - "intfloat/multilingual-e5-large | \n", - "4dc6d853a804b9c8886ede6dda8a073b7dc08a81 | \n", - "20.842 | \n", - "45.500507 | \n", - "35.150764 | \n", - "69.442880 | \n", - "76.163514 | \n", - "55.572114 | \n", - "56.212217 | \n", - "54.357 | \n", - "... | \n", - "58.570572 | \n", - "71.666250 | \n", - "79.028390 | \n", - "59.916975 | \n", - "73.488372 | \n", - "54.111178 | \n", - "21.302373 | \n", - "42.923755 | \n", - "62.840466 | \n", - "58.159720 | \n", - "
| 2 | \n", - "intfloat/multilingual-e5-base | \n", - "d13f1b27baf31030b7fd040960d60d909913633f | \n", - "20.371 | \n", - "43.802315 | \n", - "34.113190 | \n", - "65.897152 | \n", - "74.334014 | \n", - "56.683137 | \n", - "56.115056 | \n", - "44.206 | \n", - "... | \n", - "57.012959 | \n", - "69.437945 | \n", - "77.154385 | \n", - "58.205718 | \n", - "71.444249 | \n", - "52.721823 | \n", - "20.162060 | \n", - "42.674461 | \n", - "60.176360 | \n", - "56.497125 | \n", - "
| 11 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "79f2382ceacceacdf38563d7c5d16b9ff8d725d6 | \n", - "22.236 | \n", - "42.445471 | \n", - "41.806274 | \n", - "67.204322 | \n", - "72.766528 | \n", - "55.342767 | \n", - "55.160328 | \n", - "48.908 | \n", - "... | \n", - "52.005259 | \n", - "52.062937 | \n", - "81.154392 | \n", - "55.064354 | \n", - "69.661040 | \n", - "39.757752 | \n", - "16.398034 | \n", - "41.080665 | \n", - "53.374677 | \n", - "51.069231 | \n", - "
| 5 | \n", - "intfloat/multilingual-e5-small | \n", - "e4ce9877abf3edfe10b0d82785e83bdcb973e22e | \n", - "19.011 | \n", - "42.358118 | \n", - "35.393261 | \n", - "64.410023 | \n", - "69.163655 | \n", - "54.276235 | \n", - "54.198749 | \n", - "39.088 | \n", - "... | \n", - "55.456701 | \n", - "67.472922 | \n", - "76.329053 | \n", - "56.500928 | \n", - "70.360817 | \n", - "49.345019 | \n", - "19.096430 | \n", - "41.735446 | \n", - "60.391010 | \n", - "55.153953 | \n", - "
| 6 | \n", - "sentence-transformers/LaBSE | \n", - "e34fab64a3011d2176c99545a93d5cbddc9a91b7 | \n", - "16.717 | \n", - "43.170657 | \n", - "30.208930 | \n", - "55.374766 | \n", - "74.987910 | \n", - "53.442702 | \n", - "49.986064 | \n", - "34.178 | \n", - "... | \n", - "52.100056 | \n", - "76.351008 | \n", - "75.969075 | \n", - "54.600844 | \n", - "65.349763 | \n", - "33.169113 | \n", - "20.122117 | \n", - "39.159195 | \n", - "50.197562 | \n", - "51.864835 | \n", - "
| 10 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "bf3bf13ab40c3157080a7ab344c831b9ad18b5eb | \n", - "20.525 | \n", - "37.672740 | \n", - "40.451213 | \n", - "62.424382 | \n", - "68.075646 | \n", - "53.617944 | \n", - "52.245736 | \n", - "44.878 | \n", - "... | \n", - "48.781520 | \n", - "44.563390 | \n", - "78.993229 | \n", - "51.656889 | \n", - "66.581953 | \n", - "36.614971 | \n", - "14.930329 | \n", - "39.337371 | \n", - "50.972387 | \n", - "47.956315 | \n", - "
| 9 | \n", - "sentence-transformers/all-mpnet-base-v2 | \n", - "84f2bcc00d77236f9e89c8a360a00fb1139bf47d | \n", - "21.275 | \n", - "37.267741 | \n", - "35.215154 | \n", - "69.630056 | \n", - "61.846281 | \n", - "61.473392 | \n", - "56.459312 | \n", - "46.521 | \n", - "... | \n", - "42.470049 | \n", - "21.161317 | \n", - "70.893513 | \n", - "46.985488 | \n", - "57.599660 | \n", - "32.808557 | \n", - "16.280508 | \n", - "40.765913 | \n", - "42.234410 | \n", - "41.091171 | \n", - "
| 7 | \n", - "sentence-transformers/all-MiniLM-L12-v2 | \n", - "a05860a77cef7b37e0048a7864658139bc18a854 | \n", - "20.714 | \n", - "37.288350 | \n", - "31.976537 | \n", - "67.013696 | \n", - "62.113993 | \n", - "57.444530 | \n", - "55.061728 | \n", - "47.128 | \n", - "... | \n", - "42.151564 | \n", - "22.908162 | \n", - "71.679313 | \n", - "46.848558 | \n", - "57.202961 | \n", - "32.504190 | \n", - "14.586407 | \n", - "36.839927 | \n", - "44.326628 | \n", - "40.862018 | \n", - "
| 8 | \n", - "sentence-transformers/all-MiniLM-L6-v2 | \n", - "8b3219a92973c328a8e22fadcfa821b5dc75636a | \n", - "20.516 | \n", - "39.807785 | \n", - "31.106654 | \n", - "62.621726 | \n", - "61.280109 | \n", - "59.106889 | \n", - "54.542340 | \n", - "50.167 | \n", - "... | \n", - "41.432105 | \n", - "20.093673 | \n", - "71.234656 | \n", - "46.198911 | \n", - "56.084065 | \n", - "32.513445 | \n", - "15.054331 | \n", - "38.037554 | \n", - "40.284573 | \n", - "39.937651 | \n", - "
12 rows \u00d7 146 columns
\n", - "| \n", - " | model | \n", - "Borda str | \n", - "Mean | \n", - "mean pr. task type | \n", - "Mean BitextMining | \n", - "Mean PairClassification | \n", - "Mean Classification | \n", - "Mean STS | \n", - "Mean Retrieval | \n", - "Mean MultilabelClassification | \n", - "Mean Clustering | \n", - "Mean Reranking | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4 | \n", - "intfloat/multilingual-e5-large-instruct | \n", - "1 (1375) | \n", - "63.227169 | \n", - "62.116044 | \n", - "80.126470 | \n", - "80.863584 | \n", - "64.942144 | \n", - "76.814671 | \n", - "57.116686 | \n", - "22.913504 | \n", - "51.538016 | \n", - "62.613273 | \n", - "
| 0 | \n", - "GritLM/GritLM-7B | \n", - "2 (1258) | \n", - "60.930855 | \n", - "60.122013 | \n", - "70.531726 | \n", - "79.944411 | \n", - "61.830247 | \n", - "73.327960 | \n", - "58.306711 | \n", - "22.773771 | \n", - "50.482520 | \n", - "63.778754 | \n", - "
| 1 | \n", - "intfloat/e5-mistral-7b-instruct | \n", - "3 (1233) | \n", - "60.279746 | \n", - "59.899276 | \n", - "70.579905 | \n", - "81.122211 | \n", - "60.314295 | \n", - "74.021600 | \n", - "55.750101 | \n", - "22.196821 | \n", - "51.390095 | \n", - "63.819183 | \n", - "
| 3 | \n", - "intfloat/multilingual-e5-large | \n", - "4 (1109) | \n", - "58.570572 | \n", - "58.159720 | \n", - "71.666250 | \n", - "79.028390 | \n", - "59.916975 | \n", - "73.488372 | \n", - "54.111178 | \n", - "21.302373 | \n", - "42.923755 | \n", - "62.840466 | \n", - "
| 2 | \n", - "intfloat/multilingual-e5-base | \n", - "5 (944) | \n", - "57.012959 | \n", - "56.497125 | \n", - "69.437945 | \n", - "77.154385 | \n", - "58.205718 | \n", - "71.444249 | \n", - "52.721823 | \n", - "20.162060 | \n", - "42.674461 | \n", - "60.176360 | \n", - "
| 11 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "6 (830) | \n", - "52.005259 | \n", - "51.069231 | \n", - "52.062937 | \n", - "81.154392 | \n", - "55.064354 | \n", - "69.661040 | \n", - "39.757752 | \n", - "16.398034 | \n", - "41.080665 | \n", - "53.374677 | \n", - "
| 5 | \n", - "intfloat/multilingual-e5-small | \n", - "7 (784) | \n", - "55.456701 | \n", - "55.153953 | \n", - "67.472922 | \n", - "76.329053 | \n", - "56.500928 | \n", - "70.360817 | \n", - "49.345019 | \n", - "19.096430 | \n", - "41.735446 | \n", - "60.391010 | \n", - "
| 6 | \n", - "sentence-transformers/LaBSE | \n", - "8 (719) | \n", - "52.100056 | \n", - "51.864835 | \n", - "76.351008 | \n", - "75.969075 | \n", - "54.600844 | \n", - "65.349763 | \n", - "33.169113 | \n", - "20.122117 | \n", - "39.159195 | \n", - "50.197562 | \n", - "
| 10 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "9 (603) | \n", - "48.781520 | \n", - "47.956315 | \n", - "44.563390 | \n", - "78.993229 | \n", - "51.656889 | \n", - "66.581953 | \n", - "36.614971 | \n", - "14.930329 | \n", - "39.337371 | \n", - "50.972387 | \n", - "
| 9 | \n", - "sentence-transformers/all-mpnet-base-v2 | \n", - "10 (526) | \n", - "42.470049 | \n", - "41.091171 | \n", - "21.161317 | \n", - "70.893513 | \n", - "46.985488 | \n", - "57.599660 | \n", - "32.808557 | \n", - "16.280508 | \n", - "40.765913 | \n", - "42.234410 | \n", - "
| 7 | \n", - "sentence-transformers/all-MiniLM-L12-v2 | \n", - "11 (490) | \n", - "42.151564 | \n", - "40.862018 | \n", - "22.908162 | \n", - "71.679313 | \n", - "46.848558 | \n", - "57.202961 | \n", - "32.504190 | \n", - "14.586407 | \n", - "36.839927 | \n", - "44.326628 | \n", - "
| 8 | \n", - "sentence-transformers/all-MiniLM-L6-v2 | \n", - "12 (418) | \n", - "41.432105 | \n", - "39.937651 | \n", - "20.093673 | \n", - "71.234656 | \n", - "46.198911 | \n", - "56.084065 | \n", - "32.513445 | \n", - "15.054331 | \n", - "38.037554 | \n", - "40.284573 | \n", - "
| \n", - " | model | \n", - "Average | \n", - "AmazonCounterfactualClassification | \n", - "AmazonPolarityClassification | \n", - "AmazonReviewsClassification | \n", - "ArXivHierarchicalClusteringP2P | \n", - "ArXivHierarchicalClusteringS2S | \n", - "ArguAna | \n", - "AskUbuntuDupQuestions | \n", - "BIOSSES | \n", - "... | \n", - "StackExchangeClusteringP2P.v2 | \n", - "StackOverflowDupQuestions | \n", - "SummEvalSummarization.v2 | \n", - "TRECCOVID | \n", - "Touche2020 | \n", - "ToxicConversationsClassification | \n", - "TweetSentimentExtractionClassification | \n", - "TwentyNewsgroupsClustering.v2 | \n", - "TwitterSemEval2015 | \n", - "TwitterURLCorpus | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Rank | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
| 0 | \n", - "intfloat/e5-mistral-7b-instruct | \n", - "0.626 | \n", - "0.732 | \n", - "0.963 | \n", - "0.520 | \n", - "0.653 | \n", - "0.613 | \n", - "0.617 | \n", - "0.670 | \n", - "0.855 | \n", - "... | \n", - "0.481 | \n", - "0.549 | \n", - "NaN | \n", - "0.870 | \n", - "0.263 | \n", - "0.717 | \n", - "0.649 | \n", - "0.533 | \n", - "0.816 | \n", - "0.878 | \n", - "
| 1 | \n", - "GritLM/GritLM-7B | \n", - "0.626 | \n", - "0.792 | \n", - "0.966 | \n", - "0.556 | \n", - "0.598 | \n", - "0.623 | \n", - "0.632 | \n", - "0.674 | \n", - "0.863 | \n", - "... | \n", - "0.438 | \n", - "0.559 | \n", - "NaN | \n", - "0.743 | \n", - "0.278 | \n", - "0.688 | \n", - "0.663 | \n", - "0.573 | \n", - "0.811 | \n", - "0.874 | \n", - "
| 2 | \n", - "intfloat/multilingual-e5-large-instruct | \n", - "0.611 | \n", - "0.681 | \n", - "0.962 | \n", - "0.508 | \n", - "0.625 | \n", - "0.613 | \n", - "0.585 | \n", - "0.644 | \n", - "0.875 | \n", - "... | \n", - "0.461 | \n", - "0.525 | \n", - "NaN | \n", - "0.825 | \n", - "0.274 | \n", - "0.668 | \n", - "0.592 | \n", - "0.507 | \n", - "0.798 | \n", - "0.867 | \n", - "
| 3 | \n", - "intfloat/multilingual-e5-large | \n", - "0.569 | \n", - "0.762 | \n", - "0.933 | \n", - "0.439 | \n", - "0.556 | \n", - "0.562 | \n", - "0.544 | \n", - "0.592 | \n", - "0.825 | \n", - "... | \n", - "0.385 | \n", - "0.501 | \n", - "0.314 | \n", - "0.712 | \n", - "0.231 | \n", - "0.660 | \n", - "0.628 | \n", - "0.392 | \n", - "0.753 | \n", - "0.858 | \n", - "
| 4 | \n", - "intfloat/multilingual-e5-base | \n", - "0.556 | \n", - "0.743 | \n", - "0.918 | \n", - "0.425 | \n", - "0.567 | \n", - "0.561 | \n", - "0.442 | \n", - "0.593 | \n", - "0.851 | \n", - "... | \n", - "0.389 | \n", - "0.497 | \n", - "NaN | \n", - "0.695 | \n", - "0.215 | \n", - "0.643 | \n", - "0.628 | \n", - "0.358 | \n", - "0.722 | \n", - "0.855 | \n", - "
| 5 | \n", - "intfloat/multilingual-e5-small | \n", - "0.536 | \n", - "0.695 | \n", - "0.886 | \n", - "0.408 | \n", - "0.543 | \n", - "0.542 | \n", - "0.391 | \n", - "0.564 | \n", - "0.825 | \n", - "... | \n", - "0.376 | \n", - "0.470 | \n", - "NaN | \n", - "0.726 | \n", - "0.212 | \n", - "0.636 | \n", - "0.628 | \n", - "0.345 | \n", - "0.708 | \n", - "0.850 | \n", - "
| 6 | \n", - "sentence-transformers/all-mpnet-base-v2 | \n", - "0.531 | \n", - "0.622 | \n", - "0.671 | \n", - "0.268 | \n", - "0.615 | \n", - "0.565 | \n", - "0.465 | \n", - "0.659 | \n", - "0.804 | \n", - "... | \n", - "0.403 | \n", - "0.520 | \n", - "NaN | \n", - "0.513 | \n", - "0.199 | \n", - "0.611 | \n", - "0.550 | \n", - "0.501 | \n", - "0.739 | \n", - "0.851 | \n", - "
| 7 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "0.517 | \n", - "0.729 | \n", - "0.764 | \n", - "0.386 | \n", - "0.553 | \n", - "0.552 | \n", - "0.489 | \n", - "0.602 | \n", - "0.763 | \n", - "... | \n", - "0.382 | \n", - "0.468 | \n", - "NaN | \n", - "0.379 | \n", - "0.174 | \n", - "0.656 | \n", - "0.590 | \n", - "0.452 | \n", - "0.688 | \n", - "0.853 | \n", - "
| 8 | \n", - "sentence-transformers/all-MiniLM-L12-v2 | \n", - "0.516 | \n", - "0.624 | \n", - "0.630 | \n", - "0.264 | \n", - "0.574 | \n", - "0.551 | \n", - "0.471 | \n", - "0.641 | \n", - "0.836 | \n", - "... | \n", - "0.389 | \n", - "0.515 | \n", - "NaN | \n", - "0.508 | \n", - "0.172 | \n", - "0.633 | \n", - "0.542 | \n", - "0.470 | \n", - "0.700 | \n", - "0.848 | \n", - "
| 9 | \n", - "sentence-transformers/all-MiniLM-L6-v2 | \n", - "0.512 | \n", - "0.620 | \n", - "0.643 | \n", - "0.265 | \n", - "0.591 | \n", - "0.545 | \n", - "0.502 | \n", - "0.635 | \n", - "0.816 | \n", - "... | \n", - "0.403 | \n", - "0.508 | \n", - "NaN | \n", - "0.472 | \n", - "0.169 | \n", - "0.621 | \n", - "0.540 | \n", - "0.460 | \n", - "0.679 | \n", - "0.847 | \n", - "
| 10 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "0.497 | \n", - "0.683 | \n", - "0.692 | \n", - "0.354 | \n", - "0.536 | \n", - "0.522 | \n", - "0.449 | \n", - "0.605 | \n", - "0.742 | \n", - "... | \n", - "0.375 | \n", - "0.458 | \n", - "NaN | \n", - "0.391 | \n", - "0.161 | \n", - "0.601 | \n", - "0.561 | \n", - "0.407 | \n", - "0.651 | \n", - "0.838 | \n", - "
| 11 | \n", - "sentence-transformers/LaBSE | \n", - "0.426 | \n", - "0.754 | \n", - "0.689 | \n", - "0.378 | \n", - "0.534 | \n", - "0.500 | \n", - "0.342 | \n", - "0.527 | \n", - "0.787 | \n", - "... | \n", - "0.353 | \n", - "0.424 | \n", - "NaN | \n", - "0.163 | \n", - "0.049 | \n", - "0.632 | \n", - "0.588 | \n", - "0.242 | \n", - "0.628 | \n", - "0.846 | \n", - "
12 rows \u00d7 66 columns
\n", - "| \n", - " | model | \n", - "Average_v1 | \n", - "Rank_v1 | \n", - "Average_v2 | \n", - "Rank_v2 | \n", - "
|---|---|---|---|---|---|
| 0 | \n", - "GritLM/GritLM-7B | \n", - "66.76 | \n", - "1.0 | \n", - "62.576234 | \n", - "2.0 | \n", - "
| 1 | \n", - "intfloat/e5-mistral-7b-instruct | \n", - "66.63 | \n", - "2.0 | \n", - "62.623036 | \n", - "1.0 | \n", - "
| 2 | \n", - "intfloat/multilingual-e5-large-instruct | \n", - "64.41 | \n", - "3.0 | \n", - "61.127328 | \n", - "3.0 | \n", - "
| 3 | \n", - "intfloat/multilingual-e5-large | \n", - "60.89 | \n", - "4.0 | \n", - "56.925622 | \n", - "4.0 | \n", - "
| 4 | \n", - "intfloat/multilingual-e5-base | \n", - "59.11 | \n", - "5.0 | \n", - "55.578989 | \n", - "5.0 | \n", - "
| 5 | \n", - "sentence-transformers/all-mpnet-base-v2 | \n", - "57.78 | \n", - "6.0 | \n", - "53.148536 | \n", - "7.0 | \n", - "
| 6 | \n", - "intfloat/multilingual-e5-small | \n", - "57.04 | \n", - "7.0 | \n", - "53.556709 | \n", - "6.0 | \n", - "
| 7 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "54.64 | \n", - "10.0 | \n", - "51.724491 | \n", - "8.0 | \n", - "
| 8 | \n", - "sentence-transformers/all-MiniLM-L12-v2 | \n", - "56.53 | \n", - "8.0 | \n", - "51.633906 | \n", - "9.0 | \n", - "
| 9 | \n", - "sentence-transformers/all-MiniLM-L6-v2 | \n", - "56.10 | \n", - "9.0 | \n", - "51.215533 | \n", - "10.0 | \n", - "
| 10 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "52.45 | \n", - "11.0 | \n", - "49.685354 | \n", - "11.0 | \n", - "
| 11 | \n", - "sentence-transformers/LaBSE | \n", - "45.21 | \n", - "12.0 | \n", - "42.554815 | \n", - "12.0 | \n", - "
| \n", - " | DBPediaHardNegatives | \n", - "SummEvalSummarization.v2 | \n", - "
|---|---|---|
| Rank | \n", - "\n", - " | \n", - " |
| 0 | \n", - "0.46571 | \n", - "NaN | \n", - "
| 1 | \n", - "0.40407 | \n", - "NaN | \n", - "
| 2 | \n", - "NaN | \n", - "NaN | \n", - "
| 3 | \n", - "0.42475 | \n", - "0.314073 | \n", - "
| 4 | \n", - "0.42578 | \n", - "NaN | \n", - "
| 5 | \n", - "0.40379 | \n", - "NaN | \n", - "
| 6 | \n", - "0.35720 | \n", - "NaN | \n", - "
| 7 | \n", - "0.30667 | \n", - "NaN | \n", - "
| 8 | \n", - "0.36958 | \n", - "NaN | \n", - "
| 9 | \n", - "0.35697 | \n", - "NaN | \n", - "
| 10 | \n", - "0.27419 | \n", - "NaN | \n", - "
| 11 | \n", - "0.21176 | \n", - "NaN | \n", - "
| \n", - " | model | \n", - "Average_v2_full | \n", - "Rank_v2_full | \n", - "Average_v2_lite | \n", - "Rank_v2_lite | \n", - "
|---|---|---|---|---|---|
| 0 | \n", - "intfloat/e5-mistral-7b-instruct | \n", - "0.63 | \n", - "1.0 | \n", - "0.67 | \n", - "1.0 | \n", - "
| 1 | \n", - "GritLM/GritLM-7B | \n", - "0.63 | \n", - "2.0 | \n", - "0.66 | \n", - "2.0 | \n", - "
| 2 | \n", - "intfloat/multilingual-e5-large-instruct | \n", - "0.61 | \n", - "3.0 | \n", - "0.65 | \n", - "3.0 | \n", - "
| 3 | \n", - "intfloat/multilingual-e5-large | \n", - "0.57 | \n", - "4.0 | \n", - "0.62 | \n", - "4.0 | \n", - "
| 4 | \n", - "intfloat/multilingual-e5-base | \n", - "0.56 | \n", - "5.0 | \n", - "0.60 | \n", - "5.0 | \n", - "
| 5 | \n", - "intfloat/multilingual-e5-small | \n", - "0.54 | \n", - "6.0 | \n", - "0.58 | \n", - "6.0 | \n", - "
| 6 | \n", - "sentence-transformers/all-mpnet-base-v2 | \n", - "0.53 | \n", - "7.0 | \n", - "0.56 | \n", - "8.0 | \n", - "
| 7 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "0.52 | \n", - "8.0 | \n", - "0.57 | \n", - "7.0 | \n", - "
| 8 | \n", - "sentence-transformers/all-MiniLM-L12-v2 | \n", - "0.52 | \n", - "9.0 | \n", - "0.55 | \n", - "10.0 | \n", - "
| 9 | \n", - "sentence-transformers/all-MiniLM-L6-v2 | \n", - "0.51 | \n", - "10.0 | \n", - "0.54 | \n", - "11.0 | \n", - "
| 10 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "0.50 | \n", - "11.0 | \n", - "0.55 | \n", - "9.0 | \n", - "
| 11 | \n", - "sentence-transformers/LaBSE | \n", - "0.43 | \n", - "12.0 | \n", - "0.49 | \n", - "12.0 | \n", - "
| \n", - " | model | \n", - "Average_v2_full | \n", - "Rank_v2_full | \n", - "Average_v2_lite | \n", - "Rank_v2_lite | \n", - "Average | \n", - "Rank | \n", - "
|---|---|---|---|---|---|---|---|
| 0 | \n", - "intfloat/e5-mistral-7b-instruct | \n", - "0.63 | \n", - "1.0 | \n", - "0.67 | \n", - "1.0 | \n", - "66.63 | \n", - "2.0 | \n", - "
| 1 | \n", - "GritLM/GritLM-7B | \n", - "0.63 | \n", - "2.0 | \n", - "0.66 | \n", - "2.0 | \n", - "66.76 | \n", - "1.0 | \n", - "
| 2 | \n", - "intfloat/multilingual-e5-large-instruct | \n", - "0.61 | \n", - "3.0 | \n", - "0.65 | \n", - "3.0 | \n", - "64.41 | \n", - "3.0 | \n", - "
| 3 | \n", - "intfloat/multilingual-e5-large | \n", - "0.57 | \n", - "4.0 | \n", - "0.62 | \n", - "4.0 | \n", - "60.89 | \n", - "4.0 | \n", - "
| 4 | \n", - "intfloat/multilingual-e5-base | \n", - "0.56 | \n", - "5.0 | \n", - "0.60 | \n", - "5.0 | \n", - "59.11 | \n", - "5.0 | \n", - "
| 5 | \n", - "intfloat/multilingual-e5-small | \n", - "0.54 | \n", - "6.0 | \n", - "0.58 | \n", - "6.0 | \n", - "57.04 | \n", - "7.0 | \n", - "
| 6 | \n", - "sentence-transformers/all-mpnet-base-v2 | \n", - "0.53 | \n", - "7.0 | \n", - "0.56 | \n", - "8.0 | \n", - "57.78 | \n", - "6.0 | \n", - "
| 7 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "0.52 | \n", - "8.0 | \n", - "0.57 | \n", - "7.0 | \n", - "54.64 | \n", - "10.0 | \n", - "
| 8 | \n", - "sentence-transformers/all-MiniLM-L12-v2 | \n", - "0.52 | \n", - "9.0 | \n", - "0.55 | \n", - "10.0 | \n", - "56.53 | \n", - "8.0 | \n", - "
| 9 | \n", - "sentence-transformers/all-MiniLM-L6-v2 | \n", - "0.51 | \n", - "10.0 | \n", - "0.54 | \n", - "11.0 | \n", - "56.10 | \n", - "9.0 | \n", - "
| 10 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "0.50 | \n", - "11.0 | \n", - "0.55 | \n", - "9.0 | \n", - "52.45 | \n", - "11.0 | \n", - "
| 11 | \n", - "sentence-transformers/LaBSE | \n", - "0.43 | \n", - "12.0 | \n", - "0.49 | \n", - "12.0 | \n", - "45.21 | \n", - "12.0 | \n", - "
| \n", - " | model | \n", - "Average_v2_full | \n", - "Rank_v2_full | \n", - "Average_v2_lite | \n", - "Rank_v2_lite | \n", - "Average | \n", - "Rank | \n", - "Embedding Size | \n", - "Model Name | \n", - "
|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", - "intfloat/e5-mistral-7b-instruct | \n", - "0.626230 | \n", - "1.0 | \n", - "66.963823 | \n", - "1.0 | \n", - "66.63 | \n", - "2.0 | \n", - "4096 | \n", - "e5-mistral-7b-instruct | \n", - "
| 1 | \n", - "GritLM/GritLM-7B | \n", - "0.625762 | \n", - "2.0 | \n", - "66.448032 | \n", - "2.0 | \n", - "66.76 | \n", - "1.0 | \n", - "4096 | \n", - "GritLM-7B | \n", - "
| 2 | \n", - "intfloat/multilingual-e5-large-instruct | \n", - "0.611273 | \n", - "3.0 | \n", - "65.236396 | \n", - "3.0 | \n", - "64.41 | \n", - "3.0 | \n", - "1024 | \n", - "multilingual-e5-large-instruct | \n", - "
| 3 | \n", - "intfloat/multilingual-e5-large | \n", - "0.569256 | \n", - "4.0 | \n", - "62.070275 | \n", - "4.0 | \n", - "60.89 | \n", - "4.0 | \n", - "1024 | \n", - "multilingual-e5-large | \n", - "
| 4 | \n", - "intfloat/multilingual-e5-base | \n", - "0.555790 | \n", - "5.0 | \n", - "60.235039 | \n", - "5.0 | \n", - "59.11 | \n", - "5.0 | \n", - "768 | \n", - "multilingual-e5-base | \n", - "
| 5 | \n", - "intfloat/multilingual-e5-small | \n", - "0.535567 | \n", - "6.0 | \n", - "58.444347 | \n", - "6.0 | \n", - "57.04 | \n", - "7.0 | \n", - "384 | \n", - "multilingual-e5-small | \n", - "
| 6 | \n", - "sentence-transformers/all-mpnet-base-v2 | \n", - "0.531485 | \n", - "7.0 | \n", - "56.019392 | \n", - "8.0 | \n", - "57.78 | \n", - "6.0 | \n", - "768 | \n", - "all-mpnet-base | \n", - "
| 7 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "0.517245 | \n", - "8.0 | \n", - "57.290940 | \n", - "7.0 | \n", - "54.64 | \n", - "10.0 | \n", - "768 | \n", - "multilingual-mpnet-base | \n", - "
| 8 | \n", - "sentence-transformers/all-MiniLM-L12-v2 | \n", - "0.516339 | \n", - "9.0 | \n", - "54.728697 | \n", - "10.0 | \n", - "56.53 | \n", - "8.0 | \n", - "384 | \n", - "all-MiniLM-L12 | \n", - "
| 9 | \n", - "sentence-transformers/all-MiniLM-L6-v2 | \n", - "0.512155 | \n", - "10.0 | \n", - "54.381772 | \n", - "11.0 | \n", - "56.10 | \n", - "9.0 | \n", - "384 | \n", - "all-MiniLM-L6 | \n", - "
| 10 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "0.496854 | \n", - "11.0 | \n", - "55.130507 | \n", - "9.0 | \n", - "52.45 | \n", - "11.0 | \n", - "384 | \n", - "multilingual-MiniLM-L12 | \n", - "
| 11 | \n", - "sentence-transformers/LaBSE | \n", - "0.425548 | \n", - "12.0 | \n", - "48.570700 | \n", - "12.0 | \n", - "45.21 | \n", - "12.0 | \n", - "768 | \n", - "LaBSE | \n", - "
| \n", - " | model | \n", - "revision | \n", - "mean | \n", - "mean (Clustering) | \n", - "mean (STS) | \n", - "mean (Classification) | \n", - "mean (Reranking) | \n", - "mean (Retrieval) | \n", - "mean (PairClassification) | \n", - "mean (weighted by task type) | \n", - "borda_count | \n", - "Total Evaluation time (hours) | \n", - "Total CO2-eq emissions (kg) | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 11 | \n", - "intfloat/e5-mistral-7b-instruct | \n", - "07163b72af1488142a360786df853f237b1a3ca1 | \n", - "0.670 | \n", - "0.514 | \n", - "0.836 | \n", - "0.752 | \n", - "0.498 | \n", - "0.548 | \n", - "0.884 | \n", - "0.672 | \n", - "393.0 | \n", - "2.502 | \n", - "2.971 | \n", - "
| 2 | \n", - "GritLM/GritLM-7B | \n", - "13f00a0e36500c80ce12870ea513846a066004af | \n", - "0.664 | \n", - "0.508 | \n", - "0.825 | \n", - "0.770 | \n", - "0.496 | \n", - "0.532 | \n", - "0.873 | \n", - "0.667 | \n", - "384.0 | \n", - "3.111 | \n", - "3.409 | \n", - "
| 7 | \n", - "intfloat/multilingual-e5-large-instruct | \n", - "baa7be480a7de1539afce709c8f13f833a510e0a | \n", - "0.652 | \n", - "0.499 | \n", - "0.843 | \n", - "0.732 | \n", - "0.487 | \n", - "0.510 | \n", - "0.862 | \n", - "0.656 | \n", - "357.0 | \n", - "2.033 | \n", - "1.418 | \n", - "
| 3 | \n", - "intfloat/multilingual-e5-large | \n", - "4dc6d853a804b9c8886ede6dda8a073b7dc08a81 | \n", - "0.621 | \n", - "0.428 | \n", - "0.806 | \n", - "0.728 | \n", - "0.447 | \n", - "0.490 | \n", - "0.847 | \n", - "0.624 | \n", - "270.0 | \n", - "2.549 | \n", - "1.563 | \n", - "
| 6 | \n", - "sentence-transformers/all-mpnet-base-v2 | \n", - "84f2bcc00d77236f9e89c8a360a00fb1139bf47d | \n", - "0.560 | \n", - "0.466 | \n", - "0.722 | \n", - "0.566 | \n", - "0.484 | \n", - "0.419 | \n", - "0.830 | \n", - "0.581 | \n", - "211.0 | \n", - "1.190 | \n", - "0.688 | \n", - "
| 9 | \n", - "intfloat/multilingual-e5-base | \n", - "d13f1b27baf31030b7fd040960d60d909913633f | \n", - "0.602 | \n", - "0.422 | \n", - "0.791 | \n", - "0.700 | \n", - "0.443 | \n", - "0.461 | \n", - "0.836 | \n", - "0.609 | \n", - "211.0 | \n", - "1.170 | \n", - "0.691 | \n", - "
| 4 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "79f2382ceacceacdf38563d7c5d16b9ff8d725d6 | \n", - "0.573 | \n", - "0.435 | \n", - "0.798 | \n", - "0.686 | \n", - "0.452 | \n", - "0.341 | \n", - "0.817 | \n", - "0.588 | \n", - "188.0 | \n", - "1.017 | \n", - "0.563 | \n", - "
| 8 | \n", - "sentence-transformers/all-MiniLM-L12-v2 | \n", - "a05860a77cef7b37e0048a7864658139bc18a854 | \n", - "0.547 | \n", - "0.446 | \n", - "0.707 | \n", - "0.558 | \n", - "0.475 | \n", - "0.407 | \n", - "0.825 | \n", - "0.570 | \n", - "172.0 | \n", - "0.814 | \n", - "0.442 | \n", - "
| 10 | \n", - "sentence-transformers/all-MiniLM-L6-v2 | \n", - "8b3219a92973c328a8e22fadcfa821b5dc75636a | \n", - "0.544 | \n", - "0.449 | \n", - "0.704 | \n", - "0.554 | \n", - "0.471 | \n", - "0.398 | \n", - "0.824 | \n", - "0.567 | \n", - "149.0 | \n", - "0.733 | \n", - "0.391 | \n", - "
| 0 | \n", - "intfloat/multilingual-e5-small | \n", - "e4ce9877abf3edfe10b0d82785e83bdcb973e22e | \n", - "0.584 | \n", - "0.408 | \n", - "0.776 | \n", - "0.677 | \n", - "0.432 | \n", - "0.437 | \n", - "0.827 | \n", - "0.593 | \n", - "147.0 | \n", - "0.833 | \n", - "0.459 | \n", - "
| 5 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "bf3bf13ab40c3157080a7ab344c831b9ad18b5eb | \n", - "0.551 | \n", - "0.417 | \n", - "0.775 | \n", - "0.644 | \n", - "0.454 | \n", - "0.328 | \n", - "0.800 | \n", - "0.570 | \n", - "109.0 | \n", - "0.879 | \n", - "0.469 | \n", - "
| 1 | \n", - "sentence-transformers/LaBSE | \n", - "e34fab64a3011d2176c99545a93d5cbddc9a91b7 | \n", - "0.486 | \n", - "0.361 | \n", - "0.702 | \n", - "0.668 | \n", - "0.413 | \n", - "0.168 | \n", - "0.789 | \n", - "0.517 | \n", - "49.0 | \n", - "1.020 | \n", - "0.582 | \n", - "
| \n", - " | task | \n", - "AILACasedocs | \n", - "AILAStatutes | \n", - "ARCChallenge | \n", - "AfriSentiClassification | \n", - "AfriSentiLangClassification | \n", - "AllegroReviews | \n", - "AlloProfClusteringP2P.v2 | \n", - "AlloProfClusteringS2S.v2 | \n", - "AlloprofReranking | \n", - "AlloprofRetrieval | \n", - "... | \n", - "WikiCitiesClustering | \n", - "WikiClusteringP2P.v2 | \n", - "WikipediaRerankingMultilingual | \n", - "WikipediaRetrievalMultilingual | \n", - "WinoGrande | \n", - "XMarket | \n", - "XNLI | \n", - "XPQARetrieval | \n", - "XQuADRetrieval | \n", - "YelpReviewFullClassification | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| model | \n", - "revision | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
| GritLM/GritLM-7B | \n", - "13f00a0e36500c80ce12870ea513846a066004af | \n", - "0.35292 | \n", - "0.41800 | \n", - "0.26677 | \n", - "0.439404 | \n", - "0.931445 | \n", - "0.567694 | \n", - "0.671576 | \n", - "0.564118 | \n", - "0.779262 | \n", - "0.55422 | \n", - "... | \n", - "0.836619 | \n", - "0.276693 | \n", - "0.924117 | \n", - "0.934731 | \n", - "0.53697 | \n", - "0.259600 | \n", - "0.784399 | \n", - "0.493916 | \n", - "0.961998 | \n", - "0.650635 | \n", - "
| intfloat/e5-mistral-7b-instruct | \n", - "07163b72af1488142a360786df853f237b1a3ca1 | \n", - "0.36662 | \n", - "0.34535 | \n", - "0.19001 | \n", - "0.406934 | \n", - "0.921680 | \n", - "0.597813 | \n", - "0.691183 | \n", - "0.571120 | \n", - "0.783177 | \n", - "0.54619 | \n", - "... | \n", - "0.890336 | \n", - "0.287826 | \n", - "0.916219 | \n", - "0.927265 | \n", - "0.39514 | \n", - "0.287633 | \n", - "0.821737 | \n", - "0.456863 | \n", - "0.951960 | \n", - "0.618311 | \n", - "
| intfloat/multilingual-e5-base | \n", - "d13f1b27baf31030b7fd040960d60d909913633f | \n", - "0.26053 | \n", - "0.20371 | \n", - "0.09611 | \n", - "0.400586 | \n", - "0.671191 | \n", - "0.407753 | \n", - "0.631008 | \n", - "0.341132 | \n", - "0.658972 | \n", - "0.34447 | \n", - "... | \n", - "0.798718 | \n", - "0.241045 | \n", - "0.886177 | \n", - "0.899056 | \n", - "0.56177 | \n", - "0.167343 | \n", - "0.718563 | \n", - "0.391408 | \n", - "0.963752 | \n", - "0.597217 | \n", - "
| intfloat/multilingual-e5-large | \n", - "4dc6d853a804b9c8886ede6dda8a073b7dc08a81 | \n", - "0.26427 | \n", - "0.20842 | \n", - "0.10828 | \n", - "0.423193 | \n", - "0.642822 | \n", - "0.410437 | \n", - "0.636065 | \n", - "0.351508 | \n", - "0.694429 | \n", - "0.39341 | \n", - "... | \n", - "0.755041 | \n", - "0.249324 | \n", - "0.905086 | \n", - "0.917812 | \n", - "0.54985 | \n", - "0.171770 | \n", - "0.749804 | \n", - "0.457246 | \n", - "0.974800 | \n", - "0.643164 | \n", - "
| intfloat/multilingual-e5-large-instruct | \n", - "baa7be480a7de1539afce709c8f13f833a510e0a | \n", - "0.33330 | \n", - "0.29659 | \n", - "0.15027 | \n", - "0.423486 | \n", - "0.914404 | \n", - "0.524254 | \n", - "0.669222 | \n", - "0.564657 | \n", - "0.746777 | \n", - "0.52118 | \n", - "... | \n", - "0.762207 | \n", - "0.287510 | \n", - "0.918727 | \n", - "0.926954 | \n", - "0.54272 | \n", - "0.256423 | \n", - "0.806215 | \n", - "0.504125 | \n", - "0.970556 | \n", - "0.652686 | \n", - "
5 rows × 381 columns
\n", - "| \n", - " | task | \n", - "
|---|---|
| model | \n", - "revision | \n", - "
| \n", - " | task | \n", - "Diversity1LegalBenchClassification | \n", - "Diversity2LegalBenchClassification | \n", - "
|---|---|---|---|
| model | \n", - "revision | \n", - "\n", - " | \n", - " |
| GritLM/GritLM-7B | \n", - "13f00a0e36500c80ce12870ea513846a066004af | \n", - "0.763333 | \n", - "0.746667 | \n", - "
| intfloat/e5-mistral-7b-instruct | \n", - "07163b72af1488142a360786df853f237b1a3ca1 | \n", - "0.763333 | \n", - "0.746667 | \n", - "
| intfloat/multilingual-e5-base | \n", - "d13f1b27baf31030b7fd040960d60d909913633f | \n", - "0.763333 | \n", - "0.746667 | \n", - "
| intfloat/multilingual-e5-large | \n", - "4dc6d853a804b9c8886ede6dda8a073b7dc08a81 | \n", - "0.763333 | \n", - "0.746667 | \n", - "
| intfloat/multilingual-e5-large-instruct | \n", - "baa7be480a7de1539afce709c8f13f833a510e0a | \n", - "0.763333 | \n", - "0.746667 | \n", - "
| intfloat/multilingual-e5-small | \n", - "e4ce9877abf3edfe10b0d82785e83bdcb973e22e | \n", - "0.763333 | \n", - "0.746667 | \n", - "
| sentence-transformers/LaBSE | \n", - "e34fab64a3011d2176c99545a93d5cbddc9a91b7 | \n", - "0.763333 | \n", - "0.746667 | \n", - "
| sentence-transformers/all-MiniLM-L12-v2 | \n", - "a05860a77cef7b37e0048a7864658139bc18a854 | \n", - "0.763333 | \n", - "0.746667 | \n", - "
| sentence-transformers/all-MiniLM-L6-v2 | \n", - "8b3219a92973c328a8e22fadcfa821b5dc75636a | \n", - "0.763333 | \n", - "0.746667 | \n", - "
| sentence-transformers/all-mpnet-base-v2 | \n", - "84f2bcc00d77236f9e89c8a360a00fb1139bf47d | \n", - "0.763333 | \n", - "0.746667 | \n", - "
| sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 | \n", - "bf3bf13ab40c3157080a7ab344c831b9ad18b5eb | \n", - "0.763333 | \n", - "0.746667 | \n", - "
| sentence-transformers/paraphrase-multilingual-mpnet-base-v2 | \n", - "79f2382ceacceacdf38563d7c5d16b9ff8d725d6 | \n", - "0.763333 | \n", - "0.746667 | \n", - "
| \n", - " | \n", - " | Languages | \n", - "Domains | \n", - "License | \n", - "Description | \n", - "
|---|---|---|---|---|---|
| Type | \n", - "Name | \n", - "\n", - " | \n", - " | \n", - " | \n", - " |
| BitextMining | \n", - "BornholmBitextMining | \n", - "{dan} | \n", - "[Web, Social, Fiction, Written] | \n", - "CC-BY-4.0 | \n", - "Danish Bornholmsk Parallel Corpus. Bornholmsk ... | \n", - "
| BibleNLPBitextMining | \n", - "{hrv, lit, por, ita, nld, dan, ces, spa, pol, ... | \n", - "[Religious, Written] | \n", - "CC-BY-SA-4.0 | \n", - "Partial Bible translations in 829 languages, a... | \n", - "|
| BUCC.v2 | \n", - "{eng, fra, deu} | \n", - "[Written] | \n", - "Unknown | \n", - "BUCC bitext mining dataset | \n", - "|
| DiaBlaBitextMining | \n", - "{eng, fra} | \n", - "[Social, Written] | \n", - "CC BY-NC-SA 4.0 | \n", - "English-French Parallel Corpus. DiaBLa is an E... | \n", - "|
| FloresBitextMining | \n", - "{fin, nob, ces, pol, swe, eng, lit, slk, nno, ... | \n", - "[Non-fiction, Encyclopaedic, Written] | \n", - "CC BY-SA 4.0 | \n", - "FLORES is a benchmark dataset for machine tran... | \n", - "|
| NorwegianCourtsBitextMining | \n", - "{nob, nno} | \n", - "[Legal, Written] | \n", - "CC BY 4.0 | \n", - "Nynorsk and Bokmål parallel corpus from Norweg... | \n", - "|
| NTREXBitextMining | \n", - "{fin, nob, ces, pol, swe, eng, lit, slk, nno, ... | \n", - "[News, Written] | \n", - "CC-BY-SA-4.0 | \n", - "NTREX is a News Test References dataset for Ma... | \n", - "|
| Classification | \n", - "BulgarianStoreReviewSentimentClassfication | \n", - "{bul} | \n", - "[Reviews, Written] | \n", - "cc-by-4.0 | \n", - "Bulgarian online store review dataset for sent... | \n", - "
| CzechProductReviewSentimentClassification | \n", - "{ces} | \n", - "[Reviews, Written] | \n", - "CC BY-NC-SA 4.0 | \n", - "User reviews of products on Czech e-shop Mall.... | \n", - "|
| GreekLegalCodeClassification | \n", - "{ell} | \n", - "[Legal, Written] | \n", - "cc-by-4.0 | \n", - "Greek Legal Code Dataset for Classification. (... | \n", - "|
| DBpediaClassification | \n", - "{eng} | \n", - "[Encyclopaedic, Written] | \n", - "cc-by-sa-3.0 | \n", - "DBpedia14 is a dataset of English texts from W... | \n", - "|
| FinancialPhrasebankClassification | \n", - "{eng} | \n", - "[News, Written] | \n", - "cc-by-nc-sa-3.0 | \n", - "Polar sentiment dataset of sentences from fina... | \n", - "|
| PoemSentimentClassification | \n", - "{eng} | \n", - "[Reviews, Written] | \n", - "CC-BY-4.0 | \n", - "Poem Sentiment is a sentiment dataset of poem ... | \n", - "|
| ToxicChatClassification | \n", - "{eng} | \n", - "[Constructed, Written] | \n", - "cc-by-4.0 | \n", - "This dataset contains toxicity annotations on ... | \n", - "|
| ToxicConversationsClassification | \n", - "{eng} | \n", - "[Social, Written] | \n", - "CC BY 4.0 | \n", - "Collection of comments from the Civil Comments... | \n", - "|
| EstonianValenceClassification | \n", - "{est} | \n", - "[News, Written] | \n", - "CC BY 4.0 | \n", - "Dataset containing annotated Estonian news dat... | \n", - "|
| ItaCaseholdClassification | \n", - "{ita} | \n", - "[Legal, Government, Written] | \n", - "Apache 2.0 | \n", - "An Italian Dataset consisting of 1101 pairs of... | \n", - "|
| AmazonCounterfactualClassification | \n", - "{eng, deu} | \n", - "[Reviews, Written] | \n", - "CC BY 4.0 | \n", - "A collection of Amazon customer reviews annota... | \n", - "|
| MassiveScenarioClassification | \n", - "{fin, por, nob, ita, nld, dan, lav, ell, isl, ... | \n", - "[Spoken] | \n", - "Apache 2.0 | \n", - "MASSIVE: A 1M-Example Multilingual Natural Lan... | \n", - "|
| MultiHateClassification | \n", - "{por, ita, nld, spa, pol, fra, eng, deu} | \n", - "[Constructed, Written] | \n", - "cc-by-4.0 | \n", - "Hate speech detection dataset with binary\\n ... | \n", - "|
| NordicLangClassification | \n", - "{nob, nno, dan, isl, swe} | \n", - "[Encyclopaedic] | \n", - "cc-by-sa-3.0 | \n", - "A dataset for Nordic language identification. | \n", - "|
| ScalaClassification | \n", - "{swe, dan, nob, nno} | \n", - "[Fiction, News, Non-fiction, Blog, Spoken, Web... | \n", - "CC BY-SA 4.0 | \n", - "ScaLa a linguistic acceptability dataset for t... | \n", - "|
| SwissJudgementClassification | \n", - "{deu, fra, ita} | \n", - "[Legal, Written] | \n", - "CC-BY-4.0 | \n", - "Multilingual, diachronic dataset of Swiss Fede... | \n", - "|
| TweetSentimentClassification | \n", - "{por, ita, spa, fra, eng, deu} | \n", - "[Social, Written] | \n", - "cc-by-3.0 | \n", - "A multilingual Sentiment Analysis dataset cons... | \n", - "|
| CBD | \n", - "{pol} | \n", - "[Written, Social] | \n", - "bsd-3-clause | \n", - "Polish Tweets annotated for cyberbullying dete... | \n", - "|
| PolEmo2.0-OUT | \n", - "{pol} | \n", - "[Written, Social] | \n", - "cc-by-sa-4.0 | \n", - "A collection of Polish online reviews from fou... | \n", - "|
| CSFDSKMovieReviewSentimentClassification | \n", - "{slk} | \n", - "[Reviews, Written] | \n", - "CC-BY-SA-4.0 | \n", - "The dataset contains 30k user reviews from csf... | \n", - "|
| DalajClassification | \n", - "{swe} | \n", - "[Non-fiction, Written] | \n", - "CC-BY-4.0 | \n", - "A Swedish dataset for linguistic acceptability... | \n", - "|
| Clustering | \n", - "WikiCitiesClustering | \n", - "{eng} | \n", - "[Encyclopaedic, Written] | \n", - "cc-by-sa-4.0 | \n", - "Clustering of Wikipedia articles of cities by ... | \n", - "
| RomaniBibleClustering | \n", - "{rom} | \n", - "[Religious, Written] | \n", - "MIT | \n", - "Clustering verses from the Bible in Kalderash ... | \n", - "|
| BigPatentClustering.v2 | \n", - "{eng} | \n", - "[Legal, Written] | \n", - "cc-by-4.0 | \n", - "Clustering of documents from the Big Patent da... | \n", - "|
| BiorxivClusteringP2P.v2 | \n", - "{eng} | \n", - "[Academic, Written] | \n", - "https://www.biorxiv.org/content/about-biorxiv | \n", - "Clustering of titles+abstract from biorxiv acr... | \n", - "|
| AlloProfClusteringS2S.v2 | \n", - "{fra} | \n", - "[Encyclopaedic, Written] | \n", - "mit | \n", - "Clustering of document titles from Allo Prof d... | \n", - "|
| HALClusteringS2S.v2 | \n", - "{fra} | \n", - "[Academic, Written] | \n", - "Apache-2.0 | \n", - "Clustering of titles from HAL (https://hugging... | \n", - "|
| SIB200ClusteringS2S | \n", - "{fin, nob, ces, pol, swe, eng, lit, slk, nno, ... | \n", - "[News, Written] | \n", - "cc-by-sa-4.0 | \n", - "SIB-200 is the largest publicly available topi... | \n", - "|
| WikiClusteringP2P.v2 | \n", - "{lav, dan, ces, eus, mlt} | \n", - "[Encyclopaedic, Written] | \n", - "cc-by-sa-3.0 | \n", - "Clustering of wikipedia articles inspired by B... | \n", - "|
| Retrieval | \n", - "StackOverflowQA | \n", - "{eng} | \n", - "[Programming, Written] | \n", - "MIT | \n", - "The dataset is a collection of natural languag... | \n", - "
| TwitterHjerneRetrieval | \n", - "{dan} | \n", - "[Social, Written] | \n", - "CC BY 4.0 | \n", - "Danish question asked on Twitter with the Hash... | \n", - "|
| LegalQuAD | \n", - "{deu} | \n", - "[Legal, Written] | \n", - "CC BY 4.0 | \n", - "The dataset consists of questions and legal do... | \n", - "|
| ArguAna | \n", - "{eng} | \n", - "[Medical, Written] | \n", - "cc-by-sa-4.0 | \n", - "NFCorpus: A Full-Text Learning to Rank Dataset... | \n", - "|
| HagridRetrieval | \n", - "{eng} | \n", - "[Encyclopaedic, Written] | \n", - "apache-2.0 | \n", - "HAGRID (Human-in-the-loop Attributable Generat... | \n", - "|
| LegalBenchCorporateLobbying | \n", - "{eng} | \n", - "[Legal, Written] | \n", - "CC BY 4.0 | \n", - "The dataset includes bill titles and bill summ... | \n", - "|
| LEMBPasskeyRetrieval | \n", - "{eng} | \n", - "[Fiction, Written] | \n", - "Not specified | \n", - "passkey subset of dwzhu/LongEmbed dataset. | \n", - "|
| SCIDOCS | \n", - "{eng} | \n", - "[Academic, Written, Non-fiction] | \n", - "cc-by-sa-4.0 | \n", - "SciDocs, a new evaluation benchmark consisting... | \n", - "|
| SpartQA | \n", - "{eng} | \n", - "[Encyclopaedic, Written] | \n", - "MIT | \n", - "Measuring the ability to retrieve the groundtr... | \n", - "|
| TempReasonL1 | \n", - "{eng} | \n", - "[Encyclopaedic, Written] | \n", - "CC BY-SA 3.0 | \n", - "Measuring the ability to retrieve the groundtr... | \n", - "|
| WinoGrande | \n", - "{eng} | \n", - "[Encyclopaedic, Written] | \n", - "CC BY | \n", - "Measuring the ability to retrieve the groundtr... | \n", - "|
| AlloprofRetrieval | \n", - "{fra} | \n", - "[Encyclopaedic, Written] | \n", - "cc-by-nc-sa-4.0 | \n", - "This dataset was provided by AlloProf, an orga... | \n", - "|
| BelebeleRetrieval | \n", - "{fin, nob, ces, pol, swe, eng, lit, slk, nld, ... | \n", - "[Web, News, Written] | \n", - "CC-BY-SA-4.0 | \n", - "Belebele is a multiple-choice machine reading ... | \n", - "|
| StatcanDialogueDatasetRetrieval | \n", - "{eng, fra} | \n", - "[Government, Web, Written] | \n", - "https://huggingface.co/datasets/McGill-NLP/sta... | \n", - "A Dataset for Retrieving Data Tables through C... | \n", - "|
| WikipediaRetrievalMultilingual | \n", - "{fin, por, ita, nld, dan, ces, ron, swe, eng, ... | \n", - "[Encyclopaedic, Written] | \n", - "cc-by-sa-3.0 | \n", - "The dataset is derived from Cohere's wikipedia... | \n", - "|
| InstructionRetrieval | \n", - "Core17InstructionRetrieval | \n", - "{eng} | \n", - "[News, Written] | \n", - "MIT | \n", - "Measuring retrieval instruction following abil... | \n", - "
| News21InstructionRetrieval | \n", - "{eng} | \n", - "[News, Written] | \n", - "MIT | \n", - "Measuring retrieval instruction following abil... | \n", - "|
| Robust04InstructionRetrieval | \n", - "{eng} | \n", - "[News, Written] | \n", - "MIT | \n", - "Measuring retrieval instruction following abil... | \n", - "|
| MultilabelClassification | \n", - "MalteseNewsClassification | \n", - "{mlt} | \n", - "[Constructed, Written] | \n", - "cc-by-nc-sa-4.0 | \n", - "A multi-label topic classification dataset for... | \n", - "
| MultiEURLEXMultilabelClassification | \n", - "{fin, ces, pol, swe, eng, lit, slk, nld, dan, ... | \n", - "[Legal, Government, Written] | \n", - "CC BY-SA 4.0 | \n", - "EU laws in 23 EU languages containing gold lab... | \n", - "|
| PairClassification | \n", - "CTKFactsNLI | \n", - "{ces} | \n", - "[News, Written] | \n", - "CC-BY-SA-3.0 | \n", - "Czech Natural Language Inference dataset of ar... | \n", - "
| SprintDuplicateQuestions | \n", - "{eng} | \n", - "[Programming, Written] | \n", - "Not specified | \n", - "Duplicate questions from the Sprint community. | \n", - "|
| OpusparcusPC | \n", - "{fin, fra, swe, eng, deu} | \n", - "[Spoken, Spoken] | \n", - "cc-by-nc-4.0 | \n", - "Opusparcus is a paraphrase corpus for six Euro... | \n", - "|
| RTE3 | \n", - "{deu, eng, fra, ita} | \n", - "[News, Web, Encyclopaedic, Written] | \n", - "cc-by-4.0 | \n", - "Recognising Textual Entailment Challenge (RTE-... | \n", - "|
| XNLI | \n", - "{ell, spa, fra, deu, eng, bul} | \n", - "[Non-fiction, Fiction, Government, Written] | \n", - "Not specified | \n", - "\n", - " | |
| PSC | \n", - "{pol} | \n", - "[News, Written] | \n", - "cc-by-3 | \n", - "Polish Summaries Corpus | \n", - "|
| Reranking | \n", - "WebLINXCandidatesReranking | \n", - "{eng} | \n", - "[Academic, Web, Written] | \n", - "CC BY-NC-SA 4.0 | \n", - "WebLINX is a large-scale benchmark of 100K int... | \n", - "
| AlloprofReranking | \n", - "{fra} | \n", - "[Web, Academic, Written] | \n", - "CC BY-NC-SA 4.0 | \n", - "This dataset was provided by AlloProf, an orga... | \n", - "|
| WikipediaRerankingMultilingual | \n", - "{fin, por, ita, nld, dan, ces, ron, swe, eng, ... | \n", - "[Encyclopaedic, Written] | \n", - "cc-by-sa-3.0 | \n", - "The dataset is derived from Cohere's wikipedia... | \n", - "|
| STS | \n", - "SICK-R | \n", - "{eng} | \n", - "None | \n", - "None | \n", - "Semantic Textual Similarity SICK-R dataset as ... | \n", - "
| STS12 | \n", - "{eng} | \n", - "[Encyclopaedic, News, Written] | \n", - "Not specified | \n", - "SemEval-2012 Task 6. | \n", - "|
| STS14 | \n", - "{eng} | \n", - "[Blog, Web, Spoken] | \n", - "Not specified | \n", - "SemEval STS 2014 dataset. Currently only the E... | \n", - "|
| STS15 | \n", - "{eng} | \n", - "[Blog, News, Web, Written, Spoken] | \n", - "Not specified | \n", - "SemEval STS 2015 dataset | \n", - "|
| STSBenchmark | \n", - "{eng} | \n", - "None | \n", - "None | \n", - "Semantic Textual Similarity Benchmark (STSbenc... | \n", - "|
| FinParaSTS | \n", - "{fin} | \n", - "[News, Subtitles, Written] | \n", - "cc-by-sa-4.0 | \n", - "Finnish paraphrase-based semantic similarity c... | \n", - "|
| STS17 | \n", - "{ita, nld, spa, fra, eng, deu} | \n", - "[News, Web, Written] | \n", - "Not specified | \n", - "Semeval-2017 task 1: Semantic textual similari... | \n", - "|
| SICK-R-PL | \n", - "{pol} | \n", - "[Web, Written] | \n", - "CC-BY-NC-SA-3.0 | \n", - "Polish version of SICK dataset for textual rel... | \n", - "|
| STSES | \n", - "{spa} | \n", - "[Written] | \n", - "cc-by-4.0 | \n", - "Spanish test sets from SemEval-2014 (Agirre et... | \n", - "
| \n", - " | model | \n", - "revision | \n", - "mean | \n", - "mean (Classification) | \n", - "mean (Retrieval) | \n", - "mean (PairClassification) | \n", - "mean (BitextMining) | \n", - "mean (Clustering) | \n", - "mean (MultilabelClassification) | \n", - "mean (STS) | \n", - "mean (Reranking) | \n", - "mean (InstructionRetrieval) | \n", - "mean (wieghted by task type) | \n", - "borda_count | \n", - "Total Evaluation time (hours) | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | \n", - "GritLM/GritLM-7B | \n", - "13f00a0e36500c80ce12870ea513846a066004af | \n", - "0.607 | \n", - "0.643 | \n", - "0.571 | \n", - "0.894 | \n", - "0.708 | \n", - "0.435 | \n", - "0.176 | \n", - "0.755 | \n", - "0.589 | \n", - "0.035 | \n", - "0.534 | \n", - "680.0 | \n", - "6.408 | \n", - "
| 7 | \n", - "intfloat/multilingual-e5-large-instruct | \n", - "baa7be480a7de1539afce709c8f13f833a510e0a | \n", - "0.610 | \n", - "0.635 | \n", - "0.555 | \n", - "0.899 | \n", - "0.767 | \n", - "0.460 | \n", - "0.173 | \n", - "0.772 | \n", - "0.575 | \n", - "-0.004 | \n", - "0.537 | \n", - "679.0 | \n", - "4.463 | \n", - "
| 11 | \n", - "intfloat/e5-mistral-7b-instruct | \n", - "07163b72af1488142a360786df853f237b1a3ca1 | \n", - "0.592 | \n", - "0.625 | \n", - "0.524 | \n", - "0.907 | \n", - "0.702 | \n", - "0.445 | \n", - "0.155 | \n", - "0.760 | \n", - "0.585 | \n", - "-0.006 | \n", - "0.522 | \n", - "643.0 | \n", - "5.718 | \n", - "
| 3 | \n", - "intfloat/multilingual-e5-large | \n", - "4dc6d853a804b9c8886ede6dda8a073b7dc08a81 | \n", - "0.571 | \n", - "0.609 | \n", - "0.513 | \n", - "0.887 | \n", - "0.690 | \n", - "0.367 | \n", - "0.150 | \n", - "0.756 | \n", - "0.552 | \n", - "-0.031 | \n", - "0.499 | \n", - "527.0 | \n", - "5.765 | \n", - "
| 9 | \n", - "intfloat/multilingual-e5-base | \n", - "d13f1b27baf31030b7fd040960d60d909913633f | \n", - "0.557 | \n", - "0.583 | \n", - "0.506 | \n", - "0.876 | \n", - "0.683 | \n", - "0.367 | \n", - "0.149 | \n", - "0.734 | \n", - "0.530 | \n", - "-0.027 | \n", - "0.489 | \n", - "438.0 | \n", - "2.712 | \n", - "
| 4 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "79f2382ceacceacdf38563d7c5d16b9ff8d725d6 | \n", - "0.512 | \n", - "0.554 | \n", - "0.393 | \n", - "0.906 | \n", - "0.554 | \n", - "0.343 | \n", - "0.069 | \n", - "0.741 | \n", - "0.516 | \n", - "-0.011 | \n", - "0.451 | \n", - "387.0 | \n", - "14.898 | \n", - "
| 0 | \n", - "intfloat/multilingual-e5-small | \n", - "e4ce9877abf3edfe10b0d82785e83bdcb973e22e | \n", - "0.537 | \n", - "0.565 | \n", - "0.465 | \n", - "0.869 | \n", - "0.660 | \n", - "0.355 | \n", - "0.140 | \n", - "0.710 | \n", - "0.534 | \n", - "-0.024 | \n", - "0.475 | \n", - "347.0 | \n", - "1.901 | \n", - "
| 1 | \n", - "sentence-transformers/LaBSE | \n", - "e34fab64a3011d2176c99545a93d5cbddc9a91b7 | \n", - "0.498 | \n", - "0.540 | \n", - "0.338 | \n", - "0.850 | \n", - "0.723 | \n", - "0.335 | \n", - "0.163 | \n", - "0.657 | \n", - "0.488 | \n", - "-0.030 | \n", - "0.452 | \n", - "296.0 | \n", - "2.439 | \n", - "
| 5 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "bf3bf13ab40c3157080a7ab344c831b9ad18b5eb | \n", - "0.484 | \n", - "0.517 | \n", - "0.355 | \n", - "0.888 | \n", - "0.513 | \n", - "0.327 | \n", - "0.057 | \n", - "0.724 | \n", - "0.492 | \n", - "-0.013 | \n", - "0.429 | \n", - "252.0 | \n", - "1.809 | \n", - "
| 6 | \n", - "sentence-transformers/all-mpnet-base-v2 | \n", - "84f2bcc00d77236f9e89c8a360a00fb1139bf47d | \n", - "0.433 | \n", - "0.485 | \n", - "0.359 | \n", - "0.796 | \n", - "0.236 | \n", - "0.360 | \n", - "0.109 | \n", - "0.630 | \n", - "0.472 | \n", - "-0.031 | \n", - "0.379 | \n", - "241.5 | \n", - "2.887 | \n", - "
| 8 | \n", - "sentence-transformers/all-MiniLM-L12-v2 | \n", - "a05860a77cef7b37e0048a7864658139bc18a854 | \n", - "0.431 | \n", - "0.487 | \n", - "0.345 | \n", - "0.809 | \n", - "0.256 | \n", - "0.323 | \n", - "0.076 | \n", - "0.635 | \n", - "0.470 | \n", - "-0.008 | \n", - "0.377 | \n", - "221.0 | \n", - "1.780 | \n", - "
| 10 | \n", - "sentence-transformers/all-MiniLM-L6-v2 | \n", - "8b3219a92973c328a8e22fadcfa821b5dc75636a | \n", - "0.425 | \n", - "0.475 | \n", - "0.366 | \n", - "0.796 | \n", - "0.218 | \n", - "0.335 | \n", - "0.088 | \n", - "0.618 | \n", - "0.445 | \n", - "-0.028 | \n", - "0.368 | \n", - "172.5 | \n", - "1.606 | \n", - "
| \n", - " | task | \n", - "AngryTweetsClassification | \n", - "BelebeleRetrieval | \n", - "BibleNLPBitextMining | \n", - "BornholmBitextMining | \n", - "DanFeverRetrieval | \n", - "DanishPoliticalCommentsClassification | \n", - "FloresBitextMining | \n", - "LccSentimentClassification | \n", - "MassiveIntentClassification | \n", - "MassiveScenarioClassification | \n", - "... | \n", - "NordicLangClassification | \n", - "SIB200Classification | \n", - "SIB200ClusteringS2S | \n", - "ScalaClassification | \n", - "TV2Nordretrieval | \n", - "Tatoeba | \n", - "TwitterHjerneRetrieval | \n", - "WikiClusteringP2P.v2 | \n", - "WikipediaRerankingMultilingual | \n", - "WikipediaRetrievalMultilingual | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| model | \n", - "revision | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
| intfloat/multilingual-e5-base | \n", - "d13f1b27baf31030b7fd040960d60d909913633f | \n", - "0.562846 | \n", - "0.907317 | \n", - "0.949219 | \n", - "0.332181 | \n", - "0.40416 | \n", - "0.364127 | \n", - "0.822655 | \n", - "0.601333 | \n", - "0.606893 | \n", - "0.679691 | \n", - "... | \n", - "0.758533 | \n", - "0.756863 | \n", - "0.411994 | \n", - "0.509961 | \n", - "0.92682 | \n", - "0.912300 | \n", - "0.42163 | \n", - "0.203832 | \n", - "0.897135 | \n", - "0.90878 | \n", - "
| intfloat/multilingual-e5-large | \n", - "4dc6d853a804b9c8886ede6dda8a073b7dc08a81 | \n", - "0.576886 | \n", - "0.946187 | \n", - "0.976562 | \n", - "0.296079 | \n", - "0.40868 | \n", - "0.394338 | \n", - "0.826064 | \n", - "0.615333 | \n", - "0.636954 | \n", - "0.711836 | \n", - "... | \n", - "0.801533 | \n", - "0.782353 | \n", - "0.465025 | \n", - "0.516162 | \n", - "0.95369 | \n", - "0.950800 | \n", - "0.35219 | \n", - "0.206153 | \n", - "0.912693 | \n", - "0.92426 | \n", - "
| intfloat/multilingual-e5-large-instruct | \n", - "baa7be480a7de1539afce709c8f13f833a510e0a | \n", - "0.645081 | \n", - "0.923867 | \n", - "0.992188 | \n", - "0.552233 | \n", - "0.40513 | \n", - "0.448807 | \n", - "0.929799 | \n", - "0.708000 | \n", - "0.718527 | \n", - "0.774748 | \n", - "... | \n", - "0.824433 | \n", - "0.828431 | \n", - "0.586632 | \n", - "0.507129 | \n", - "0.93690 | \n", - "0.955333 | \n", - "0.77233 | \n", - "0.242545 | \n", - "0.900087 | \n", - "0.90626 | \n", - "
| intfloat/multilingual-e5-small | \n", - "e4ce9877abf3edfe10b0d82785e83bdcb973e22e | \n", - "0.562655 | \n", - "0.854937 | \n", - "0.839258 | \n", - "0.371457 | \n", - "0.39601 | \n", - "0.348182 | \n", - "0.762986 | \n", - "0.586000 | \n", - "0.561197 | \n", - "0.640282 | \n", - "... | \n", - "0.721500 | \n", - "0.747059 | \n", - "0.386207 | \n", - "0.508008 | \n", - "0.90379 | \n", - "0.863838 | \n", - "0.29358 | \n", - "0.195741 | \n", - "0.883712 | \n", - "0.89263 | \n", - "
| sentence-transformers/LaBSE | \n", - "e34fab64a3011d2176c99545a93d5cbddc9a91b7 | \n", - "0.511079 | \n", - "0.739557 | \n", - "0.989583 | \n", - "0.456256 | \n", - "0.34537 | \n", - "0.383403 | \n", - "0.838430 | \n", - "0.500667 | \n", - "0.582313 | \n", - "0.652589 | \n", - "... | \n", - "0.353867 | \n", - "0.599510 | \n", - "0.284970 | \n", - "0.506104 | \n", - "0.76295 | \n", - "0.957067 | \n", - "0.14380 | \n", - "0.177777 | \n", - "0.825927 | \n", - "0.69096 | \n", - "
5 rows \u00d7 22 columns
\n", - "\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 Selected tasks \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n", - "\n" - ], - "text/plain": [ - "\u001b[38;5;235m\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 \u001b[0m\u001b[1mSelected tasks \u001b[0m\u001b[38;5;235m \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Clustering\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[1mClustering\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "- SIB200ClusteringS2S, s2s, multilingual 1 / 197 Subsets\n", - "\n" - ], - "text/plain": [ - " - SIB200ClusteringS2S, \u001b[3;38;5;241ms2s\u001b[0m, \u001b[3;31mmultilingual \u001b[0m\u001b[1;3;31m1\u001b[0m\u001b[3;31m \u001b[0m\u001b[3;31m/\u001b[0m\u001b[3;31m \u001b[0m\u001b[1;3;31m197\u001b[0m\u001b[3;31m Subsets\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n" - ], - "text/plain": [ - "\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Classification\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[1mClassification\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " - AngryTweetsClassification, s2s\n",
- "\n"
- ],
- "text/plain": [
- " - AngryTweetsClassification, \u001b[3;38;5;241ms2s\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " - NordicLangClassification, s2s\n",
- "\n"
- ],
- "text/plain": [
- " - NordicLangClassification, \u001b[3;38;5;241ms2s\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n", - "\n", - "\n" - ], - "text/plain": [ - "\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
BitextMining\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[1mBitextMining\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " - BornholmBitextMining, s2s\n",
- "\n"
- ],
- "text/plain": [
- " - BornholmBitextMining, \u001b[3;38;5;241ms2s\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n", - "\n", - "\n" - ], - "text/plain": [ - "\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Reranking\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[1mReranking\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "- WikipediaRerankingMultilingual, s2p, multilingual 1 / 16 Subsets\n", - "\n" - ], - "text/plain": [ - " - WikipediaRerankingMultilingual, \u001b[3;38;5;241ms2p\u001b[0m, \u001b[3;31mmultilingual \u001b[0m\u001b[1;3;31m1\u001b[0m\u001b[3;31m \u001b[0m\u001b[3;31m/\u001b[0m\u001b[3;31m \u001b[0m\u001b[1;3;31m16\u001b[0m\u001b[3;31m Subsets\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n" - ], - "text/plain": [ - "\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Retrieval\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[1mRetrieval\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " - DanFeverRetrieval, p2p\n",
- "\n"
- ],
- "text/plain": [
- " - DanFeverRetrieval, \u001b[3;38;5;241mp2p\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n", - "\n", - "\n" - ], - "text/plain": [ - "\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# we can now run the benchmark\n", - "evaluator = mteb.MTEB(tasks=benchmark)\n", - "\n", - "model = mteb.get_model(\"sentence-transformers/all-MiniLM-L6-v2\")\n", - "results = evaluator.run(model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Aggregating Scores across Benchmark\n" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Already up to date.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:mteb.load_results.load_results:Validation failed for SIB200ClusteringS2S in intfloat/e5-mistral-7b-instruct 07163b72af1488142a360786df853f237b1a3ca1: Missing subsets {'kac_Latn', 'bug_Latn', 'lit_Latn', 'som_Latn', 'jpn_Jpan', 'srd_Latn', 'mal_Mlym', 'hrv_Latn', 'luo_Latn', 'dik_Latn', 'ita_Latn', 'ell_Grek', 'zho_Hant', 'ary_Arab', 'est_Latn', 'hun_Latn', 'sin_Sinh', 'hye_Armn', 'ltg_Latn', 'fij_Latn', 'hin_Deva', 'nqo_Nkoo', 'tpi_Latn', 'yor_Latn', 'ben_Beng', 'szl_Latn', 'amh_Ethi', 'bul_Cyrl', 'ron_Latn', 'lmo_Latn', 'sag_Latn', 'ilo_Latn', 'mar_Deva', 'epo_Latn', 'kat_Geor', 'snd_Arab', 'san_Deva', 'hau_Latn', 'tgl_Latn', 'khk_Cyrl', 'mni_Beng', 'fra_Latn', 'kaz_Cyrl', 'gaz_Latn', 'scn_Latn', 'min_Latn', 'ace_Latn', 'ajp_Arab', 'urd_Arab', 'tam_Taml', 'run_Latn', 'ban_Latn', 'fuv_Latn', 'bel_Cyrl', 'srp_Cyrl', 'hne_Deva', 'dyu_Latn', 'kir_Cyrl', 'bem_Latn', 'bod_Tibt', 'tha_Thai', 'pap_Latn', 'tso_Latn', 'ydd_Hebr', 'tgk_Cyrl', 'umb_Latn', 'lug_Latn', 'lao_Laoo', 'nob_Latn', 'lim_Latn', 'nno_Latn', 'ast_Latn', 'tzm_Tfng', 'cym_Latn', 'ind_Latn', 'ibo_Latn', 'jav_Latn', 'lvs_Latn', 'npi_Deva', 'pes_Arab', 'fin_Latn', 'lus_Latn', 'sun_Latn', 'tuk_Latn', 'ars_Arab', 'fur_Latn', 'lua_Latn', 'bak_Cyrl', 'kin_Latn', 'spa_Latn', 'fao_Latn', 'kor_Hang', 'twi_Latn', 'war_Latn', 'arb_Latn', 'azb_Arab', 'kas_Deva', 'xho_Latn', 'aeb_Arab', 'guj_Gujr', 'apc_Arab', 'grn_Latn', 'aka_Latn', 'mya_Mymr', 'kab_Latn', 'nso_Latn', 'yue_Hant', 'bam_Latn', 'bho_Deva', 'slv_Latn', 'slk_Latn', 'kbp_Latn', 'kam_Latn', 'taq_Tfng', 'knc_Latn', 'dan_Latn', 'ewe_Latn', 'uig_Arab', 'eus_Latn', 'ory_Orya', 'vie_Latn', 'sot_Latn', 'lij_Latn', 'tur_Latn', 'cat_Latn', 'kmb_Latn', 'kmr_Latn', 'awa_Deva', 'nus_Latn', 'ceb_Latn', 'sat_Olck', 'smo_Latn', 'heb_Hebr', 'crh_Latn', 'bjn_Latn', 'acq_Arab', 'mai_Deva', 'ltz_Latn', 'bos_Latn', 'glg_Latn', 'lin_Latn', 'plt_Latn', 'por_Latn', 'nya_Latn', 'asm_Beng', 'swe_Latn', 'ayr_Latn', 'gle_Latn', 'oci_Latn', 'pol_Latn', 'arz_Arab', 'tel_Telu', 'azj_Latn', 'ssw_Latn', 'tum_Latn', 'zsm_Latn', 'vec_Latn', 'mri_Latn', 'quy_Latn', 'als_Latn', 'shn_Mymr', 'wol_Latn', 'kan_Knda', 'isl_Latn', 'khm_Khmr', 'nld_Latn', 'pan_Guru', 'cjk_Latn', 'fon_Latn', 'tir_Ethi', 'ces_Latn', 'kik_Latn', 'kea_Latn', 'kon_Latn', 'deu_Latn', 'eng_Latn', 'sna_Latn', 'tat_Cyrl', 'pbt_Arab', 'prs_Arab', 'ckb_Arab', 'uzn_Latn', 'gla_Latn', 'acm_Arab', 'dzo_Tibt', 'mos_Latn', 'zul_Latn', 'hat_Latn', 'afr_Latn', 'mag_Deva', 'tsn_Latn', 'ukr_Cyrl', 'swh_Latn', 'mkd_Cyrl', 'pag_Latn', 'mlt_Latn'} for split test\n" - ] - } - ], - "source": [ - "# load task results for the specified models from mteb/results repository\n", - "mteb_results = mteb.load_results(models=models, tasks=benchmark)" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:mteb.task_aggregation:Model intfloat/e5-mistral-7b-instruct revision 07163b72af1488142a360786df853f237b1a3ca1 has missing scores for some tasks\n", - "WARNING:mteb.task_aggregation:Model GritLM/GritLM-7B revision 13f00a0e36500c80ce12870ea513846a066004af has missing scores for some tasks\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:mteb.task_aggregation:Model intfloat/e5-mistral-7b-instruct revision 07163b72af1488142a360786df853f237b1a3ca1 has missing scores for some tasks of type Classification\n", - "WARNING:mteb.task_aggregation:Model intfloat/e5-mistral-7b-instruct revision 07163b72af1488142a360786df853f237b1a3ca1 has missing scores for some tasks of type Reranking\n", - "WARNING:mteb.task_aggregation:Model intfloat/e5-mistral-7b-instruct revision 07163b72af1488142a360786df853f237b1a3ca1 has missing scores for some tasks of type BitextMining\n", - "WARNING:mteb.task_aggregation:Model intfloat/e5-mistral-7b-instruct revision 07163b72af1488142a360786df853f237b1a3ca1 has missing scores for some tasks of type Clustering\n", - "WARNING:mteb.task_aggregation:Model intfloat/e5-mistral-7b-instruct revision 07163b72af1488142a360786df853f237b1a3ca1 has missing scores for some tasks of type Retrieval\n", - "WARNING:mteb.task_aggregation:Model GritLM/GritLM-7B revision 13f00a0e36500c80ce12870ea513846a066004af has missing scores for some tasks of type Classification\n", - "WARNING:mteb.task_aggregation:Model GritLM/GritLM-7B revision 13f00a0e36500c80ce12870ea513846a066004af has missing scores for some tasks of type Reranking\n", - "WARNING:mteb.task_aggregation:Model GritLM/GritLM-7B revision 13f00a0e36500c80ce12870ea513846a066004af has missing scores for some tasks of type BitextMining\n", - "WARNING:mteb.task_aggregation:Model GritLM/GritLM-7B revision 13f00a0e36500c80ce12870ea513846a066004af has missing scores for some tasks of type Clustering\n", - "WARNING:mteb.task_aggregation:Model GritLM/GritLM-7B revision 13f00a0e36500c80ce12870ea513846a066004af has missing scores for some tasks of type Retrieval\n" - ] - } - ], - "source": [ - "import mteb.task_aggregation as task_aggregation\n", - "\n", - "mean = task_aggregation.mean(mteb_results)\n", - "weighted_mean = task_aggregation.task_category_weighted_mean(mteb_results)\n", - "borda = task_aggregation.borda_count(mteb_results)" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
| \n", - " | model | \n", - "revision | \n", - "mean | \n", - "weighted_mean | \n", - "borda_count | \n", - "
|---|---|---|---|---|---|
| 5 | \n", - "intfloat/multilingual-e5-large-instruct | \n", - "baa7be480a7de1539afce709c8f13f833a510e0a | \n", - "0.631669 | \n", - "0.611051 | \n", - "64.0 | \n", - "
| 9 | \n", - "intfloat/multilingual-e5-large | \n", - "4dc6d853a804b9c8886ede6dda8a073b7dc08a81 | \n", - "0.536125 | \n", - "0.505508 | \n", - "55.0 | \n", - "
| 6 | \n", - "intfloat/multilingual-e5-small | \n", - "e4ce9877abf3edfe10b0d82785e83bdcb973e22e | \n", - "0.526768 | \n", - "0.503706 | \n", - "50.0 | \n", - "
| 10 | \n", - "intfloat/multilingual-e5-base | \n", - "d13f1b27baf31030b7fd040960d60d909913633f | \n", - "0.525225 | \n", - "0.498132 | \n", - "50.0 | \n", - "
| 11 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "79f2382ceacceacdf38563d7c5d16b9ff8d725d6 | \n", - "0.430674 | \n", - "0.420396 | \n", - "36.0 | \n", - "
| 0 | \n", - "sentence-transformers/LaBSE | \n", - "e34fab64a3011d2176c99545a93d5cbddc9a91b7 | \n", - "0.443979 | \n", - "0.446280 | \n", - "34.0 | \n", - "
| 8 | \n", - "sentence-transformers/paraphrase-multilingual-... | \n", - "bf3bf13ab40c3157080a7ab344c831b9ad18b5eb | \n", - "0.413172 | \n", - "0.402389 | \n", - "29.0 | \n", - "
| 4 | \n", - "sentence-transformers/all-MiniLM-L12-v2 | \n", - "a05860a77cef7b37e0048a7864658139bc18a854 | \n", - "0.399563 | \n", - "0.382440 | \n", - "28.0 | \n", - "
| 2 | \n", - "sentence-transformers/all-mpnet-base-v2 | \n", - "84f2bcc00d77236f9e89c8a360a00fb1139bf47d | \n", - "0.372704 | \n", - "0.352972 | \n", - "22.0 | \n", - "
| 7 | \n", - "sentence-transformers/all-MiniLM-L6-v2 | \n", - "8b3219a92973c328a8e22fadcfa821b5dc75636a | \n", - "0.377550 | \n", - "0.355880 | \n", - "22.0 | \n", - "
| 1 | \n", - "intfloat/e5-mistral-7b-instruct | \n", - "07163b72af1488142a360786df853f237b1a3ca1 | \n", - "NaN | \n", - "NaN | \n", - "0.0 | \n", - "
| 3 | \n", - "GritLM/GritLM-7B | \n", - "13f00a0e36500c80ce12870ea513846a066004af | \n", - "NaN | \n", - "NaN | \n", - "0.0 | \n", - "
| \n", - " | task | \n", - "BelebeleRetrieval | \n", - "BengaliDocumentClassification | \n", - "BengaliHateSpeechClassification | \n", - "BengaliSentimentAnalysis | \n", - "BibleNLPBitextMining | \n", - "FloresBitextMining | \n", - "GujaratiNewsClassification | \n", - "HindiDiscourseClassification | \n", - "IN22ConvBitextMining | \n", - "IN22GenBitextMining | \n", - "... | \n", - "TamilNewsClassification | \n", - "Tatoeba | \n", - "TeluguAndhraJyotiNewsClassification | \n", - "TweetSentimentClassification | \n", - "UrduRomanSentimentClassification | \n", - "WikipediaRerankingMultilingual | \n", - "WikipediaRetrievalMultilingual | \n", - "XNLI | \n", - "XPQARetrieval | \n", - "XQuADRetrieval | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| model | \n", - "revision | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
| GritLM/GritLM-7B | \n", - "13f00a0e36500c80ce12870ea513846a066004af | \n", - "0.651366 | \n", - "0.419482 | \n", - "0.548942 | \n", - "0.721005 | \n", - "0.726886 | \n", - "0.638675 | \n", - "0.698558 | \n", - "0.370898 | \n", - "0.380802 | \n", - "0.676261 | \n", - "... | \n", - "0.311205 | \n", - "0.690237 | \n", - "0.616112 | \n", - "0.375781 | \n", - "0.496861 | \n", - "0.846951 | \n", - "0.832380 | \n", - "0.665936 | \n", - "0.397125 | \n", - "0.88930 | \n", - "
| intfloat/e5-mistral-7b-instruct | \n", - "07163b72af1488142a360786df853f237b1a3ca1 | \n", - "0.614661 | \n", - "0.435547 | \n", - "0.497658 | \n", - "0.720748 | \n", - "0.759505 | \n", - "0.637100 | \n", - "0.730880 | \n", - "0.320068 | \n", - "0.403273 | \n", - "0.668236 | \n", - "... | \n", - "0.313787 | \n", - "0.711112 | \n", - "0.654907 | \n", - "0.379297 | \n", - "0.494694 | \n", - "0.844201 | \n", - "0.825875 | \n", - "0.716875 | \n", - "0.377082 | \n", - "0.88243 | \n", - "
| intfloat/multilingual-e5-base | \n", - "d13f1b27baf31030b7fd040960d60d909913633f | \n", - "0.650504 | \n", - "0.518994 | \n", - "0.485035 | \n", - "0.796431 | \n", - "0.800805 | \n", - "0.825293 | \n", - "0.749090 | \n", - "0.390381 | \n", - "0.547230 | \n", - "0.748925 | \n", - "... | \n", - "0.391938 | \n", - "0.857164 | \n", - "0.786698 | \n", - "0.380469 | \n", - "0.403741 | \n", - "0.837615 | \n", - "0.829165 | \n", - "0.721940 | \n", - "0.394047 | \n", - "0.95313 | \n", - "
| intfloat/multilingual-e5-large | \n", - "4dc6d853a804b9c8886ede6dda8a073b7dc08a81 | \n", - "0.739715 | \n", - "0.540234 | \n", - "0.487615 | \n", - "0.830707 | \n", - "0.804803 | \n", - "0.855500 | \n", - "0.767375 | \n", - "0.387402 | \n", - "0.588946 | \n", - "0.775150 | \n", - "... | \n", - "0.424952 | \n", - "0.881030 | \n", - "0.792410 | \n", - "0.364063 | \n", - "0.416054 | \n", - "0.859706 | \n", - "0.860075 | \n", - "0.740601 | \n", - "0.437062 | \n", - "0.97010 | \n", - "
| intfloat/multilingual-e5-large-instruct | \n", - "baa7be480a7de1539afce709c8f13f833a510e0a | \n", - "0.762216 | \n", - "0.496533 | \n", - "0.584896 | \n", - "0.839849 | \n", - "0.844991 | \n", - "0.905643 | \n", - "0.875190 | \n", - "0.352344 | \n", - "0.632050 | \n", - "0.795942 | \n", - "... | \n", - "0.485856 | \n", - "0.935997 | \n", - "0.796409 | \n", - "0.373828 | \n", - "0.438792 | \n", - "0.874621 | \n", - "0.865210 | \n", - "0.756606 | \n", - "0.440010 | \n", - "0.95971 | \n", - "
5 rows \u00d7 46 columns
\n", - "| \n", - " | task | \n", - "
|---|---|
| model | \n", - "revision | \n", - "
| \n", - " | task | \n", - "
|---|---|
| model | \n", - "revision | \n", - "
| GritLM/GritLM-7B | \n", - "13f00a0e36500c80ce12870ea513846a066004af | \n", - "
| intfloat/e5-mistral-7b-instruct | \n", - "07163b72af1488142a360786df853f237b1a3ca1 | \n", - "
| intfloat/multilingual-e5-base | \n", - "d13f1b27baf31030b7fd040960d60d909913633f | \n", - "
| intfloat/multilingual-e5-large | \n", - "4dc6d853a804b9c8886ede6dda8a073b7dc08a81 | \n", - "
| intfloat/multilingual-e5-large-instruct | \n", - "baa7be480a7de1539afce709c8f13f833a510e0a | \n", - "
| intfloat/multilingual-e5-small | \n", - "e4ce9877abf3edfe10b0d82785e83bdcb973e22e | \n", - "
| sentence-transformers/LaBSE | \n", - "e34fab64a3011d2176c99545a93d5cbddc9a91b7 | \n", - "
| sentence-transformers/all-MiniLM-L12-v2 | \n", - "a05860a77cef7b37e0048a7864658139bc18a854 | \n", - "
| sentence-transformers/all-MiniLM-L6-v2 | \n", - "8b3219a92973c328a8e22fadcfa821b5dc75636a | \n", - "
| sentence-transformers/all-mpnet-base-v2 | \n", - "84f2bcc00d77236f9e89c8a360a00fb1139bf47d | \n", - "
| sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 | \n", - "bf3bf13ab40c3157080a7ab344c831b9ad18b5eb | \n", - "
| sentence-transformers/paraphrase-multilingual-mpnet-base-v2 | \n", - "79f2382ceacceacdf38563d7c5d16b9ff8d725d6 | \n", - "
| \n", - " | \n", - " | Languages | \n", - "Domains | \n", - "License | \n", - "Description | \n", - "
|---|---|---|---|---|---|
| Type | \n", - "Name | \n", - "\n", - " | \n", - " | \n", - " | \n", - " |
| BitextMining | \n", - "IN22ConvBitextMining | \n", - "{ory, kas, asm, snd, hin, mar, tam, san, kan, ... | \n", - "[Social, Spoken, Fiction, Spoken] | \n", - "CC-BY-4.0 | \n", - "IN22-Conv is a n-way parallel conversation dom... | \n", - "
| IN22GenBitextMining | \n", - "{ory, kas, asm, snd, hin, mar, tam, san, kan, ... | \n", - "[Web, Legal, Government, News, Religious, Non-... | \n", - "CC-BY-4.0 | \n", - "IN22-Gen is a n-way parallel general-purpose m... | \n", - "|
| IndicGenBenchFloresBitextMining | \n", - "{ory, asm, gbm, hin, nep, mar, tam, bgc, mup, ... | \n", - "[Web, News, Written] | \n", - "CC-BY-SA-4.0 | \n", - "Flores-IN dataset is an extension of Flores da... | \n", - "|
| LinceMTBitextMining | \n", - "{hin} | \n", - "[Social, Written] | \n", - "Unknown | \n", - "LinceMT is a parallel corpus for machine trans... | \n", - "|
| Classification | \n", - "BengaliSentimentAnalysis | \n", - "{ben} | \n", - "[Reviews, Written] | \n", - "CC BY 4.0 | \n", - "dataset contains 3307 Negative reviews and 850... | \n", - "
| GujaratiNewsClassification | \n", - "{guj} | \n", - "[News, Written] | \n", - "MIT | \n", - "A Gujarati dataset for 3-class classification ... | \n", - "|
| HindiDiscourseClassification | \n", - "{hin} | \n", - "[Fiction, Social, Written] | \n", - "MIT | \n", - "A Hindi Discourse dataset in Hindi with values... | \n", - "|
| SentimentAnalysisHindi | \n", - "{hin} | \n", - "[Reviews, Written] | \n", - "CC BY-NC-SA 4.0 | \n", - "Hindi Sentiment Analysis Dataset | \n", - "|
| MalayalamNewsClassification | \n", - "{mal} | \n", - "[News, Written] | \n", - "MIT | \n", - "A Malayalam dataset for 3-class classification... | \n", - "|
| IndicLangClassification | \n", - "{ory, kas, asm, snd, hin, mar, tam, san, kan, ... | \n", - "[Web, Non-fiction, Written] | \n", - "CC0 | \n", - "A language identification test set for native-... | \n", - "|
| MTOPIntentClassification | \n", - "{hin} | \n", - "[Spoken, Spoken] | \n", - "Not specified | \n", - "MTOP: Multilingual Task-Oriented Semantic Parsing | \n", - "|
| MultiHateClassification | \n", - "{hin} | \n", - "[Constructed, Written] | \n", - "cc-by-4.0 | \n", - "Hate speech detection dataset with binary\\n ... | \n", - "|
| TweetSentimentClassification | \n", - "{hin} | \n", - "[Social, Written] | \n", - "cc-by-3.0 | \n", - "A multilingual Sentiment Analysis dataset cons... | \n", - "|
| NepaliNewsClassification | \n", - "{nep} | \n", - "[News, Written] | \n", - "CC BY-SA 4.0 | \n", - "A Nepali dataset for 7500 news articles | \n", - "|
| PunjabiNewsClassification | \n", - "{pan} | \n", - "[News, Written] | \n", - "MIT | \n", - "A Punjabi dataset for 2-class classification o... | \n", - "|
| SanskritShlokasClassification | \n", - "{san} | \n", - "[Religious, Written] | \n", - "CC BY-SA 4.0 | \n", - "This data set contains ~500 Shlokas | \n", - "|
| UrduRomanSentimentClassification | \n", - "{urd} | \n", - "[Social, Written] | \n", - "MIT | \n", - "The Roman Urdu dataset is a data corpus compri... | \n", - "|
| Clustering | \n", - "SIB200ClusteringS2S | \n", - "{ory, kas, asm, snd, hin, mar, tam, san, kan, ... | \n", - "[News, Written] | \n", - "cc-by-sa-4.0 | \n", - "SIB-200 is the largest publicly available topi... | \n", - "
| Retrieval | \n", - "BelebeleRetrieval | \n", - "{mal, ory, ben, mar, tam, npi, guj, asm, snd, ... | \n", - "[Web, News, Written] | \n", - "CC-BY-SA-4.0 | \n", - "Belebele is a multiple-choice machine reading ... | \n", - "
| XQuADRetrieval | \n", - "{hin} | \n", - "[Web, Written] | \n", - "CC BY-SA 4.0 | \n", - "XQuAD is a benchmark dataset for evaluating cr... | \n", - "|
| PairClassification | \n", - "XNLI | \n", - "{hin} | \n", - "[Non-fiction, Fiction, Government, Written] | \n", - "Not specified | \n", - "\n", - " |
| Reranking | \n", - "WikipediaRerankingMultilingual | \n", - "{hin, ben} | \n", - "[Encyclopaedic, Written] | \n", - "cc-by-sa-3.0 | \n", - "The dataset is derived from Cohere's wikipedia... | \n", - "
| STS | \n", - "IndicCrosslingualSTS | \n", - "{mal, ory, ben, mar, tam, guj, asm, kan, urd, ... | \n", - "[News, Non-fiction, Web, Spoken, Government, W... | \n", - "CC0 | \n", - "This is a Semantic Textual Similarity testset ... | \n", - "
| \n", - " | task | \n", - "AFQMC | \n", - "AILACasedocs | \n", - "AILAStatutes | \n", - "AJGT | \n", - "ARCChallenge | \n", - "ATEC | \n", - "AfriSentiClassification | \n", - "AfriSentiLangClassification | \n", - "AllegroReviews | \n", - "AlloProfClusteringP2P.v2 | \n", - "... | \n", - "WikipediaRetrievalMultilingual | \n", - "WinoGrande | \n", - "WisesightSentimentClassification | \n", - "XMarket | \n", - "XNLI | \n", - "XPQARetrieval | \n", - "XQuADRetrieval | \n", - "YelpReviewFullClassification | \n", - "YueOpenriceReviewClassification | \n", - "indonli | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| model | \n", - "revision | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
| GritLM/GritLM-7B | \n", - "13f00a0e36500c80ce12870ea513846a066004af | \n", - "0.355864 | \n", - "0.35292 | \n", - "0.41800 | \n", - "0.809611 | \n", - "0.26677 | \n", - "0.408941 | \n", - "0.450786 | \n", - "0.931445 | \n", - "0.567694 | \n", - "0.671576 | \n", - "... | \n", - "0.917722 | \n", - "0.53697 | \n", - "0.341335 | \n", - "0.259600 | \n", - "0.741047 | \n", - "0.506027 | \n", - "0.947917 | \n", - "0.650635 | \n", - "0.374902 | \n", - "0.555207 | \n", - "
| intfloat/e5-mistral-7b-instruct | \n", - "07163b72af1488142a360786df853f237b1a3ca1 | \n", - "0.389854 | \n", - "0.36662 | \n", - "0.34535 | \n", - "0.823778 | \n", - "0.19001 | \n", - "0.428429 | \n", - "0.444763 | \n", - "0.921680 | \n", - "0.597813 | \n", - "0.691183 | \n", - "... | \n", - "0.909265 | \n", - "0.39514 | \n", - "0.356845 | \n", - "0.287633 | \n", - "0.779189 | \n", - "0.474599 | \n", - "0.933592 | \n", - "0.618311 | \n", - "0.330566 | \n", - "0.579942 | \n", - "
| intfloat/multilingual-e5-base | \n", - "d13f1b27baf31030b7fd040960d60d909913633f | \n", - "0.296610 | \n", - "0.26053 | \n", - "0.20371 | \n", - "0.777889 | \n", - "0.09611 | \n", - "0.370099 | \n", - "0.438023 | \n", - "0.671191 | \n", - "0.407753 | \n", - "0.631008 | \n", - "... | \n", - "0.887509 | \n", - "0.56177 | \n", - "0.363027 | \n", - "0.167343 | \n", - "0.709983 | \n", - "0.415317 | \n", - "0.958011 | \n", - "0.597217 | \n", - "0.315869 | \n", - "0.509662 | \n", - "
| intfloat/multilingual-e5-large | \n", - "4dc6d853a804b9c8886ede6dda8a073b7dc08a81 | \n", - "0.330127 | \n", - "0.26427 | \n", - "0.20842 | \n", - "0.802889 | \n", - "0.10828 | \n", - "0.398049 | \n", - "0.455005 | \n", - "0.642822 | \n", - "0.410437 | \n", - "0.636065 | \n", - "... | \n", - "0.908209 | \n", - "0.54985 | \n", - "0.360570 | \n", - "0.171770 | \n", - "0.739017 | \n", - "0.472794 | \n", - "0.970637 | \n", - "0.643164 | \n", - "0.347949 | \n", - "0.517360 | \n", - "
| intfloat/multilingual-e5-large-instruct | \n", - "baa7be480a7de1539afce709c8f13f833a510e0a | \n", - "0.375337 | \n", - "0.33330 | \n", - "0.29659 | \n", - "0.854500 | \n", - "0.15027 | \n", - "0.432675 | \n", - "0.453874 | \n", - "0.914404 | \n", - "0.524254 | \n", - "0.669222 | \n", - "... | \n", - "0.915935 | \n", - "0.54272 | \n", - "0.368923 | \n", - "0.256423 | \n", - "0.784905 | \n", - "0.518825 | \n", - "0.965380 | \n", - "0.652686 | \n", - "0.347656 | \n", - "0.561701 | \n", - "
5 rows × 507 columns
\n", - "| \n", - " | task | \n", - "
|---|---|
| model | \n", - "revision | \n", - "
| \n", - " | task | \n", - "Diversity1LegalBenchClassification | \n", - "Diversity2LegalBenchClassification | \n", - "IFlyTek | \n", - "TNews | \n", - "
|---|---|---|---|---|---|
| model | \n", - "revision | \n", - "\n", - " | \n", - " | \n", - " | \n", - " |
| GritLM/GritLM-7B | \n", - "13f00a0e36500c80ce12870ea513846a066004af | \n", - "0.763333 | \n", - "0.746667 | \n", - "0.0 | \n", - "0.0 | \n", - "
| intfloat/e5-mistral-7b-instruct | \n", - "07163b72af1488142a360786df853f237b1a3ca1 | \n", - "0.763333 | \n", - "0.746667 | \n", - "0.0 | \n", - "0.0 | \n", - "
| intfloat/multilingual-e5-base | \n", - "d13f1b27baf31030b7fd040960d60d909913633f | \n", - "0.763333 | \n", - "0.746667 | \n", - "0.0 | \n", - "0.0 | \n", - "
| intfloat/multilingual-e5-large | \n", - "4dc6d853a804b9c8886ede6dda8a073b7dc08a81 | \n", - "0.763333 | \n", - "0.746667 | \n", - "0.0 | \n", - "0.0 | \n", - "
| intfloat/multilingual-e5-large-instruct | \n", - "baa7be480a7de1539afce709c8f13f833a510e0a | \n", - "0.763333 | \n", - "0.746667 | \n", - "0.0 | \n", - "0.0 | \n", - "
| intfloat/multilingual-e5-small | \n", - "e4ce9877abf3edfe10b0d82785e83bdcb973e22e | \n", - "0.763333 | \n", - "0.746667 | \n", - "0.0 | \n", - "0.0 | \n", - "
| sentence-transformers/LaBSE | \n", - "e34fab64a3011d2176c99545a93d5cbddc9a91b7 | \n", - "0.763333 | \n", - "0.746667 | \n", - "0.0 | \n", - "0.0 | \n", - "
| sentence-transformers/all-MiniLM-L12-v2 | \n", - "a05860a77cef7b37e0048a7864658139bc18a854 | \n", - "0.763333 | \n", - "0.746667 | \n", - "0.0 | \n", - "0.0 | \n", - "
| sentence-transformers/all-MiniLM-L6-v2 | \n", - "8b3219a92973c328a8e22fadcfa821b5dc75636a | \n", - "0.763333 | \n", - "0.746667 | \n", - "0.0 | \n", - "0.0 | \n", - "
| sentence-transformers/all-mpnet-base-v2 | \n", - "84f2bcc00d77236f9e89c8a360a00fb1139bf47d | \n", - "0.763333 | \n", - "0.746667 | \n", - "0.0 | \n", - "0.0 | \n", - "
| sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 | \n", - "bf3bf13ab40c3157080a7ab344c831b9ad18b5eb | \n", - "0.763333 | \n", - "0.746667 | \n", - "0.0 | \n", - "0.0 | \n", - "
| sentence-transformers/paraphrase-multilingual-mpnet-base-v2 | \n", - "79f2382ceacceacdf38563d7c5d16b9ff8d725d6 | \n", - "0.763333 | \n", - "0.746667 | \n", - "0.0 | \n", - "0.0 | \n", - "
| \n", - " | \n", - " | Languages | \n", - "Domains | \n", - "License | \n", - "Description | \n", - "
|---|---|---|---|---|---|
| Type | \n", - "Name | \n", - "\n", - " | \n", - " | \n", - " | \n", - " |
| BitextMining | \n", - "BornholmBitextMining | \n", - "{dan} | \n", - "[Web, Social, Fiction, Written] | \n", - "CC-BY-4.0 | \n", - "Danish Bornholmsk Parallel Corpus. Bornholmsk ... | \n", - "
| BibleNLPBitextMining | \n", - "{aoj, ncl, imo, acu, eko, urb, bvd, cab, bsp, ... | \n", - "[Religious, Written] | \n", - "CC-BY-SA-4.0 | \n", - "Partial Bible translations in 829 languages, a... | \n", - "|
| BUCC.v2 | \n", - "{deu, cmn, fra, rus, eng} | \n", - "[Written] | \n", - "Unknown | \n", - "BUCC bitext mining dataset | \n", - "|
| DiaBlaBitextMining | \n", - "{fra, eng} | \n", - "[Social, Written] | \n", - "CC BY-NC-SA 4.0 | \n", - "English-French Parallel Corpus. DiaBLa is an E... | \n", - "|
| FloresBitextMining | \n", - "{kin, ita, zul, sin, kbp, khk, ast, ell, shn, ... | \n", - "[Non-fiction, Encyclopaedic, Written] | \n", - "CC BY-SA 4.0 | \n", - "FLORES is a benchmark dataset for machine tran... | \n", - "|
| IN22GenBitextMining | \n", - "{san, kas, mni, sat, mal, brx, pan, asm, tam, ... | \n", - "[Web, Legal, Government, News, Religious, Non-... | \n", - "CC-BY-4.0 | \n", - "IN22-Gen is a n-way parallel general-purpose m... | \n", - "|
| IndicGenBenchFloresBitextMining | \n", - "{san, mup, mni, sat, bgc, hne, bho, nep, mal, ... | \n", - "[Web, News, Written] | \n", - "CC-BY-SA-4.0 | \n", - "Flores-IN dataset is an extension of Flores da... | \n", - "|
| NollySentiBitextMining | \n", - "{yor, pcm, hau, ibo, eng} | \n", - "[Social, Reviews, Written] | \n", - "CC BY-SA 4.0 | \n", - "NollySenti is Nollywood movie reviews for five... | \n", - "|
| NorwegianCourtsBitextMining | \n", - "{nob, nno} | \n", - "[Legal, Written] | \n", - "CC BY 4.0 | \n", - "Nynorsk and Bokmål parallel corpus from Norweg... | \n", - "|
| NTREXBitextMining | \n", - "{kin, ita, zul, sin, swa, ell, slk, uig, mal, ... | \n", - "[News, Written] | \n", - "CC-BY-SA-4.0 | \n", - "NTREX is a News Test References dataset for Ma... | \n", - "|
| NusaTranslationBitextMining | \n", - "{bew, bbc, mad, sun, ind, min, bhp, jav, mak, ... | \n", - "[Social, Written] | \n", - "CC BY-SA 4.0 | \n", - "NusaTranslation is a parallel dataset for mach... | \n", - "|
| NusaXBitextMining | \n", - "{bjn, bbc, mad, nij, ace, ind, min, bug, jav, ... | \n", - "[Reviews, Written] | \n", - "CC BY-SA 4.0 | \n", - "NusaX is a parallel dataset for machine transl... | \n", - "|
| Tatoeba | \n", - "{ita, max, ast, ell, slk, nov, uig, mal, swh, ... | \n", - "[Written] | \n", - "CC BY 2.0 | \n", - "1,000 English-aligned sentence pairs for each ... | \n", - "|
| Classification | \n", - "BulgarianStoreReviewSentimentClassfication | \n", - "{bul} | \n", - "[Reviews, Written] | \n", - "cc-by-4.0 | \n", - "Bulgarian online store review dataset for sent... | \n", - "
| CzechProductReviewSentimentClassification | \n", - "{ces} | \n", - "[Reviews, Written] | \n", - "CC BY-NC-SA 4.0 | \n", - "User reviews of products on Czech e-shop Mall.... | \n", - "|
| GreekLegalCodeClassification | \n", - "{ell} | \n", - "[Legal, Written] | \n", - "cc-by-4.0 | \n", - "Greek Legal Code Dataset for Classification. (... | \n", - "|
| DBpediaClassification | \n", - "{eng} | \n", - "[Encyclopaedic, Written] | \n", - "cc-by-sa-3.0 | \n", - "DBpedia14 is a dataset of English texts from W... | \n", - "|
| FinancialPhrasebankClassification | \n", - "{eng} | \n", - "[News, Written] | \n", - "cc-by-nc-sa-3.0 | \n", - "Polar sentiment dataset of sentences from fina... | \n", - "|
| PoemSentimentClassification | \n", - "{eng} | \n", - "[Reviews, Written] | \n", - "CC-BY-4.0 | \n", - "Poem Sentiment is a sentiment dataset of poem ... | \n", - "|
| ToxicConversationsClassification | \n", - "{eng} | \n", - "[Social, Written] | \n", - "CC BY 4.0 | \n", - "Collection of comments from the Civil Comments... | \n", - "|
| TweetTopicSingleClassification | \n", - "{eng} | \n", - "[Social, News, Written] | \n", - "Other | \n", - "Topic classification dataset on Twitter with 6... | \n", - "|
| EstonianValenceClassification | \n", - "{est} | \n", - "[News, Written] | \n", - "CC BY 4.0 | \n", - "Dataset containing annotated Estonian news dat... | \n", - "|
| FilipinoShopeeReviewsClassification | \n", - "{fil} | \n", - "[Social, Written] | \n", - "MPL-2.0 | \n", - "The Shopee reviews tl 15 dataset is constructe... | \n", - "|
| GujaratiNewsClassification | \n", - "{guj} | \n", - "[News, Written] | \n", - "MIT | \n", - "A Gujarati dataset for 3-class classification ... | \n", - "|
| SentimentAnalysisHindi | \n", - "{hin} | \n", - "[Reviews, Written] | \n", - "CC BY-NC-SA 4.0 | \n", - "Hindi Sentiment Analysis Dataset | \n", - "|
| IndonesianIdClickbaitClassification | \n", - "{ind} | \n", - "[News, Written] | \n", - "cc-by-4.0 | \n", - "The CLICK-ID dataset is a collection of Indone... | \n", - "|
| ItaCaseholdClassification | \n", - "{ita} | \n", - "[Legal, Government, Written] | \n", - "Apache 2.0 | \n", - "An Italian Dataset consisting of 1101 pairs of... | \n", - "|
| KorSarcasmClassification | \n", - "{kor} | \n", - "[Social, Written] | \n", - "MIT | \n", - "\\n The Korean Sarcasm Dataset was creat... | \n", - "|
| KurdishSentimentClassification | \n", - "{kur} | \n", - "[Web, Written] | \n", - "CC BY 4.0 | \n", - "Kurdish Sentiment Dataset | \n", - "|
| MacedonianTweetSentimentClassification | \n", - "{mkd} | \n", - "[Social, Written] | \n", - "CC BY-NC-SA 3.0 | \n", - "An Macedonian dataset for tweet sentiment clas... | \n", - "|
| AfriSentiClassification | \n", - "{yor, kin, pcm, twi, ary, por, tso, arq, hau, ... | \n", - "[Social, Written] | \n", - "Creative Commons Attribution 4.0 International... | \n", - "AfriSenti is the largest sentiment analysis da... | \n", - "|
| AmazonCounterfactualClassification | \n", - "{deu, eng, jpn} | \n", - "[Reviews, Written] | \n", - "CC BY 4.0 | \n", - "A collection of Amazon customer reviews annota... | \n", - "|
| CataloniaTweetClassification | \n", - "{spa, cat} | \n", - "[Social, Government, Written] | \n", - "cc-by-sa-4.0 | \n", - "This dataset contains two corpora in Spanish a... | \n", - "|
| CyrillicTurkicLangClassification | \n", - "{kir, bak, sah, chv, tat, rus, kaz, krc, tyv} | \n", - "[Web, Written] | \n", - "CC BY-NC 4.0 DEED | \n", - "Cyrillic dataset of 8 Turkic languages spoken ... | \n", - "|
| IndicLangClassification | \n", - "{san, kas, mni, sat, mal, brx, pan, asm, tam, ... | \n", - "[Web, Non-fiction, Written] | \n", - "CC0 | \n", - "A language identification test set for native-... | \n", - "|
| MasakhaNEWSClassification | \n", - "{yor, pcm, lin, run, fra, hau, ibo, amh, som, ... | \n", - "[News, Written] | \n", - "cc-by-nc-4.0 | \n", - "MasakhaNEWS is the largest publicly available ... | \n", - "|
| MassiveIntentClassification | \n", - "{afr, ara, ita, pol, hun, rus, cym, dan, jav, ... | \n", - "[Spoken] | \n", - "Apache 2.0 | \n", - "MASSIVE: A 1M-Example Multilingual Natural Lan... | \n", - "|
| MultiHateClassification | \n", - "{ara, deu, nld, por, cmn, ita, pol, fra, hin, ... | \n", - "[Constructed, Written] | \n", - "cc-by-4.0 | \n", - "Hate speech detection dataset with binary\\n ... | \n", - "|
| NordicLangClassification | \n", - "{isl, swe, fao, nob, dan, nno} | \n", - "[Encyclopaedic] | \n", - "cc-by-sa-3.0 | \n", - "A dataset for Nordic language identification. | \n", - "|
| NusaParagraphEmotionClassification | \n", - "{bew, mad, bbc, sun, min, bug, jav, mak, rej, ... | \n", - "[Non-fiction, Fiction, Written] | \n", - "Apache 2.0 | \n", - "NusaParagraphEmotionClassification is a multi-... | \n", - "|
| NusaX-senti | \n", - "{bjn, bbc, mad, nij, ace, ind, min, bug, jav, ... | \n", - "[Reviews, Web, Social, Constructed, Written] | \n", - "CC-BY-SA 4.0 | \n", - "NusaX is a high-quality multilingual parallel ... | \n", - "|
| ScalaClassification | \n", - "{swe, nob, dan, nno} | \n", - "[Fiction, News, Non-fiction, Blog, Spoken, Web... | \n", - "CC BY-SA 4.0 | \n", - "ScaLa a linguistic acceptability dataset for t... | \n", - "|
| SwissJudgementClassification | \n", - "{ita, deu, fra} | \n", - "[Legal, Written] | \n", - "CC-BY-4.0 | \n", - "Multilingual, diachronic dataset of Swiss Fede... | \n", - "|
| NepaliNewsClassification | \n", - "{nep} | \n", - "[News, Written] | \n", - "CC BY-SA 4.0 | \n", - "A Nepali dataset for 7500 news articles | \n", - "|
| OdiaNewsClassification | \n", - "{ory} | \n", - "[News, Written] | \n", - "MIT | \n", - "A Odia dataset for 3-class classification of O... | \n", - "|
| PunjabiNewsClassification | \n", - "{pan} | \n", - "[News, Written] | \n", - "MIT | \n", - "A Punjabi dataset for 2-class classification o... | \n", - "|
| PolEmo2.0-OUT | \n", - "{pol} | \n", - "[Written, Social] | \n", - "cc-by-sa-4.0 | \n", - "A collection of Polish online reviews from fou... | \n", - "|
| PAC | \n", - "{pol} | \n", - "[Legal, Written] | \n", - "cc-by-nc-sa-4.0 | \n", - "Polish Paraphrase Corpus | \n", - "|
| SinhalaNewsClassification | \n", - "{sin} | \n", - "[News, Written] | \n", - "mit | \n", - "This file contains news texts (sentences) belo... | \n", - "|
| CSFDSKMovieReviewSentimentClassification | \n", - "{slk} | \n", - "[Reviews, Written] | \n", - "CC-BY-SA-4.0 | \n", - "The dataset contains 30k user reviews from csf... | \n", - "|
| SiswatiNewsClassification | \n", - "{ssw} | \n", - "[News, Written] | \n", - "CC-BY-SA-4.0 | \n", - "Siswati News Classification Dataset | \n", - "|
| SlovakMovieReviewSentimentClassification | \n", - "{svk} | \n", - "[Reviews, Written] | \n", - "CC BY-NC-SA 4.0 | \n", - "User reviews of movies on the CSFD movie datab... | \n", - "|
| SwahiliNewsClassification | \n", - "{swa} | \n", - "[News, Written] | \n", - "CC BY-NC-SA 4.0 | \n", - "Dataset for Swahili News Classification, categ... | \n", - "|
| DalajClassification | \n", - "{swe} | \n", - "[Non-fiction, Written] | \n", - "CC-BY-4.0 | \n", - "A Swedish dataset for linguistic acceptability... | \n", - "|
| TswanaNewsClassification | \n", - "{tsn} | \n", - "[News, Written] | \n", - "CC-BY-SA-4.0 | \n", - "Tswana News Classification Dataset | \n", - "|
| IsiZuluNewsClassification | \n", - "{zul} | \n", - "[News, Written] | \n", - "CC-BY-SA-4.0 | \n", - "isiZulu News Classification Dataset | \n", - "|
| Clustering | \n", - "WikiCitiesClustering | \n", - "{eng} | \n", - "[Encyclopaedic, Written] | \n", - "cc-by-sa-4.0 | \n", - "Clustering of Wikipedia articles of cities by ... | \n", - "
| MasakhaNEWSClusteringS2S | \n", - "{yor, pcm, lin, run, fra, hau, ibo, amh, som, ... | \n", - "None | \n", - "None | \n", - "Clustering of news article headlines from Masa... | \n", - "|
| RomaniBibleClustering | \n", - "{rom} | \n", - "[Religious, Written] | \n", - "MIT | \n", - "Clustering verses from the Bible in Kalderash ... | \n", - "|
| ArXivHierarchicalClusteringP2P | \n", - "{eng} | \n", - "[Academic, Written] | \n", - "CC0 | \n", - "Clustering of titles+abstract from arxiv. Clus... | \n", - "|
| ArXivHierarchicalClusteringS2S | \n", - "{eng} | \n", - "[Academic, Written] | \n", - "CC0 | \n", - "Clustering of titles from arxiv. Clustering of... | \n", - "|
| BigPatentClustering.v2 | \n", - "{eng} | \n", - "[Legal, Written] | \n", - "cc-by-4.0 | \n", - "Clustering of documents from the Big Patent da... | \n", - "|
| BiorxivClusteringP2P.v2 | \n", - "{eng} | \n", - "[Academic, Written] | \n", - "https://www.biorxiv.org/content/about-biorxiv | \n", - "Clustering of titles+abstract from biorxiv acr... | \n", - "|
| MedrxivClusteringP2P.v2 | \n", - "{eng} | \n", - "[Academic, Medical, Written] | \n", - "https://www.medrxiv.org/content/about-medrxiv | \n", - "Clustering of titles+abstract from medrxiv acr... | \n", - "|
| StackExchangeClustering.v2 | \n", - "{eng} | \n", - "[Web, Written] | \n", - "Not specified | \n", - "Clustering of titles from 121 stackexchanges. ... | \n", - "|
| AlloProfClusteringS2S.v2 | \n", - "{fra} | \n", - "[Encyclopaedic, Written] | \n", - "mit | \n", - "Clustering of document titles from Allo Prof d... | \n", - "|
| HALClusteringS2S.v2 | \n", - "{fra} | \n", - "[Academic, Written] | \n", - "Apache-2.0 | \n", - "Clustering of titles from HAL (https://hugging... | \n", - "|
| SIB200ClusteringS2S | \n", - "{kin, ita, zul, sin, kbp, khk, ast, ell, shn, ... | \n", - "[News, Written] | \n", - "cc-by-sa-4.0 | \n", - "SIB-200 is the largest publicly available topi... | \n", - "|
| WikiClusteringP2P.v2 | \n", - "{sqi, wln, mlt, ilo, lav, min, cat, ces, dan, ... | \n", - "[Encyclopaedic, Written] | \n", - "cc-by-sa-3.0 | \n", - "Clustering of wikipedia articles inspired by B... | \n", - "|
| SNLHierarchicalClusteringP2P | \n", - "{nob} | \n", - "[Encyclopaedic, Non-fiction, Written] | \n", - "CC-BY-NC | \n", - "Webscrabed articles from the Norwegian lexicon... | \n", - "|
| PlscClusteringP2P.v2 | \n", - "{pol} | \n", - "[Academic, Written] | \n", - "cc0-1.0 | \n", - "Clustering of Polish article titles+abstracts ... | \n", - "|
| SwednClusteringP2P | \n", - "{swe} | \n", - "[News, Non-fiction, Written] | \n", - "cc-by-4.0 | \n", - "The SWE-DN corpus is based on 1,963,576 news a... | \n", - "|
| CLSClusteringP2P.v2 | \n", - "{cmn} | \n", - "[Academic, Written] | \n", - "Apache-2.0 | \n", - "Clustering of titles + abstract from CLS datas... | \n", - "|
| Retrieval | \n", - "StackOverflowQA | \n", - "{eng} | \n", - "[Programming, Written] | \n", - "MIT | \n", - "The dataset is a collection of natural languag... | \n", - "
| TwitterHjerneRetrieval | \n", - "{dan} | \n", - "[Social, Written] | \n", - "CC BY 4.0 | \n", - "Danish question asked on Twitter with the Hash... | \n", - "|
| AILAStatutes | \n", - "{eng} | \n", - "[Legal, Written] | \n", - "CC BY 4.0 | \n", - "This dataset is structured for the task of ide... | \n", - "|
| ArguAna | \n", - "{eng} | \n", - "[Medical, Written] | \n", - "cc-by-sa-4.0 | \n", - "NFCorpus: A Full-Text Learning to Rank Dataset... | \n", - "|
| HagridRetrieval | \n", - "{eng} | \n", - "[Encyclopaedic, Written] | \n", - "apache-2.0 | \n", - "HAGRID (Human-in-the-loop Attributable Generat... | \n", - "|
| LegalBenchCorporateLobbying | \n", - "{eng} | \n", - "[Legal, Written] | \n", - "CC BY 4.0 | \n", - "The dataset includes bill titles and bill summ... | \n", - "|
| LEMBPasskeyRetrieval | \n", - "{eng} | \n", - "[Fiction, Written] | \n", - "Not specified | \n", - "passkey subset of dwzhu/LongEmbed dataset. | \n", - "|
| SCIDOCS | \n", - "{eng} | \n", - "[Academic, Written, Non-fiction] | \n", - "cc-by-sa-4.0 | \n", - "SciDocs, a new evaluation benchmark consisting... | \n", - "|
| SpartQA | \n", - "{eng} | \n", - "[Encyclopaedic, Written] | \n", - "MIT | \n", - "Measuring the ability to retrieve the groundtr... | \n", - "|
| TempReasonL1 | \n", - "{eng} | \n", - "[Encyclopaedic, Written] | \n", - "CC BY-SA 3.0 | \n", - "Measuring the ability to retrieve the groundtr... | \n", - "|
| TRECCOVID | \n", - "{eng} | \n", - "None | \n", - "None | \n", - "TRECCOVID is an ad-hoc search challenge based ... | \n", - "|
| WinoGrande | \n", - "{eng} | \n", - "[Encyclopaedic, Written] | \n", - "CC BY | \n", - "Measuring the ability to retrieve the groundtr... | \n", - "|
| BelebeleRetrieval | \n", - "{kin, ita, zul, sin, khk, ell, shn, slk, mal, ... | \n", - "[Web, News, Written] | \n", - "CC-BY-SA-4.0 | \n", - "Belebele is a multiple-choice machine reading ... | \n", - "|
| MLQARetrieval | \n", - "{ara, deu, vie, hin, zho, eng, spa} | \n", - "[Encyclopaedic, Written] | \n", - "cc-by-sa-3.0 | \n", - "MLQA (MultiLingual Question Answering) is a be... | \n", - "|
| StatcanDialogueDatasetRetrieval | \n", - "{fra, eng} | \n", - "[Government, Web, Written] | \n", - "https://huggingface.co/datasets/McGill-NLP/sta... | \n", - "A Dataset for Retrieving Data Tables through C... | \n", - "|
| WikipediaRetrievalMultilingual | \n", - "{srp, deu, nor, nld, por, swe, fas, ita, hin, ... | \n", - "[Encyclopaedic, Written] | \n", - "cc-by-sa-3.0 | \n", - "The dataset is derived from Cohere's wikipedia... | \n", - "|
| CovidRetrieval | \n", - "{cmn} | \n", - "None | \n", - "None | \n", - "COVID-19 news articles | \n", - "|
| InstructionRetrieval | \n", - "Core17InstructionRetrieval | \n", - "{eng} | \n", - "[News, Written] | \n", - "MIT | \n", - "Measuring retrieval instruction following abil... | \n", - "
| News21InstructionRetrieval | \n", - "{eng} | \n", - "[News, Written] | \n", - "MIT | \n", - "Measuring retrieval instruction following abil... | \n", - "|
| Robust04InstructionRetrieval | \n", - "{eng} | \n", - "[News, Written] | \n", - "MIT | \n", - "Measuring retrieval instruction following abil... | \n", - "|
| MultilabelClassification | \n", - "KorHateSpeechMLClassification | \n", - "{kor} | \n", - "[Social, Written] | \n", - "cc-by-sa-4.0 | \n", - "\\n The Korean Multi-label Hate Speech D... | \n", - "
| MalteseNewsClassification | \n", - "{mlt} | \n", - "[Constructed, Written] | \n", - "cc-by-nc-sa-4.0 | \n", - "A multi-label topic classification dataset for... | \n", - "|
| MultiEURLEXMultilabelClassification | \n", - "{est, ita, pol, hun, dan, lit, ell, slk, spa, ... | \n", - "[Legal, Government, Written] | \n", - "CC BY-SA 4.0 | \n", - "EU laws in 23 EU languages containing gold lab... | \n", - "|
| BrazilianToxicTweetsClassification | \n", - "{por} | \n", - "[Constructed, Written] | \n", - "CC BY-SA 4.0 | \n", - "\\n ToLD-Br is the biggest dataset for t... | \n", - "|
| CEDRClassification | \n", - "{rus} | \n", - "[Web, Social, Blog, Written] | \n", - "apache-2.0 | \n", - "Classification of sentences by emotions, label... | \n", - "|
| PairClassification | \n", - "CTKFactsNLI | \n", - "{ces} | \n", - "[News, Written] | \n", - "CC-BY-SA-3.0 | \n", - "Czech Natural Language Inference dataset of ar... | \n", - "
| SprintDuplicateQuestions | \n", - "{eng} | \n", - "[Programming, Written] | \n", - "Not specified | \n", - "Duplicate questions from the Sprint community. | \n", - "|
| TwitterURLCorpus | \n", - "{eng} | \n", - "None | \n", - "None | \n", - "Paraphrase-Pairs of Tweets. | \n", - "|
| ArmenianParaphrasePC | \n", - "{hye} | \n", - "[News, Written] | \n", - "Apache-2.0 | \n", - "asparius/Armenian-Paraphrase-PC | \n", - "|
| indonli | \n", - "{ind} | \n", - "[Encyclopaedic, Web, News, Written] | \n", - "CC-BY-SA 4.0 | \n", - "IndoNLI is the first human-elicited Natural La... | \n", - "|
| OpusparcusPC | \n", - "{deu, swe, fra, rus, eng, fin} | \n", - "[Spoken, Spoken] | \n", - "cc-by-nc-4.0 | \n", - "Opusparcus is a paraphrase corpus for six Euro... | \n", - "|
| PawsXPairClassification | \n", - "{deu, kor, cmn, fra, eng, jpn, spa} | \n", - "[Web, Encyclopaedic, Written] | \n", - "Custom (commercial) | \n", - "{PAWS-X: A Cross-lingual Adversarial Dataset f... | \n", - "|
| RTE3 | \n", - "{fra, deu, eng, ita} | \n", - "[News, Web, Encyclopaedic, Written] | \n", - "cc-by-4.0 | \n", - "Recognising Textual Entailment Challenge (RTE-... | \n", - "|
| XNLI | \n", - "{tha, ara, deu, vie, fra, hin, rus, zho, swa, ... | \n", - "[Non-fiction, Fiction, Government, Written] | \n", - "Not specified | \n", - "\n", - " | |
| PpcPC | \n", - "{pol} | \n", - "[Fiction, Non-fiction, Web, Written, Spoken, S... | \n", - "GPL-3.0 | \n", - "Polish Paraphrase Corpus | \n", - "|
| TERRa | \n", - "{rus} | \n", - "[News, Web, Written] | \n", - "mit | \n", - "Textual Entailment Recognition for Russian. Th... | \n", - "|
| Reranking | \n", - "WebLINXCandidatesReranking | \n", - "{eng} | \n", - "[Academic, Web, Written] | \n", - "CC BY-NC-SA 4.0 | \n", - "WebLINX is a large-scale benchmark of 100K int... | \n", - "
| AlloprofReranking | \n", - "{fra} | \n", - "[Web, Academic, Written] | \n", - "CC BY-NC-SA 4.0 | \n", - "This dataset was provided by AlloProf, an orga... | \n", - "|
| VoyageMMarcoReranking | \n", - "{jpn} | \n", - "[Academic, Non-fiction, Written] | \n", - "CC BY 4.0 | \n", - "a hard-negative augmented version of the Japan... | \n", - "|
| WikipediaRerankingMultilingual | \n", - "{srp, deu, nor, nld, por, swe, fas, ita, hin, ... | \n", - "[Encyclopaedic, Written] | \n", - "cc-by-sa-3.0 | \n", - "The dataset is derived from Cohere's wikipedia... | \n", - "|
| RuBQReranking | \n", - "{rus} | \n", - "[Encyclopaedic, Written] | \n", - "cc-by-sa-4.0 | \n", - "Paragraph reranking based on RuBQ 2.0. Give pa... | \n", - "|
| T2Reranking | \n", - "{cmn} | \n", - "None | \n", - "None | \n", - "T2Ranking: A large-scale Chinese Benchmark for... | \n", - "|
| STS | \n", - "GermanSTSBenchmark | \n", - "{deu} | \n", - "None | \n", - "None | \n", - "Semantic Textual Similarity Benchmark (STSbenc... | \n", - "
| SICK-R | \n", - "{eng} | \n", - "None | \n", - "None | \n", - "Semantic Textual Similarity SICK-R dataset as ... | \n", - "|
| STS12 | \n", - "{eng} | \n", - "[Encyclopaedic, News, Written] | \n", - "Not specified | \n", - "SemEval-2012 Task 6. | \n", - "|
| STS13 | \n", - "{eng} | \n", - "[Web, News, Non-fiction, Written] | \n", - "Not specified | \n", - "SemEval STS 2013 dataset. | \n", - "|
| STS14 | \n", - "{eng} | \n", - "[Blog, Web, Spoken] | \n", - "Not specified | \n", - "SemEval STS 2014 dataset. Currently only the E... | \n", - "|
| STS15 | \n", - "{eng} | \n", - "[Blog, News, Web, Written, Spoken] | \n", - "Not specified | \n", - "SemEval STS 2015 dataset | \n", - "|
| STSBenchmark | \n", - "{eng} | \n", - "None | \n", - "None | \n", - "Semantic Textual Similarity Benchmark (STSbenc... | \n", - "|
| FaroeseSTS | \n", - "{fao} | \n", - "[News, Web, Written] | \n", - "cc-by-4.0 | \n", - "Semantic Text Similarity (STS) corpus for Faro... | \n", - "|
| FinParaSTS | \n", - "{fin} | \n", - "[News, Subtitles, Written] | \n", - "cc-by-sa-4.0 | \n", - "Finnish paraphrase-based semantic similarity c... | \n", - "|
| JSICK | \n", - "{jpn} | \n", - "[Web, Written] | \n", - "cc-by-4.0 | \n", - "JSICK is the Japanese NLI and STS dataset by m... | \n", - "|
| IndicCrosslingualSTS | \n", - "{ory, tam, mal, guj, hin, tel, mar, kan, eng, ... | \n", - "[News, Non-fiction, Web, Spoken, Government, W... | \n", - "CC0 | \n", - "This is a Semantic Textual Similarity testset ... | \n", - "|
| SemRel24STS | \n", - "{kin, afr, ary, arq, hau, hin, ind, tel, amh, ... | \n", - "[Spoken, Written] | \n", - "Not specified | \n", - "SemRel2024 is a collection of Semantic Textual... | \n", - "|
| STS17 | \n", - "{ara, deu, kor, nld, ita, fra, eng, tur, spa} | \n", - "[News, Web, Written] | \n", - "Not specified | \n", - "Semeval-2017 task 1: Semantic textual similari... | \n", - "|
| STS22.v2 | \n", - "{ara, deu, cmn, ita, pol, fra, rus, eng, tur, ... | \n", - "[News, Written] | \n", - "Not specified | \n", - "SemEval 2022 Task 8: Multilingual News Article... | \n", - "|
| STSES | \n", - "{spa} | \n", - "[Written] | \n", - "cc-by-4.0 | \n", - "Spanish test sets from SemEval-2014 (Agirre et... | \n", - "|
| STSB | \n", - "{cmn} | \n", - "None | \n", - "None | \n", - "A Chinese dataset for textual relatedness | \n", - "
| \n", - " | Rank (Borda Count) | \n", - "mean | \n", - "mean (weighted by task type) | \n", - "mean (BitextMining) | \n", - "mean (PairClassification) | \n", - "mean (Classification) | \n", - "mean (STS) | \n", - "mean (Retrieval) | \n", - "mean (MultilabelClassification) | \n", - "mean (Clustering) | \n", - "mean (Reranking) | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|
| model | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
| multilingual-e5-large-instruct | \n", - "1 (1244) | \n", - "63.4 | \n", - "55.3 | \n", - "80.1 | \n", - "81.2 | \n", - "65.0 | \n", - "76.7 | \n", - "58.0 | \n", - "22.9 | \n", - "51.5 | \n", - "63.0 | \n", - "
| GritLM-7B | \n", - "2 (1119) | \n", - "60.9 | \n", - "53.6 | \n", - "70.5 | \n", - "80.2 | \n", - "61.9 | \n", - "73.2 | \n", - "59.1 | \n", - "21.2 | \n", - "50.4 | \n", - "62.8 | \n", - "
| e5-mistral-7b-instruct | \n", - "3 (1100) | \n", - "60.2 | \n", - "53.1 | \n", - "70.6 | \n", - "81.4 | \n", - "60.3 | \n", - "73.9 | \n", - "55.4 | \n", - "22.2 | \n", - "51.4 | \n", - "63.4 | \n", - "
| multilingual-e5-large | \n", - "4 (980) | \n", - "58.7 | \n", - "51.5 | \n", - "71.7 | \n", - "79.3 | \n", - "59.9 | \n", - "73.4 | \n", - "55.0 | \n", - "21.3 | \n", - "43.1 | \n", - "62.6 | \n", - "
| multilingual-e5-base | \n", - "5 (811) | \n", - "57.1 | \n", - "50.0 | \n", - "69.4 | \n", - "77.6 | \n", - "58.2 | \n", - "71.2 | \n", - "53.6 | \n", - "20.2 | \n", - "42.8 | \n", - "59.9 | \n", - "
| paraphrase-multilingual-mpnet-base-v2 | \n", - "6 (698) | \n", - "52.0 | \n", - "45.2 | \n", - "52.1 | \n", - "81.6 | \n", - "55.1 | \n", - "69.5 | \n", - "39.3 | \n", - "16.4 | \n", - "41.2 | \n", - "53.2 | \n", - "
| multilingual-e5-small | \n", - "7 (654) | \n", - "55.6 | \n", - "48.8 | \n", - "67.5 | \n", - "76.8 | \n", - "56.5 | \n", - "69.9 | \n", - "50.2 | \n", - "19.1 | \n", - "41.8 | \n", - "60.2 | \n", - "
| LaBSE | \n", - "8 (589) | \n", - "52.1 | \n", - "45.8 | \n", - "76.3 | \n", - "76.1 | \n", - "54.6 | \n", - "65.2 | \n", - "32.9 | \n", - "20.1 | \n", - "39.4 | \n", - "50.4 | \n", - "
| paraphrase-multilingual-MiniLM-L12-v2 | \n", - "9 (475) | \n", - "48.8 | \n", - "42.5 | \n", - "44.5 | \n", - "79.4 | \n", - "51.7 | \n", - "66.4 | \n", - "36.2 | \n", - "14.9 | \n", - "39.6 | \n", - "51.0 | \n", - "
| all-mpnet-base-v2 | \n", - "10 (398) | \n", - "42.4 | \n", - "36.2 | \n", - "21.2 | \n", - "71.0 | \n", - "47.0 | \n", - "57.1 | \n", - "32.8 | \n", - "16.3 | \n", - "41.1 | \n", - "42.1 | \n", - "
| all-MiniLM-L12-v2 | \n", - "11 (355) | \n", - "42.1 | \n", - "36.2 | \n", - "22.9 | \n", - "71.9 | \n", - "46.8 | \n", - "56.6 | \n", - "32.4 | \n", - "14.6 | \n", - "36.8 | \n", - "44.3 | \n", - "
| all-MiniLM-L6-v2 | \n", - "12 (290) | \n", - "41.5 | \n", - "35.2 | \n", - "20.1 | \n", - "71.3 | \n", - "46.3 | \n", - "55.6 | \n", - "33.1 | \n", - "15.1 | \n", - "38.3 | \n", - "40.0 | \n", - "