Skip to content

[MAEB] Add GLOBE v3 dataset #4091

Merged
Samoed merged 3 commits into embeddings-benchmark:maeb from
diffunity:maeb_globev3
Feb 13, 2026
Merged

[MAEB] Add GLOBE v3 dataset #4091
Samoed merged 3 commits into embeddings-benchmark:maeb from
diffunity:maeb_globev3

Conversation

@diffunity
Copy link
Collaborator

If you add a model or a dataset, please add the corresponding checklist:


Perf result with facebook/hubert-base-ls960

GLOBE v2 Age perf
{
  "dataset_revision": "b36e803c88037b09688f7c915d93b4cd654ba67e",
  "task_name": "GLOBEV2Age",
  "mteb_version": "2.7.25",
  "scores": {
    "train": [
      {
        "scores_per_experiment": [
          {
            "accuracy": 0.1898,
            "f1": 0.266879,
            "f1_weighted": 0.214302,
            "precision": 0.283884,
            "precision_weighted": 0.328228,
            "recall": 0.31934,
            "recall_weighted": 0.1898,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.1846,
            "f1": 0.272757,
            "f1_weighted": 0.206841,
            "precision": 0.301653,
            "precision_weighted": 0.370479,
            "recall": 0.35243,
            "recall_weighted": 0.1846,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.1836,
            "f1": 0.271033,
            "f1_weighted": 0.19902,
            "precision": 0.305808,
            "precision_weighted": 0.391794,
            "recall": 0.338014,
            "recall_weighted": 0.1836,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.2196,
            "f1": 0.287799,
            "f1_weighted": 0.233353,
            "precision": 0.306469,
            "precision_weighted": 0.375702,
            "recall": 0.344944,
            "recall_weighted": 0.2196,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.1786,
            "f1": 0.265259,
            "f1_weighted": 0.197294,
            "precision": 0.287822,
            "precision_weighted": 0.341984,
            "recall": 0.328891,
            "recall_weighted": 0.1786,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.1964,
            "f1": 0.284313,
            "f1_weighted": 0.212601,
            "precision": 0.304775,
            "precision_weighted": 0.352687,
            "recall": 0.362618,
            "recall_weighted": 0.1964,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.1766,
            "f1": 0.275949,
            "f1_weighted": 0.181255,
            "precision": 0.310505,
            "precision_weighted": 0.369717,
            "recall": 0.355163,
            "recall_weighted": 0.1766,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.183,
            "f1": 0.26289,
            "f1_weighted": 0.197632,
            "precision": 0.279855,
            "precision_weighted": 0.321648,
            "recall": 0.313785,
            "recall_weighted": 0.183,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.193,
            "f1": 0.27208,
            "f1_weighted": 0.218474,
            "precision": 0.293903,
            "precision_weighted": 0.349339,
            "recall": 0.32361,
            "recall_weighted": 0.193,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.203,
            "f1": 0.283188,
            "f1_weighted": 0.218452,
            "precision": 0.296756,
            "precision_weighted": 0.341485,
            "recall": 0.348565,
            "recall_weighted": 0.203,
            "ap": null,
            "ap_weighted": null
          }
        ],
        "accuracy": 0.19082,
        "f1": 0.274215,
        "f1_weighted": 0.207922,
        "precision": 0.297143,
        "precision_weighted": 0.354306,
        "recall": 0.338736,
        "recall_weighted": 0.19082,
        "ap": NaN,
        "ap_weighted": NaN,
        "main_score": 0.19082,
        "hf_subset": "default",
        "languages": [
          "eng-Latn"
        ]
      }
    ]
  },
  "evaluation_time": 111.55904340744019,
  "kg_co2_emissions": null
}
GLOBE v3 Age perf
{
  "dataset_revision": "f7399f4b836508a178c0913868e82462b4a8919b",
  "task_name": "GLOBEV3Age",
  "mteb_version": "2.7.25",
  "scores": {
    "test": [
      {
        "scores_per_experiment": [
          {
            "accuracy": 0.118,
            "f1": 0.096891,
            "f1_weighted": 0.136906,
            "precision": 0.12758,
            "precision_weighted": 0.222275,
            "recall": 0.168033,
            "recall_weighted": 0.118,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.1562,
            "f1": 0.112519,
            "f1_weighted": 0.146576,
            "precision": 0.146326,
            "precision_weighted": 0.256424,
            "recall": 0.143233,
            "recall_weighted": 0.1562,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.1754,
            "f1": 0.117358,
            "f1_weighted": 0.183352,
            "precision": 0.13677,
            "precision_weighted": 0.243906,
            "recall": 0.178396,
            "recall_weighted": 0.1754,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.1416,
            "f1": 0.113694,
            "f1_weighted": 0.156998,
            "precision": 0.150704,
            "precision_weighted": 0.262231,
            "recall": 0.200381,
            "recall_weighted": 0.1416,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.1832,
            "f1": 0.119853,
            "f1_weighted": 0.191343,
            "precision": 0.146549,
            "precision_weighted": 0.261196,
            "recall": 0.128697,
            "recall_weighted": 0.1832,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.1368,
            "f1": 0.104628,
            "f1_weighted": 0.155017,
            "precision": 0.143909,
            "precision_weighted": 0.251367,
            "recall": 0.125578,
            "recall_weighted": 0.1368,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.172,
            "f1": 0.125526,
            "f1_weighted": 0.181536,
            "precision": 0.139052,
            "precision_weighted": 0.237597,
            "recall": 0.138361,
            "recall_weighted": 0.172,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.1126,
            "f1": 0.093525,
            "f1_weighted": 0.121661,
            "precision": 0.137891,
            "precision_weighted": 0.243164,
            "recall": 0.12934,
            "recall_weighted": 0.1126,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.1672,
            "f1": 0.120962,
            "f1_weighted": 0.176902,
            "precision": 0.140719,
            "precision_weighted": 0.244364,
            "recall": 0.137867,
            "recall_weighted": 0.1672,
            "ap": null,
            "ap_weighted": null
          },
          {
            "accuracy": 0.1338,
            "f1": 0.107529,
            "f1_weighted": 0.147894,
            "precision": 0.155887,
            "precision_weighted": 0.272045,
            "recall": 0.14416,
            "recall_weighted": 0.1338,
            "ap": null,
            "ap_weighted": null
          }
        ],
        "accuracy": 0.14968,
        "f1": 0.111248,
        "f1_weighted": 0.159819,
        "precision": 0.142539,
        "precision_weighted": 0.249457,
        "recall": 0.149404,
        "recall_weighted": 0.14968,
        "ap": NaN,
        "ap_weighted": NaN,
        "main_score": 0.14968,
        "hf_subset": "default",
        "languages": [
          "eng-Latn"
        ]
      }
    ]
  },
  "evaluation_time": 132.41054725646973,
  "kg_co2_emissions": null
}
GLOBE v3 Gender perf
{
  "dataset_revision": "7020a6c14ec8a8e967013e04f2a695ead308bee1",
  "task_name": "GLOBEV3Gender",
  "mteb_version": "2.7.25",
  "scores": {
    "test": [
      {
        "scores_per_experiment": [
          {
            "accuracy": 0.5534,
            "f1": 0.55297,
            "f1_weighted": 0.557266,
            "precision": 0.597859,
            "precision_weighted": 0.653432,
            "recall": 0.601604,
            "recall_weighted": 0.5534,
            "ap": 0.709482,
            "ap_weighted": 0.709482
          },
          {
            "accuracy": 0.7204,
            "f1": 0.696982,
            "f1_weighted": 0.723096,
            "precision": 0.694011,
            "precision_weighted": 0.727265,
            "recall": 0.701648,
            "recall_weighted": 0.7204,
            "ap": 0.766489,
            "ap_weighted": 0.766489
          },
          {
            "accuracy": 0.7272,
            "f1": 0.716531,
            "f1_weighted": 0.733579,
            "precision": 0.716427,
            "precision_weighted": 0.759244,
            "recall": 0.73798,
            "recall_weighted": 0.7272,
            "ap": 0.795322,
            "ap_weighted": 0.795322
          },
          {
            "accuracy": 0.7252,
            "f1": 0.714773,
            "f1_weighted": 0.731679,
            "precision": 0.715106,
            "precision_weighted": 0.758328,
            "recall": 0.736728,
            "recall_weighted": 0.7252,
            "ap": 0.794581,
            "ap_weighted": 0.794581
          },
          {
            "accuracy": 0.7074,
            "f1": 0.677993,
            "f1_weighted": 0.708159,
            "precision": 0.677119,
            "precision_weighted": 0.709005,
            "recall": 0.678967,
            "recall_weighted": 0.7074,
            "ap": 0.751317,
            "ap_weighted": 0.751317
          },
          {
            "accuracy": 0.7314,
            "f1": 0.687009,
            "f1_weighted": 0.72355,
            "precision": 0.702476,
            "precision_weighted": 0.722707,
            "recall": 0.680002,
            "recall_weighted": 0.7314,
            "ap": 0.750354,
            "ap_weighted": 0.750354
          },
          {
            "accuracy": 0.6848,
            "f1": 0.630814,
            "f1_weighted": 0.674579,
            "precision": 0.644115,
            "precision_weighted": 0.671847,
            "recall": 0.626459,
            "recall_weighted": 0.6848,
            "ap": 0.719009,
            "ap_weighted": 0.719009
          },
          {
            "accuracy": 0.7072,
            "f1": 0.700387,
            "f1_weighted": 0.714393,
            "precision": 0.708517,
            "precision_weighted": 0.75711,
            "recall": 0.730669,
            "recall_weighted": 0.7072,
            "ap": 0.792718,
            "ap_weighted": 0.792718
          },
          {
            "accuracy": 0.5938,
            "f1": 0.5938,
            "f1_weighted": 0.593775,
            "precision": 0.657043,
            "precision_weighted": 0.720204,
            "recall": 0.657,
            "recall_weighted": 0.5938,
            "ap": 0.748249,
            "ap_weighted": 0.748249
          },
          {
            "accuracy": 0.7066,
            "f1": 0.681757,
            "f1_weighted": 0.709321,
            "precision": 0.679124,
            "precision_weighted": 0.713359,
            "recall": 0.685901,
            "recall_weighted": 0.7066,
            "ap": 0.756278,
            "ap_weighted": 0.756278
          }
        ],
        "accuracy": 0.68574,
        "f1": 0.665302,
        "f1_weighted": 0.68694,
        "precision": 0.67918,
        "precision_weighted": 0.71925,
        "recall": 0.683696,
        "recall_weighted": 0.68574,
        "ap": 0.75838,
        "ap_weighted": 0.75838,
        "main_score": 0.68574,
        "hf_subset": "default",
        "languages": [
          "eng-Latn"
        ]
      }
    ]
  },
  "evaluation_time": 126.9462685585022,
  "kg_co2_emissions": null
}

@diffunity diffunity changed the title from "fix globe v2 and add globe v3" to "[MAEB] Add GLOBE v3 dataset" on Feb 12, 2026
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it then use cross-validation if we use the train split?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry got it mixed up. fixed the changes, thanks

@Samoed Samoed merged commit 6120b00 into embeddings-benchmark:maeb Feb 13, 2026
10 of 11 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants