From aace2cb5cdeaa1e9b62314161f49cc9a85c2bbd6 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Tue, 6 May 2025 22:04:08 +0200 Subject: [PATCH] fix: SIB200 machine translated > human translated As correctly pointed out in: https://huggingface.co/datasets/mteb/sib200/discussions/1 --- mteb/tasks/Classification/multilingual/SIB200Classification.py | 2 +- mteb/tasks/Clustering/multilingual/SIB200ClusteringS2S.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mteb/tasks/Classification/multilingual/SIB200Classification.py b/mteb/tasks/Classification/multilingual/SIB200Classification.py index bbfb20e40b..63fa8c3f84 100644 --- a/mteb/tasks/Classification/multilingual/SIB200Classification.py +++ b/mteb/tasks/Classification/multilingual/SIB200Classification.py @@ -212,7 +212,7 @@ class SIB200Classification(MultilingualTask, AbsTaskClassification): dataset based on Flores-200 covering 205 languages and dialects annotated. The dataset is annotated in English for the topics, science/technology, travel, politics, sports, health, entertainment, and geography. The labels are then transferred to the other languages - in Flores-200 which are machine-translated. + in Flores-200 which are human-translated. """, reference="https://arxiv.org/abs/2309.07445", dataset={ diff --git a/mteb/tasks/Clustering/multilingual/SIB200ClusteringS2S.py b/mteb/tasks/Clustering/multilingual/SIB200ClusteringS2S.py index 68e66ddc3e..c9a850a030 100644 --- a/mteb/tasks/Clustering/multilingual/SIB200ClusteringS2S.py +++ b/mteb/tasks/Clustering/multilingual/SIB200ClusteringS2S.py @@ -217,7 +217,7 @@ class SIB200ClusteringFast(MultilingualTask, AbsTaskClusteringFast): dataset based on Flores-200 covering 205 languages and dialects annotated. The dataset is annotated in English for the topics, science/technology, travel, politics, sports, health, entertainment, and geography. The labels are then transferred to the other languages - in Flores-200 which are machine-translated. + in Flores-200 which are human-translated. """, reference="https://arxiv.org/abs/2309.07445", dataset={