Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
4bd3b39
[ADD] 50 vietnamese dataset from vn-mteb
BaoLocPham Aug 1, 2025
e87fb0d
[UPDATE] task metadata
BaoLocPham Aug 2, 2025
5d18eca
[UPDATE] import dependencies
BaoLocPham Aug 2, 2025
e3237aa
Merge branch 'embeddings-benchmark:main' into main
BaoLocPham Aug 4, 2025
0f7a192
[UPDATE] task metadata, bibtext citation
BaoLocPham Aug 4, 2025
35948bc
[UPDATE-TEST] test_model_meta
BaoLocPham Aug 4, 2025
c136fb6
[UPDATE] sample_creation to machine-translated and LM verified
BaoLocPham Aug 4, 2025
b26a507
[ADD] sample creation machine-translated and LM verified
BaoLocPham Aug 4, 2025
4562482
[ADD] Vietnamese Embedding models
BaoLocPham Aug 7, 2025
9d2a03b
[REMOVE] default fields metadata in Classfication tasks
BaoLocPham Aug 7, 2025
e65654d
Merge branch 'embeddings-benchmark:main' into main
BaoLocPham Aug 9, 2025
79e271a
Merge branch 'main' of https://github.com/BaoLocPham/vn-mteb into fea…
BaoLocPham Aug 9, 2025
1923ddd
Merge branch 'embeddings-benchmark:main' into feat/models
BaoLocPham Aug 17, 2025
fb1db09
[UPDATE] model to vi-vn language specific file
BaoLocPham Aug 17, 2025
02de367
Merge branch 'feat/models' of https://github.com/BaoLocPham/vn-mteb i…
BaoLocPham Aug 17, 2025
68aa669
[FIX] lint
BaoLocPham Aug 18, 2025
770aeff
[FIX] model loader
BaoLocPham Aug 18, 2025
30b6c34
Merge branch 'embeddings-benchmark:main' into feat/models
BaoLocPham Aug 18, 2025
f3a6269
Merge branch 'embeddings-benchmark:main' into main
BaoLocPham Aug 19, 2025
9d74873
Merge branch 'main' of https://github.com/BaoLocPham/vn-mteb into fix…
BaoLocPham Aug 19, 2025
c4d9c0d
[FIX] VN-MTEB 3 datasets PairClassification rename column
BaoLocPham Aug 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class SprintDuplicateQuestionsPCVN(AbsTaskPairClassification):
category="s2s",
eval_splits=["validation", "test"],
eval_langs=["vie-Latn"],
main_score="ap",
main_score="max_ap",
date=("2025-07-29", "2025-07-30"),
license="cc-by-sa-4.0",
annotations_creators="derived",
Expand All @@ -42,3 +42,7 @@ class SprintDuplicateQuestionsPCVN(AbsTaskPairClassification):
""",
adapted_from=["SprintDuplicateQuestions"],
)

def dataset_transform(self):
    """Rename the sentence-pair columns of ``self.dataset``.

    ``sent1`` becomes ``sentence1`` and ``sent2`` becomes ``sentence2``; each
    renamed dataset is stored back on ``self.dataset``.
    """
    # NOTE(review): presumably these are the column names the
    # pair-classification evaluation reads -- confirm against the base task.
    column_renames = (
        ("sent1", "sentence1"),
        ("sent2", "sentence2"),
    )
    for old_name, new_name in column_renames:
        self.dataset = self.dataset.rename_column(old_name, new_name)
6 changes: 5 additions & 1 deletion mteb/tasks/PairClassification/vie/TwitterSemEval2015PCVN.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class TwitterSemEval2015PCVN(AbsTaskPairClassification):
type="PairClassification",
eval_splits=["test"],
eval_langs=["vie-Latn"],
main_score="ap",
main_score="max_ap",
date=("2025-07-29", "2025-07-30"),
license="cc-by-sa-4.0",
annotations_creators="derived",
Expand All @@ -42,3 +42,7 @@ class TwitterSemEval2015PCVN(AbsTaskPairClassification):
""",
adapted_from=["TwitterSemEval2015"],
)

def dataset_transform(self):
    """Rename the sentence-pair columns of ``self.dataset``.

    ``sent1`` becomes ``sentence1`` and ``sent2`` becomes ``sentence2``; each
    renamed dataset is stored back on ``self.dataset``.
    """
    # NOTE(review): presumably these are the column names the
    # pair-classification evaluation reads -- confirm against the base task.
    column_renames = (
        ("sent1", "sentence1"),
        ("sent2", "sentence2"),
    )
    for old_name, new_name in column_renames:
        self.dataset = self.dataset.rename_column(old_name, new_name)
6 changes: 5 additions & 1 deletion mteb/tasks/PairClassification/vie/TwitterURLCorpusPCVN.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class TwitterURLCorpusPC(AbsTaskPairClassification):
type="PairClassification",
eval_splits=["test"],
eval_langs=["vie-Latn"],
main_score="ap",
main_score="max_ap",
date=("2025-07-29", "2025-07-30"),
license="cc-by-sa-4.0",
annotations_creators="derived",
Expand All @@ -42,3 +42,7 @@ class TwitterURLCorpusPC(AbsTaskPairClassification):
""",
adapted_from=["TwitterURLCorpus"],
)

def dataset_transform(self):
    """Rename the sentence-pair columns of ``self.dataset``.

    ``sent1`` becomes ``sentence1`` and ``sent2`` becomes ``sentence2``; each
    renamed dataset is stored back on ``self.dataset``.
    """
    # NOTE(review): presumably these are the column names the
    # pair-classification evaluation reads -- confirm against the base task.
    column_renames = (
        ("sent1", "sentence1"),
        ("sent2", "sentence2"),
    )
    for old_name, new_name in column_renames:
        self.dataset = self.dataset.rename_column(old_name, new_name)
Loading