From 04e0aeb06ed820efaacd5970e85ada52a9ad0120 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Mon, 22 Aug 2022 18:38:36 +0200 Subject: [PATCH] fix cwmt zh subsets --- datasets/wmt14/wmt_utils.py | 2 +- datasets/wmt15/wmt_utils.py | 2 +- datasets/wmt16/wmt_utils.py | 2 +- datasets/wmt17/wmt_utils.py | 2 +- datasets/wmt18/wmt_utils.py | 2 +- datasets/wmt19/wmt_utils.py | 2 +- datasets/wmt_t2t/wmt_utils.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/datasets/wmt14/wmt_utils.py b/datasets/wmt14/wmt_utils.py index 556ef9189f0..0786b63a39c 100644 --- a/datasets/wmt14/wmt_utils.py +++ b/datasets/wmt14/wmt_utils.py @@ -872,7 +872,7 @@ def gen(): if split_path[-1] == "txt": # CWMT lang = split_path[-2].split("_")[-1] - lang = "zh" if lang in ("ch", "cn") else lang + lang = "zh" if lang in ("ch", "cn", "c[hn]") else lang else: lang = split_path[-1] diff --git a/datasets/wmt15/wmt_utils.py b/datasets/wmt15/wmt_utils.py index e7cc48474f2..bc7b9f2b948 100644 --- a/datasets/wmt15/wmt_utils.py +++ b/datasets/wmt15/wmt_utils.py @@ -875,7 +875,7 @@ def gen(): if split_path[-1] == "txt": # CWMT lang = split_path[-2].split("_")[-1] - lang = "zh" if lang in ("ch", "cn") else lang + lang = "zh" if lang in ("ch", "cn", "c[hn]") else lang else: lang = split_path[-1] diff --git a/datasets/wmt16/wmt_utils.py b/datasets/wmt16/wmt_utils.py index 0bd210bc178..ac51439e587 100644 --- a/datasets/wmt16/wmt_utils.py +++ b/datasets/wmt16/wmt_utils.py @@ -875,7 +875,7 @@ def gen(): if split_path[-1] == "txt": # CWMT lang = split_path[-2].split("_")[-1] - lang = "zh" if lang in ("ch", "cn") else lang + lang = "zh" if lang in ("ch", "cn", "c[hn]") else lang else: lang = split_path[-1] diff --git a/datasets/wmt17/wmt_utils.py b/datasets/wmt17/wmt_utils.py index 7ab7bd361f4..16b2ce2e2f1 100644 --- a/datasets/wmt17/wmt_utils.py +++ b/datasets/wmt17/wmt_utils.py @@ -875,7 +875,7 @@ def gen(): if split_path[-1] == "txt": # CWMT lang = split_path[-2].split("_")[-1] - lang = "zh" if lang in ("ch", "cn") else lang + lang = "zh" if lang in ("ch", "cn", "c[hn]") else lang else: lang = split_path[-1] diff --git a/datasets/wmt18/wmt_utils.py b/datasets/wmt18/wmt_utils.py index c2b4f84adb4..791b62f4dea 100644 --- a/datasets/wmt18/wmt_utils.py +++ b/datasets/wmt18/wmt_utils.py @@ -875,7 +875,7 @@ def gen(): if split_path[-1] == "txt": # CWMT lang = split_path[-2].split("_")[-1] - lang = "zh" if lang in ("ch", "cn") else lang + lang = "zh" if lang in ("ch", "cn", "c[hn]") else lang else: lang = split_path[-1] diff --git a/datasets/wmt19/wmt_utils.py b/datasets/wmt19/wmt_utils.py index a6ff54d7323..fa3bb4b4207 100644 --- a/datasets/wmt19/wmt_utils.py +++ b/datasets/wmt19/wmt_utils.py @@ -875,7 +875,7 @@ def gen(): if split_path[-1] == "txt": # CWMT lang = split_path[-2].split("_")[-1] - lang = "zh" if lang in ("ch", "cn") else lang + lang = "zh" if lang in ("ch", "cn", "c[hn]") else lang else: lang = split_path[-1] diff --git a/datasets/wmt_t2t/wmt_utils.py b/datasets/wmt_t2t/wmt_utils.py index bd1b850178a..719e72b30ca 100644 --- a/datasets/wmt_t2t/wmt_utils.py +++ b/datasets/wmt_t2t/wmt_utils.py @@ -875,7 +875,7 @@ def gen(): if split_path[-1] == "txt": # CWMT lang = split_path[-2].split("_")[-1] - lang = "zh" if lang in ("ch", "cn") else lang + lang = "zh" if lang in ("ch", "cn", "c[hn]") else lang else: lang = split_path[-1]