diff --git a/datasets/wmt14/wmt_utils.py b/datasets/wmt14/wmt_utils.py index 556ef9189f0..0786b63a39c 100644 --- a/datasets/wmt14/wmt_utils.py +++ b/datasets/wmt14/wmt_utils.py @@ -872,7 +872,7 @@ def gen(): if split_path[-1] == "txt": # CWMT lang = split_path[-2].split("_")[-1] - lang = "zh" if lang in ("ch", "cn") else lang + lang = "zh" if lang in ("ch", "cn", "c[hn]") else lang else: lang = split_path[-1] diff --git a/datasets/wmt15/wmt_utils.py b/datasets/wmt15/wmt_utils.py index e7cc48474f2..bc7b9f2b948 100644 --- a/datasets/wmt15/wmt_utils.py +++ b/datasets/wmt15/wmt_utils.py @@ -875,7 +875,7 @@ def gen(): if split_path[-1] == "txt": # CWMT lang = split_path[-2].split("_")[-1] - lang = "zh" if lang in ("ch", "cn") else lang + lang = "zh" if lang in ("ch", "cn", "c[hn]") else lang else: lang = split_path[-1] diff --git a/datasets/wmt16/wmt_utils.py b/datasets/wmt16/wmt_utils.py index 0bd210bc178..ac51439e587 100644 --- a/datasets/wmt16/wmt_utils.py +++ b/datasets/wmt16/wmt_utils.py @@ -875,7 +875,7 @@ def gen(): if split_path[-1] == "txt": # CWMT lang = split_path[-2].split("_")[-1] - lang = "zh" if lang in ("ch", "cn") else lang + lang = "zh" if lang in ("ch", "cn", "c[hn]") else lang else: lang = split_path[-1] diff --git a/datasets/wmt17/wmt_utils.py b/datasets/wmt17/wmt_utils.py index 7ab7bd361f4..16b2ce2e2f1 100644 --- a/datasets/wmt17/wmt_utils.py +++ b/datasets/wmt17/wmt_utils.py @@ -875,7 +875,7 @@ def gen(): if split_path[-1] == "txt": # CWMT lang = split_path[-2].split("_")[-1] - lang = "zh" if lang in ("ch", "cn") else lang + lang = "zh" if lang in ("ch", "cn", "c[hn]") else lang else: lang = split_path[-1] diff --git a/datasets/wmt18/wmt_utils.py b/datasets/wmt18/wmt_utils.py index c2b4f84adb4..791b62f4dea 100644 --- a/datasets/wmt18/wmt_utils.py +++ b/datasets/wmt18/wmt_utils.py @@ -875,7 +875,7 @@ def gen(): if split_path[-1] == "txt": # CWMT lang = split_path[-2].split("_")[-1] - lang = "zh" if lang in ("ch", "cn") else lang + lang = "zh" if lang in ("ch", "cn", "c[hn]") else lang else: lang = split_path[-1] diff --git a/datasets/wmt19/wmt_utils.py b/datasets/wmt19/wmt_utils.py index a6ff54d7323..fa3bb4b4207 100644 --- a/datasets/wmt19/wmt_utils.py +++ b/datasets/wmt19/wmt_utils.py @@ -875,7 +875,7 @@ def gen(): if split_path[-1] == "txt": # CWMT lang = split_path[-2].split("_")[-1] - lang = "zh" if lang in ("ch", "cn") else lang + lang = "zh" if lang in ("ch", "cn", "c[hn]") else lang else: lang = split_path[-1] diff --git a/datasets/wmt_t2t/wmt_utils.py b/datasets/wmt_t2t/wmt_utils.py index bd1b850178a..719e72b30ca 100644 --- a/datasets/wmt_t2t/wmt_utils.py +++ b/datasets/wmt_t2t/wmt_utils.py @@ -875,7 +875,7 @@ def gen(): if split_path[-1] == "txt": # CWMT lang = split_path[-2].split("_")[-1] - lang = "zh" if lang in ("ch", "cn") else lang + lang = "zh" if lang in ("ch", "cn", "c[hn]") else lang else: lang = split_path[-1]