Skip to content
This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit 77146aa

Browse files
voluntadpear authored and copybara-github committed
Merge of PR #1626
PiperOrigin-RevId: 258838381
1 parent 69a81e8 commit 77146aa

File tree

3 files changed

+9
-1
lines changed

3 files changed

+9
-1
lines changed

docs/walkthrough.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -210,6 +210,7 @@ There are a number of translation data-sets in T2T:
210210
* English-Czech: `--problem=translate_encs_wmt32k`
211211
* English-Chinese: `--problem=translate_enzh_wmt32k`
212212
* English-Vietnamese: `--problem=translate_envi_iwslt32k`
213+
* English-Spanish: `--problem=translate_enes_wmt32k`
213214

214215
You can get translations in the other direction by appending `_rev` to
215216
the problem name, e.g., for German-English use

tensor2tensor/data_generators/all_problems.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@
7878
"tensor2tensor.data_generators.transduction_problems",
7979
"tensor2tensor.data_generators.translate_encs",
8080
"tensor2tensor.data_generators.translate_ende",
81+
"tensor2tensor.data_generators.translate_enes",
8182
"tensor2tensor.data_generators.translate_enet",
8283
"tensor2tensor.data_generators.translate_enfr",
8384
"tensor2tensor.data_generators.translate_enid",

tensor2tensor/data_generators/translate_enes.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@
5353
],
5454
]
5555

56+
5657
@registry.register_problem
5758
class TranslateEnesWmt32k(translate.TranslateProblem):
5859
"""En-es translation trained on WMT corpus."""
@@ -70,6 +71,7 @@ def source_data_files(self, dataset_split):
7071
def vocab_data_files(self):
7172
return _ENES_TRAIN_DATASETS
7273

74+
7375
@registry.register_problem
7476
class TranslateEnesWmtClean32k(TranslateEnesWmt32k):
7577
"""En-es translation trained on WMT with further cleaning."""
@@ -82,6 +84,7 @@ def use_vocab_from_other_problem(self):
8284
def datatypes_to_clean(self):
8385
return ["txt"]
8486

87+
8588
@registry.register_problem
8689
class TranslateEnesWmt32kPacked(TranslateEnesWmt32k):
8790

@@ -93,6 +96,7 @@ def packed_length(self):
9396
def use_vocab_from_other_problem(self):
9497
return TranslateEnesWmt32k()
9598

99+
96100
@registry.register_problem
97101
class TranslateEnesWmt8k(TranslateEnesWmt32k):
98102
"""Problem spec for WMT En-Es translation."""
@@ -101,6 +105,7 @@ class TranslateEnesWmt8k(TranslateEnesWmt32k):
101105
def approx_vocab_size(self):
102106
return 2**13 # 8192
103107

108+
104109
@registry.register_problem
105110
class TranslateEnesWmt8kPacked(TranslateEnesWmt8k):
106111

@@ -112,10 +117,11 @@ def packed_length(self):
112117
def use_vocab_from_other_problem(self):
113118
return TranslateEnesWmt8k()
114119

120+
115121
@registry.register_problem
116122
class TranslateEnesWmtCharacters(TranslateEnesWmt8k):
117123
"""Problem spec for WMT En-Es translation."""
118124

119125
@property
120126
def vocab_type(self):
121-
return text_problems.VocabType.CHARACTER
127+
return text_problems.VocabType.CHARACTER

0 commit comments

Comments
 (0)