diff --git a/recipes/translation/translate_jsonl.py b/recipes/translation/translate_jsonl.py index a298bb04ee..6899d907d0 100644 --- a/recipes/translation/translate_jsonl.py +++ b/recipes/translation/translate_jsonl.py @@ -96,6 +96,7 @@ def full_language_name(lang_code: str) -> str: @nested_dataclass(kw_only=True) class TranslationConfig(GenerationTaskConfig): + source_lang: str = "en" target_lang: str = "zh" use_skipme: bool = False fields_to_translate: List[str] = field(default_factory=lambda: ["conversations.*.value"]) @@ -223,7 +224,7 @@ def preprocess_data(self, data): async_position = 0 for text in all_translatable_lines: translation_data_point = { - "source_lang": full_language_name("en"), + "source_lang": full_language_name(self.cfg.source_lang), "target_lang": full_language_name(self.cfg.target_lang), "src": text, self.cfg.async_position_key: async_position, @@ -242,7 +243,7 @@ def log_example_prompt(self, data): data[0] if isinstance(data[0], dict) and "src" in data[0] else { - "source_lang": "English", + "source_lang": full_language_name(self.cfg.source_lang), "target_lang": full_language_name(self.cfg.target_lang), "src": "Sample text to translate", }