hedyorg · Felienne · Dec 1, 2023 · Dec 1, 2023 · Dec 2, 2023 · Dec 2, 2023
diff --git a/hedy.py b/hedy.py
@@ -1254,6 +1254,12 @@ def valid_echo(ast):
 class IsComplete(Filter):
     def __init__(self, level):
         self.level = level
+
+    # Note that there are actually two types of "error productions":
+    # true ones that live in the grammar, like error_ask_dep_2,
+    # and ones like these, where the parser combines valid and invalid
+    # versions in one rule, like print: _PRINT (text)?
+
     # print, ask and echo can miss arguments and then are not complete
     # used to generate more informative error messages
     # tree is transformed to a node of [True] or [False, args, line_number]

diff --git a/hedy_content.py b/hedy_content.py
@@ -15,10 +15,6 @@
 ALL_LANGUAGES = {}
 ALL_KEYWORD_LANGUAGES = {}
 
-# Todo TB -> We create this list manually, but it would be nice if we find
-# a way to automate this as well
-NON_LATIN_LANGUAGES = ['ar', 'bg', 'bn', 'el', 'fa', 'hi', 'he', 'pa_PK', 'ru', 'zh_Hans']
-
 # Babel has a different naming convention than Weblate and doesn't support some languages -> fix this manually
 CUSTOM_BABEL_LANGUAGES = {'pa_PK': 'pa_Arab_PK',
                           'kmr': 'ku_TR',

diff --git a/hedy_translation.py b/hedy_translation.py
@@ -75,6 +75,7 @@ def get_target_keyword(keyword_dict, keyword):
 
 def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1):
     """ "Return code with keywords translated to language of choice in level of choice"""
+
     if input_string == "":
         return " "  # empty string is True, so output something else that looks like the empty string
 
@@ -100,7 +101,7 @@ def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1):
         ordered_rules = reversed(sorted(translator.rules, key=operator.attrgetter("line", "start")))
 
         # checks whether any error production nodes are present in the parse tree
-        hedy.is_program_valid(program_root, input_string, level, from_lang)
+        # hedy.is_program_valid(program_root, input_string, level, from_lang)
 
         result = processed_input
         for rule in ordered_rules:
@@ -116,7 +117,7 @@ def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1):
         result = "\n".join([line for line in result.splitlines()])
         result = result.replace("#ENDBLOCK", "")
 
-        # we have to reverse escaping or translating and retranslating will add an unlimied number of slashes
+        # we have to reverse escaping or translating and retranslating will add an unlimited number of slashes
         if level >= 4:
             result = result.replace("\\\\", "\\")
 
@@ -179,17 +180,13 @@ def find_keyword_in_rules(rules, keyword, start_line, end_line, start_column, en
 
 
 def get_original_keyword(keyword_dict, keyword, line):
-    found = False
     for word in keyword_dict[keyword]:
         if word in line:
-            original = word
-            found = True
+            return word
+
     # If we can't find the keyword, it means that it isn't part of the valid keywords for this language
     # so return original instead
-    if found:
-        return original
-    else:
-        return keyword
+    return keyword
 
 
 class Translator(Visitor):
@@ -239,13 +236,18 @@ def turn(self, tree):
         self.add_rule("_TURN", "turn", tree)
 
     def left(self, tree):
-        token = tree.children[0]
-        rule = Rule("left", token.line, token.column - 1, token.end_column - 2, token.value)
+        # for some Arabic rules (left, right, random) the parser returns several tokens instead of one; join them
+        token_start = tree.children[0]
+        token_end = tree.children[-1]
+        value = ''.join(tree.children)
+        rule = Rule("left", token_start.line, token_start.column - 1, token_end.end_column - 2, value)
         self.rules.append(rule)
 
     def right(self, tree):
-        token = tree.children[0]
-        rule = Rule("right", token.line, token.column - 1, token.end_column - 2, token.value)
+        token_start = tree.children[0]
+        token_end = tree.children[-1]
+        value = ''.join(tree.children)
+        rule = Rule("right", token_start.line, token_start.column - 1, token_end.end_column - 2, value)
         self.rules.append(rule)
 
     def assign_list(self, tree):
@@ -270,10 +272,19 @@ def remove(self, tree):
         self.add_rule("_FROM", "from", tree)
 
     def random(self, tree):
-        token = tree.children[0]
-        rule = Rule("random", token.line, token.column - 1, token.end_column - 2, token.value)
+        # for Arabic keywords the parser can return separate tokens instead of one, so join them
+        token_start = tree.children[0]
+        token_end = tree.children[-1]
+        value = ''.join(tree.children)
+        rule = Rule("random", token_start.line, token_start.column - 1, token_end.end_column - 2, value)
         self.rules.append(rule)
 
+    def error_ask_dep_2(self, tree):
+        self.add_rule("_ASK", "ask", tree)
+
+    def error_echo_dep_2(self, tree):
+        self.add_rule("_ECHO", "echo", tree)
+
     def ifs(self, tree):
         self.add_rule("_IF", "if", tree)
 

diff --git a/tests/Tester.py b/tests/Tester.py
@@ -1,3 +1,4 @@
+import random
 import textwrap
 import pickle
 import hashlib
@@ -264,25 +265,6 @@ def single_level_tester(
                     if expected is not None:
                         self.assertEqual(expected, result.code)
 
-                    if translate:
-                        if lang == 'en':  # if it is English
-                            # and if the code transpiles (evidenced by the fact that we reach this
-                            # line) we should be able to translate too
-
-                            # TODO FH Feb 2022: we pick Dutch here not really fair or good practice :D
-                            # Maybe we should do a random language?
-                            in_dutch = hedy_translation.translate_keywords(
-                                code, from_lang=lang, to_lang="nl", level=self.level)
-                            back_in_english = hedy_translation.translate_keywords(
-                                in_dutch, from_lang="nl", to_lang=lang, level=self.level).strip()
-                            self.assert_translated_code_equal(code, back_in_english)
-                        else:  # not English? translate to it and back!
-                            in_english = hedy_translation.translate_keywords(
-                                code, from_lang=lang, to_lang="en", level=self.level)
-                            back_in_org = hedy_translation.translate_keywords(
-                                in_english, from_lang="en", to_lang=lang, level=self.level)
-                            self.assert_translated_code_equal(code, back_in_org)
-
                     all_commands = result.commands
                     if expected_commands is not None:
                         self.assertEqual(expected_commands, all_commands)
@@ -295,6 +277,19 @@ def single_level_tester(
                         self.assertEqual(output, HedyTester.run_code(result))
                         self.assertTrue(extra_check_function(result))
 
+            # Whether or not the code is expected to raise an exception,
+            # if it parses, it should always be possible
+            # to translate it, unless there is a NoIndentationException,
+            # because in that case our preprocessor throws the error so there is no parse tree
+            # (TODO: maybe parse first?)
+
+            skipped_exceptions = [hedy.exceptions.ParseException, hedy.exceptions.NoIndentationException,
+                                  hedy.exceptions.IndentationException, hedy.exceptions.LockedLanguageFeatureException,
+                                  hedy.exceptions.CodePlaceholdersPresentException, hedy.exceptions.InvalidCommandException]
+
+            if translate and not exception in skipped_exceptions:
+                self.verify_translation(code, lang, level)
+
             # all ok? -> save hash!
             hash_of_run = create_hash(get_hedy_source_hash(), test_hash)
             if hash_of_run:
@@ -303,6 +298,30 @@ def single_level_tester(
                 with open(filename, "w") as fp:
                     fp.write("")
 
+    def verify_translation(self, code, lang, level):
+        """Check that `code` survives a keyword-translation round trip."""
+        if lang == 'en':
+            # English source: round-trip through a randomly picked language.
+            # TODO: ALL_KEYWORD_LANGUAGES.keys() no longer really holds all
+            # keyword languages, so use a hand-picked list; fix or remove.
+
+            # a nice mix of latin/non-latin and l2r and r2l!
+            target_languages = ['ar', 'ca', 'sq', 'bg', 'es', 'fi', 'fr', 'he',
+                                'nl', 'hi', 'ur', 'te', 'th', 'vi', 'uk', 'tr']
+            to_lang = random.choice(target_languages)
+
+            translated = hedy_translation.translate_keywords(
+                code, from_lang=lang, to_lang=to_lang, level=level)
+            back_in_english = hedy_translation.translate_keywords(
+                translated, from_lang=to_lang, to_lang=lang, level=level).strip()
+            self.assert_translated_code_equal(code, back_in_english)
+        else:  # not English? translate to it and back!
+            in_english = hedy_translation.translate_keywords(
+                code, from_lang=lang, to_lang="en", level=level)
+            back_in_org = hedy_translation.translate_keywords(
+                in_english, from_lang="en", to_lang=lang, level=level)
+            self.assert_translated_code_equal(code, back_in_org)
+
     def source_map_tester(self, code, expected_source_map: dict):
         result = hedy.transpile(code, self.level, 'en')
         self.assertDictEqual(result.source_map.get_compressed_mapping(), expected_source_map)

diff --git a/tests/test_level/test_level_01.py b/tests/test_level/test_level_01.py
@@ -478,6 +478,18 @@ def test_turn_left_nl(self):
             lang='nl'
         )
 
+    def test_turn_ar(self):
+        # NOTE: this program doesn't translate; the cause is unknown (needs investigation)
+        code = "استدر يسار"
+        expected = "t.left(90)"
+
+        self.single_level_tester(
+            code=code,
+            expected=expected,
+            extra_check_function=self.is_turtle(),
+            lang='ar'
+        )
+
     def test_turn_with_text_gives_error(self):
         code = textwrap.dedent("""\
         turn koekoek
@@ -691,12 +703,14 @@ def test_one_mistake_not_skipped(self):
 
     def test_lonely_echo_gives_LonelyEcho(self):
         code = "echo wat dan?"
-        self.single_level_tester(code, exception=hedy.exceptions.LonelyEchoException)
+        self.single_level_tester(
+            code,
+            exception=hedy.exceptions.LonelyEchoException)
 
     def test_echo_before_ask_gives_lonely_echo(self):
         code = textwrap.dedent("""\
         echo what can't we do?
-        ask time travel """)
+        ask time travel""")
         self.single_level_tester(code, exception=hedy.exceptions.LonelyEchoException)
 
     def test_pint_after_empty_line_gives_error_line_3(self):
@@ -755,6 +769,7 @@ def test_non_keyword_with_argument_gives_invalid(self):
             code=code,
             expected=expected,
             skipped_mappings=skipped_mappings,
+            translate=False,
             extra_check_function=lambda c: c.arguments['invalid_command'] in ['aks', 'prind'],
             max_level=5,
         )

diff --git a/tests/test_level/test_level_04.py b/tests/test_level/test_level_04.py
@@ -313,7 +313,7 @@ def test_assign_catalan_var_name(self):
 
     def test_place_holder_no_space(self):
         # same as print for level 4
-        code = "print _Escape from the haunted house!_"
+        code = "print _ Escape from the haunted house! _"
 
         self.multi_level_tester(
             code=code,

diff --git a/tests/test_level/test_level_08.py b/tests/test_level/test_level_08.py
@@ -359,7 +359,8 @@ def test_if_else_no_indentation(self):
 
         # gives the right exception for all levels even though it misses brackets
         # because the indent check happens before parsing
-        self.multi_level_tester(code=code, exception=hedy.exceptions.NoIndentationException)
+        self.multi_level_tester(code=code,
+                                exception=hedy.exceptions.NoIndentationException)
 
     def test_if_equality_print_else_print(self):
         code = textwrap.dedent("""\

diff --git a/tests/test_translation_level/test_translation_level_01.py b/tests/test_translation_level/test_translation_level_01.py
@@ -89,6 +89,15 @@ def test_ask_dutch_english(self):
 
         self.assertEqual(expected, result)
 
+    def test_echo_french(self):
+        code = "echo Hedy"
+
+        result = hedy_translation.translate_keywords(
+            code, "en", "fr", self.level)
+        expected = "réponds Hedy"
+
+        self.assertEqual(expected, result)
+
     def test_echo_dutch_english(self):
         code = "vraag stel je vraag\necho tekst"
 
@@ -137,16 +146,8 @@ def test_translate_back(self):
 
         self.assertEqual(code, result)
 
-    def test_invalid(self):
-        code = "hallo"
-
-        with self.assertRaises(hedy.exceptions.HedyException):
-            hedy_translation.translate_keywords(code,
-                                                from_lang="en",
-                                                to_lang="nl",
-                                                level=self.level)
-
     # No translation because of the invalid space error
+
     def test_invalid_space(self):
         # FH, dec 23: We remove leading spaces in the translation, since the editor
         # also does this. It means dropping a space from the program but that, I think, is better

diff --git a/tests/test_translation_level/test_translation_level_02.py b/tests/test_translation_level/test_translation_level_02.py
@@ -111,15 +111,6 @@ def test_translate_back(self):
 
         self.assertEqual(expected, result)
 
-    def test_invalid(self):
-        code = "hallo"
-
-        with self.assertRaises(hedy.exceptions.HedyException):
-            hedy_translation.translate_keywords(code,
-                                                from_lang="en",
-                                                to_lang="nl",
-                                                level=self.level)
-
     def test_invalid_space(self):
         code = " print Hedy"
 
@@ -138,6 +129,15 @@ def test_echo(self):
 
         self.assertEqual(expected, result)
 
+    def test_echo_french(self):
+        code = "echo Hedy"
+
+        result = hedy_translation.translate_keywords(
+            code, "en", "fr", self.level)
+        expected = "réponds Hedy"
+
+        self.assertEqual(expected, result)
+
     @parameterized.expand(
         HedyTester.as_list_of_tuples(
             all_keywords["ask"],