Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LANGUAGE] Check translations #4867

Closed
wants to merge 49 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
64cf381
move exception into isvalid
Felienne Dec 1, 2023
372ceb4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 1, 2023
145ae6d
move more raises up and leave some notes to self
Felienne Dec 2, 2023
bd084d8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 2, 2023
4a787b0
clarify for further work
Felienne Dec 2, 2023
80d4b58
Merge branch 'main' into clean-up-invalid
Felienne Dec 4, 2023
4d97bbc
can happen when you interrupt a test!
Felienne Dec 4, 2023
3b30591
more rewriting
Felienne Dec 4, 2023
a1e7f25
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 4, 2023
3827304
Merge branch 'main' into clean-up-invalid
Felienne Dec 5, 2023
ecaa994
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 5, 2023
9677022
fix tests
Felienne Dec 5, 2023
fa73d5f
Merge branch 'clean-up-invalid' of https://github.com/hedyorg/hedy in…
Felienne Dec 5, 2023
8bd9e43
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 5, 2023
310df05
update tree rewriting (not ready yet!)
Felienne Dec 5, 2023
c7669b3
Revert "update tree rewriting (not ready yet!)"
Felienne Dec 5, 2023
a82a77d
Revert "Merge branch 'clean-up-invalid' of https://github.com/hedyorg…
Felienne Dec 5, 2023
1597c94
merge
Felienne Dec 5, 2023
a494b46
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 5, 2023
0c37387
move all commands
Felienne Dec 5, 2023
4fbedfb
move all exceptions into isvalid
Felienne Dec 6, 2023
4edbb3b
revert grammar changes
Felienne Dec 6, 2023
947d78a
update
Felienne Dec 6, 2023
8ce8e93
one more grammar change
Felienne Dec 6, 2023
cf1b87f
finish tests
Felienne Dec 6, 2023
b4648b2
Merge branch 'clean-up-invalid' of https://github.com/hedyorg/hedy in…
Felienne Dec 6, 2023
663bc23
Merge branch 'main' into clean-up-invalid
Felienne Dec 6, 2023
fe0355e
manual merge
Felienne Dec 6, 2023
9bb6c94
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 6, 2023
1fc39fc
remove unused code
Felienne Dec 6, 2023
a37f825
Merge branch 'clean-up-invalid' of https://github.com/hedyorg/hedy in…
Felienne Dec 6, 2023
61231fe
refactor
Felienne Dec 6, 2023
3c3936c
ok ok precommit...!
Felienne Dec 6, 2023
e6d3202
remove broken programs
Felienne Dec 6, 2023
150e17e
add note to self
Felienne Dec 7, 2023
e1925db
skip invalid and add error production (WIP)
Felienne Dec 7, 2023
08575e6
add fr test
Felienne Dec 7, 2023
156733d
remove constant
Felienne Dec 7, 2023
98bfc05
change tester and add arabic test
Felienne Dec 7, 2023
b47539d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 7, 2023
0a1a690
update tester by skipping some exceptions
Felienne Dec 7, 2023
728c88d
add arabic back in and add level
Felienne Dec 7, 2023
7e010a0
fix arabic token issue
Felienne Dec 7, 2023
4158cab
special case for random too and refactor
Felienne Dec 7, 2023
fda0228
Merge branch 'main' into check-translations
Felienne Dec 8, 2023
9826264
Merge branch 'check-translations' of https://github.com/hedyorg/hedy …
Felienne Dec 8, 2023
52d9ccb
update tests
Felienne Dec 8, 2023
25bea7e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 8, 2023
40b269f
update tests
Felienne Dec 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions hedy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1254,6 +1254,12 @@ def valid_echo(ast):
class IsComplete(Filter):
def __init__(self, level):
self.level = level

# ah so we actually have 2 types of "error productions"!
# true ones that live in the grammar like error_ask_dep_2
# and these ones where the parser combines valid and not valid
# versions, like print: _PRINT (text)?

# print, ask and echo can miss arguments and then are not complete
# used to generate more informative error messages
# tree is transformed to a node of [True] or [False, args, line_number]
Expand Down
4 changes: 0 additions & 4 deletions hedy_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@
ALL_LANGUAGES = {}
ALL_KEYWORD_LANGUAGES = {}

# Todo TB -> We create this list manually, but it would be nice if we find
# a way to automate this as well
NON_LATIN_LANGUAGES = ['ar', 'bg', 'bn', 'el', 'fa', 'hi', 'he', 'pa_PK', 'ru', 'zh_Hans']

# Babel has a different naming convention than Weblate and doesn't support some languages -> fix this manually
CUSTOM_BABEL_LANGUAGES = {'pa_PK': 'pa_Arab_PK',
'kmr': 'ku_TR',
Expand Down
41 changes: 26 additions & 15 deletions hedy_translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def get_target_keyword(keyword_dict, keyword):

def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1):
""" "Return code with keywords translated to language of choice in level of choice"""

if input_string == "":
return " " # empty string is True, so output something else that looks like the empty string

Expand All @@ -100,7 +101,7 @@ def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1):
ordered_rules = reversed(sorted(translator.rules, key=operator.attrgetter("line", "start")))

# checks whether any error production nodes are present in the parse tree
hedy.is_program_valid(program_root, input_string, level, from_lang)
# hedy.is_program_valid(program_root, input_string, level, from_lang)

result = processed_input
for rule in ordered_rules:
Expand All @@ -116,7 +117,7 @@ def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1):
result = "\n".join([line for line in result.splitlines()])
result = result.replace("#ENDBLOCK", "")

# we have to reverse escaping or translating and retranslating will add an unlimied number of slashes
# we have to reverse escaping or translating and retranslating will add an unlimited number of slashes
if level >= 4:
result = result.replace("\\\\", "\\")

Expand Down Expand Up @@ -179,17 +180,13 @@ def find_keyword_in_rules(rules, keyword, start_line, end_line, start_column, en


def get_original_keyword(keyword_dict, keyword, line):
found = False
for word in keyword_dict[keyword]:
if word in line:
original = word
found = True
return word

# If we can't find the keyword, it means that it isn't part of the valid keywords for this language
# so return original instead
if found:
return original
else:
return keyword
return keyword


class Translator(Visitor):
Expand Down Expand Up @@ -239,13 +236,18 @@ def turn(self, tree):
self.add_rule("_TURN", "turn", tree)

def left(self, tree):
token = tree.children[0]
rule = Rule("left", token.line, token.column - 1, token.end_column - 2, token.value)
# somehow for some Arabic rules (left, right, random) the parser returns separate tokens instead of one!
token_start = tree.children[0]
token_end = tree.children[-1]
value = ''.join(tree.children)
rule = Rule("left", token_start.line, token_start.column - 1, token_end.end_column - 2, value)
self.rules.append(rule)

def right(self, tree):
token = tree.children[0]
rule = Rule("right", token.line, token.column - 1, token.end_column - 2, token.value)
token_start = tree.children[0]
token_end = tree.children[-1]
value = ''.join(tree.children)
rule = Rule("right", token_start.line, token_start.column - 1, token_end.end_column - 2, value)
self.rules.append(rule)

def assign_list(self, tree):
Expand All @@ -270,10 +272,19 @@ def remove(self, tree):
self.add_rule("_FROM", "from", tree)

def random(self, tree):
token = tree.children[0]
rule = Rule("random", token.line, token.column - 1, token.end_column - 2, token.value)
# somehow for Arabic tokens, we parse into separate tokens instead of one!
token_start = tree.children[0]
token_end = tree.children[-1]
value = ''.join(tree.children)
rule = Rule("random", token_start.line, token_start.column - 1, token_end.end_column - 2, value)
self.rules.append(rule)

def error_ask_dep_2(self, tree):
self.add_rule("_ASK", "ask", tree)

def error_echo_dep_2(self, tree):
self.add_rule("_ECHO", "echo", tree)

def ifs(self, tree):
self.add_rule("_IF", "if", tree)

Expand Down
57 changes: 38 additions & 19 deletions tests/Tester.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import random
import textwrap
import pickle
import hashlib
Expand Down Expand Up @@ -264,25 +265,6 @@ def single_level_tester(
if expected is not None:
self.assertEqual(expected, result.code)

if translate:
if lang == 'en': # if it is English
# and if the code transpiles (evidenced by the fact that we reach this
# line) we should be able to translate too

# TODO FH Feb 2022: we pick Dutch here not really fair or good practice :D
# Maybe we should do a random language?
in_dutch = hedy_translation.translate_keywords(
code, from_lang=lang, to_lang="nl", level=self.level)
back_in_english = hedy_translation.translate_keywords(
in_dutch, from_lang="nl", to_lang=lang, level=self.level).strip()
self.assert_translated_code_equal(code, back_in_english)
else: # not English? translate to it and back!
in_english = hedy_translation.translate_keywords(
code, from_lang=lang, to_lang="en", level=self.level)
back_in_org = hedy_translation.translate_keywords(
in_english, from_lang="en", to_lang=lang, level=self.level)
self.assert_translated_code_equal(code, back_in_org)

all_commands = result.commands
if expected_commands is not None:
self.assertEqual(expected_commands, all_commands)
Expand All @@ -295,6 +277,19 @@ def single_level_tester(
self.assertEqual(output, HedyTester.run_code(result))
self.assertTrue(extra_check_function(result))

# whether or not the code should give an exception,
# if it parses, it should always be possible
# to translate it, unless there is a NoIndentationException
# because in that case our preprocessor throws the error so there is no parse tree
# (todo maybe parse first?)

skipped_exceptions = [hedy.exceptions.ParseException, hedy.exceptions.NoIndentationException,
hedy.exceptions.IndentationException, hedy.exceptions.LockedLanguageFeatureException,
hedy.exceptions.CodePlaceholdersPresentException, hedy.exceptions.InvalidCommandException]

if translate and not exception in skipped_exceptions:
self.verify_translation(code, lang, level)

# all ok? -> save hash!
hash_of_run = create_hash(get_hedy_source_hash(), test_hash)
if hash_of_run:
Expand All @@ -303,6 +298,30 @@ def single_level_tester(
with open(filename, "w") as fp:
fp.write("")

def verify_translation(self, code, lang, level):
if lang == 'en': # if it is English

# pick a random language to translate to
# all = list(ALL_KEYWORD_LANGUAGES.keys()) <- this no longer really holds
# all keyword languages! TODO fix or remove

# a nice mix of Latin/non-Latin and left-to-right and right-to-left languages!
all = ['ar', 'ca', 'sq', 'bg', 'es', 'fi', 'fr', 'he', 'nl', 'hi', 'ur', 'te', 'th', 'vi', 'uk', 'tr']

to_lang = random.choice(all)

translated = hedy_translation.translate_keywords(
code, from_lang=lang, to_lang=to_lang, level=level)
back_in_english = hedy_translation.translate_keywords(
translated, from_lang=to_lang, to_lang=lang, level=level).strip()
self.assert_translated_code_equal(code, back_in_english)
else: # not English? translate to it and back!
in_english = hedy_translation.translate_keywords(
code, from_lang=lang, to_lang="en", level=level)
back_in_org = hedy_translation.translate_keywords(
in_english, from_lang="en", to_lang=lang, level=level)
self.assert_translated_code_equal(code, back_in_org)

def source_map_tester(self, code, expected_source_map: dict):
result = hedy.transpile(code, self.level, 'en')
self.assertDictEqual(result.source_map.get_compressed_mapping(), expected_source_map)
Expand Down
19 changes: 17 additions & 2 deletions tests/test_level/test_level_01.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,18 @@ def test_turn_left_nl(self):
lang='nl'
)

def test_turn_ar(self):
# doesn't translate, I don't know why!!
code = "استدر يسار"
expected = "t.left(90)"

self.single_level_tester(
code=code,
expected=expected,
extra_check_function=self.is_turtle(),
lang='ar'
)

def test_turn_with_text_gives_error(self):
code = textwrap.dedent("""\
turn koekoek
Expand Down Expand Up @@ -691,12 +703,14 @@ def test_one_mistake_not_skipped(self):

def test_lonely_echo_gives_LonelyEcho(self):
code = "echo wat dan?"
self.single_level_tester(code, exception=hedy.exceptions.LonelyEchoException)
self.single_level_tester(
code,
exception=hedy.exceptions.LonelyEchoException)

def test_echo_before_ask_gives_lonely_echo(self):
code = textwrap.dedent("""\
echo what can't we do?
ask time travel """)
ask time travel""")
self.single_level_tester(code, exception=hedy.exceptions.LonelyEchoException)

def test_pint_after_empty_line_gives_error_line_3(self):
Expand Down Expand Up @@ -755,6 +769,7 @@ def test_non_keyword_with_argument_gives_invalid(self):
code=code,
expected=expected,
skipped_mappings=skipped_mappings,
translate=False,
extra_check_function=lambda c: c.arguments['invalid_command'] in ['aks', 'prind'],
max_level=5,
)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_level/test_level_04.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def test_assign_catalan_var_name(self):

def test_place_holder_no_space(self):
# same as print for level 4
code = "print _Escape from the haunted house!_"
code = "print _ Escape from the haunted house! _"

self.multi_level_tester(
code=code,
Expand Down
3 changes: 2 additions & 1 deletion tests/test_level/test_level_08.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,8 @@ def test_if_else_no_indentation(self):

# gives the right exception for all levels even though it misses brackets
# because the indent check happens before parsing
self.multi_level_tester(code=code, exception=hedy.exceptions.NoIndentationException)
self.multi_level_tester(code=code,
exception=hedy.exceptions.NoIndentationException)

def test_if_equality_print_else_print(self):
code = textwrap.dedent("""\
Expand Down
19 changes: 10 additions & 9 deletions tests/test_translation_level/test_translation_level_01.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,15 @@ def test_ask_dutch_english(self):

self.assertEqual(expected, result)

def test_echo_french(self):
code = "echo Hedy"

result = hedy_translation.translate_keywords(
code, "en", "fr", self.level)
expected = "réponds Hedy"

self.assertEqual(expected, result)

def test_echo_dutch_english(self):
code = "vraag stel je vraag\necho tekst"

Expand Down Expand Up @@ -137,16 +146,8 @@ def test_translate_back(self):

self.assertEqual(code, result)

def test_invalid(self):
code = "hallo"

with self.assertRaises(hedy.exceptions.HedyException):
hedy_translation.translate_keywords(code,
from_lang="en",
to_lang="nl",
level=self.level)

# No translation because of the invalid space error

def test_invalid_space(self):
# FH, dec 23: We remove leading spaces in the translation, since the editor
# also does this. It means dropping a space from the program but that, I think, is better
Expand Down
18 changes: 9 additions & 9 deletions tests/test_translation_level/test_translation_level_02.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,6 @@ def test_translate_back(self):

self.assertEqual(expected, result)

def test_invalid(self):
code = "hallo"

with self.assertRaises(hedy.exceptions.HedyException):
hedy_translation.translate_keywords(code,
from_lang="en",
to_lang="nl",
level=self.level)

def test_invalid_space(self):
code = " print Hedy"

Expand All @@ -138,6 +129,15 @@ def test_echo(self):

self.assertEqual(expected, result)

def test_echo_french(self):
code = "echo Hedy"

result = hedy_translation.translate_keywords(
code, "en", "fr", self.level)
expected = "réponds Hedy"

self.assertEqual(expected, result)

@parameterized.expand(
HedyTester.as_list_of_tuples(
all_keywords["ask"],
Expand Down
Loading
Loading