Skip to content

Commit

Permalink
#95 update formatting files to work with update_data.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
andrewtavis committed Jan 15, 2022
1 parent 3fd4006 commit fe3ec56
Show file tree
Hide file tree
Showing 14 changed files with 296 additions and 121 deletions.
30 changes: 21 additions & 9 deletions Data/French/nouns/format_nouns.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@

import collections
import json
import sys

with open("nounsQueried.json") as f:
nouns_list = json.load(f)
file_path = sys.argv[0]
if "French/nouns/" not in file_path:
with open("nounsQueried.json") as f:
nouns_list = json.load(f)
else: # is being called by update_data.py
with open("./French/nouns/nounsQueried.json") as f:
nouns_list = json.load(f)


def map_genders(wikidata_gender):
Expand All @@ -18,7 +24,7 @@ def map_genders(wikidata_gender):
"""
if wikidata_gender in ["masculine", "Q499327"]:
return "M"
if wikidata_gender in ["feminine", "Q1775415"]:
elif wikidata_gender in ["feminine", "Q1775415"]:
return "F"
else:
return "" # nouns could have a gender that is not valid as an attribute
Expand Down Expand Up @@ -105,11 +111,17 @@ def order_annotations(annotation):

nouns_formatted = collections.OrderedDict(sorted(nouns_formatted.items()))

with open(
"../../../Keyboards/LanguageKeyboards/French/Data/nouns.json",
"w",
encoding="utf-8",
) as f:
json.dump(nouns_formatted, f, ensure_ascii=False, indent=2)
if "French/nouns/" not in file_path:
with open(
"../../../Keyboards/LanguageKeyboards/French/Data/nouns.json",
"w",
encoding="utf-8",
) as f:
json.dump(nouns_formatted, f, ensure_ascii=False, indent=2)
else: # is being called by update_data.py
with open(
"../Keyboards/LanguageKeyboards/French/Data/nouns.json", "w", encoding="utf-8",
) as f:
json.dump(nouns_formatted, f, ensure_ascii=False, indent=2)

print(f"Wrote file nouns.json with {len(nouns_formatted)} nouns.")
28 changes: 20 additions & 8 deletions Data/French/verbs/format_verbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@

import collections
import json
import sys

with open("verbsQueried.json") as f:
verbs_list = json.load(f)
file_path = sys.argv[0]
if "French/verbs/" not in file_path:
with open("verbsQueried.json") as f:
verbs_list = json.load(f)
else: # is being called by update_data.py
with open("./French/verbs/verbsQueried.json") as f:
verbs_list = json.load(f)

verbs_formatted = {}

Expand Down Expand Up @@ -59,11 +65,17 @@ def fix_tense(tense):

verbs_formatted = collections.OrderedDict(sorted(verbs_formatted.items()))

with open(
"../../../Keyboards/LanguageKeyboards/French/Data/verbs.json",
"w",
encoding="utf-8",
) as f:
json.dump(verbs_formatted, f, ensure_ascii=False, indent=2)
if "French/verbs/" not in file_path:
with open(
"../../../Keyboards/LanguageKeyboards/French/Data/verbs.json",
"w",
encoding="utf-8",
) as f:
json.dump(verbs_formatted, f, ensure_ascii=False, indent=2)
else: # is being called by update_data.py
with open(
"../Keyboards/LanguageKeyboards/French/Data/verbs.json", "w", encoding="utf-8",
) as f:
json.dump(verbs_formatted, f, ensure_ascii=False, indent=2)

print(f"Wrote file verbs.json with {len(verbs_formatted)} verbs.")
32 changes: 22 additions & 10 deletions Data/German/nouns/format_nouns.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@

import collections
import json
import sys

with open("nounsQueried.json") as f:
nouns_list = json.load(f)
file_path = sys.argv[0]
if "German/nouns/" not in file_path:
with open("nounsQueried.json") as f:
nouns_list = json.load(f)
else: # is being called by update_data.py
with open("./German/nouns/nounsQueried.json") as f:
nouns_list = json.load(f)


def map_genders(wikidata_gender):
Expand All @@ -23,9 +29,9 @@ def map_genders(wikidata_gender):
"""
if wikidata_gender in ["masculine", "Q499327"]:
return "M"
if wikidata_gender in ["feminine", "Q1775415"]:
elif wikidata_gender in ["feminine", "Q1775415"]:
return "F"
if wikidata_gender in ["neuter", "Q1775461"]:
elif wikidata_gender in ["neuter", "Q1775461"]:
return "N"
else:
return "" # nouns could have a gender that is not valid as an attribute
Expand Down Expand Up @@ -173,11 +179,17 @@ def order_annotations(annotation):

nouns_formatted = collections.OrderedDict(sorted(nouns_formatted.items()))

with open(
"../../../Keyboards/LanguageKeyboards/German/Data/nouns.json",
"w",
encoding="utf-8",
) as f:
json.dump(nouns_formatted, f, ensure_ascii=False, indent=2)
if "German/nouns/" not in file_path:
with open(
"../../../Keyboards/LanguageKeyboards/German/Data/nouns.json",
"w",
encoding="utf-8",
) as f:
json.dump(nouns_formatted, f, ensure_ascii=False, indent=2)
else: # is being called by update_data.py
with open(
"../Keyboards/LanguageKeyboards/German/Data/nouns.json", "w", encoding="utf-8",
) as f:
json.dump(nouns_formatted, f, ensure_ascii=False, indent=2)

print(f"Wrote file nouns.json with {len(nouns_formatted)} nouns.")
30 changes: 22 additions & 8 deletions Data/German/prepositions/format_prepositions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@

import collections
import json
import sys

with open("prepositionsQueried.json") as f:
prepositions_list = json.load(f)
file_path = sys.argv[0]
if "German/prepositions/" not in file_path:
with open("prepositionsQueried.json") as f:
prepositions_list = json.load(f)
else: # is being called by update_data.py
with open("./German/prepositions/prepositionsQueried.json") as f:
prepositions_list = json.load(f)


def convert_cases(case):
Expand Down Expand Up @@ -70,11 +76,19 @@ def order_annotations(annotation):

prepositions_formatted = collections.OrderedDict(sorted(prepositions_formatted.items()))

with open(
"../../../Keyboards/LanguageKeyboards/German/Data/prepositions.json",
"w",
encoding="utf-8",
) as f:
json.dump(prepositions_formatted, f, ensure_ascii=False, indent=2)
if "German/prepositions/" not in file_path:
with open(
"../../../Keyboards/LanguageKeyboards/German/Data/prepositions.json",
"w",
encoding="utf-8",
) as f:
json.dump(prepositions_formatted, f, ensure_ascii=False, indent=2)
else: # is being called by update_data.py
with open(
"../Keyboards/LanguageKeyboards/German/Data/prepositions.json",
"w",
encoding="utf-8",
) as f:
json.dump(prepositions_formatted, f, ensure_ascii=False, indent=2)

print(f"Wrote file prepositions.json with {len(prepositions_formatted)} prepositions.")
28 changes: 20 additions & 8 deletions Data/German/verbs/format_verbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@

import collections
import json
import sys

with open("verbsQueried.json") as f:
verbs_list = json.load(f)
file_path = sys.argv[0]
if "German/verbs/" not in file_path:
with open("verbsQueried.json") as f:
verbs_list = json.load(f)
else: # is being called by update_data.py
with open("./German/verbs/verbsQueried.json") as f:
verbs_list = json.load(f)

verbs_formatted = {}

Expand Down Expand Up @@ -133,11 +139,17 @@ def assign_past_participle(verb, tense):

verbs_formatted = collections.OrderedDict(sorted(verbs_formatted.items()))

with open(
"../../../Keyboards/LanguageKeyboards/German/Data/verbs.json",
"w",
encoding="utf-8",
) as f:
json.dump(verbs_formatted, f, ensure_ascii=False, indent=2)
if "German/verbs/" not in file_path:
with open(
"../../../Keyboards/LanguageKeyboards/German/Data/verbs.json",
"w",
encoding="utf-8",
) as f:
json.dump(verbs_formatted, f, ensure_ascii=False, indent=2)
else: # is being called by update_data.py
with open(
"../Keyboards/LanguageKeyboards/German/Data/verbs.json", "w", encoding="utf-8",
) as f:
json.dump(verbs_formatted, f, ensure_ascii=False, indent=2)

print(f"Wrote file verbs.json with {len(verbs_formatted)} verbs.")
32 changes: 23 additions & 9 deletions Data/Portuguese/nouns/format_nouns.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@

import collections
import json
import sys

with open("nounsQueried.json") as f:
nouns_list = json.load(f)
file_path = sys.argv[0]
if "Portuguese/nouns/" not in file_path:
with open("nounsQueried.json") as f:
nouns_list = json.load(f)
else: # is being called by update_data.py
with open("./Portuguese/nouns/nounsQueried.json") as f:
nouns_list = json.load(f)


def map_genders(wikidata_gender):
Expand All @@ -18,7 +24,7 @@ def map_genders(wikidata_gender):
"""
if wikidata_gender in ["masculine", "Q499327"]:
return "M"
if wikidata_gender in ["feminine", "Q1775415"]:
elif wikidata_gender in ["feminine", "Q1775415"]:
return "F"
else:
return "" # nouns could have a gender that is not valid as an attribute
Expand Down Expand Up @@ -105,11 +111,19 @@ def order_annotations(annotation):

nouns_formatted = collections.OrderedDict(sorted(nouns_formatted.items()))

with open(
"../../../Keyboards/LanguageKeyboards/Portuguese/Data/nouns.json",
"w",
encoding="utf-8",
) as f:
json.dump(nouns_formatted, f, ensure_ascii=False, indent=2)
if "Portuguese/nouns/" not in file_path:
with open(
"../../../Keyboards/LanguageKeyboards/Portuguese/Data/nouns.json",
"w",
encoding="utf-8",
) as f:
json.dump(nouns_formatted, f, ensure_ascii=False, indent=2)
else: # is being called by update_data.py
with open(
"../Keyboards/LanguageKeyboards/Portuguese/Data/nouns.json",
"w",
encoding="utf-8",
) as f:
json.dump(nouns_formatted, f, ensure_ascii=False, indent=2)

print(f"Wrote file nouns.json with {len(nouns_formatted)} nouns.")
30 changes: 22 additions & 8 deletions Data/Portuguese/verbs/format_verbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@

import collections
import json
import sys

with open("verbsQueried.json") as f:
verbs_list = json.load(f)
file_path = sys.argv[0]
if "Portuguese/verbs/" not in file_path:
with open("verbsQueried.json") as f:
verbs_list = json.load(f)
else: # is being called by update_data.py
with open("./Portuguese/verbs/verbsQueried.json") as f:
verbs_list = json.load(f)

verbs_formatted = {}

Expand Down Expand Up @@ -52,11 +58,19 @@

verbs_formatted = collections.OrderedDict(sorted(verbs_formatted.items()))

with open(
"../../../Keyboards/LanguageKeyboards/Portuguese/Data/verbs.json",
"w",
encoding="utf-8",
) as f:
json.dump(verbs_formatted, f, ensure_ascii=False, indent=2)
if "Portuguese/verbs/" not in file_path:
with open(
"../../../Keyboards/LanguageKeyboards/Portuguese/Data/verbs.json",
"w",
encoding="utf-8",
) as f:
json.dump(verbs_formatted, f, ensure_ascii=False, indent=2)
else: # is being called by update_data.py
with open(
"../Keyboards/LanguageKeyboards/Portuguese/Data/verbs.json",
"w",
encoding="utf-8",
) as f:
json.dump(verbs_formatted, f, ensure_ascii=False, indent=2)

print(f"Wrote file verbs.json with {len(verbs_formatted)} verbs.")
32 changes: 22 additions & 10 deletions Data/Russian/nouns/format_nouns.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@

import collections
import json
import sys

with open("nounsQueried.json") as f:
nouns_list = json.load(f)
file_path = sys.argv[0]
if "Russian/nouns/" not in file_path:
with open("nounsQueried.json") as f:
nouns_list = json.load(f)
else: # is being called by update_data.py
with open("./Russian/nouns/nounsQueried.json") as f:
nouns_list = json.load(f)


def map_genders(wikidata_gender):
Expand All @@ -23,9 +29,9 @@ def map_genders(wikidata_gender):
"""
if wikidata_gender in ["masculine", "Q499327"]:
return "M"
if wikidata_gender in ["feminine", "Q1775415"]:
elif wikidata_gender in ["feminine", "Q1775415"]:
return "F"
if wikidata_gender in ["neuter", "Q1775461"]:
elif wikidata_gender in ["neuter", "Q1775461"]:
return "N"
else:
return "" # nouns could have a gender that is not valid as an attribute
Expand Down Expand Up @@ -173,11 +179,17 @@ def order_annotations(annotation):

nouns_formatted = collections.OrderedDict(sorted(nouns_formatted.items()))

with open(
"../../../Keyboards/LanguageKeyboards/Russian/Data/nouns.json",
"w",
encoding="utf-8",
) as f:
json.dump(nouns_formatted, f, ensure_ascii=False, indent=2)
if "Russian/nouns/" not in file_path:
with open(
"../../../Keyboards/LanguageKeyboards/Russian/Data/nouns.json",
"w",
encoding="utf-8",
) as f:
json.dump(nouns_formatted, f, ensure_ascii=False, indent=2)
else: # is being called by update_data.py
with open(
"../Keyboards/LanguageKeyboards/Russian/Data/nouns.json", "w", encoding="utf-8",
) as f:
json.dump(nouns_formatted, f, ensure_ascii=False, indent=2)

print(f"Wrote file nouns.json with {len(nouns_formatted)} nouns.")
Loading

0 comments on commit fe3ec56

Please sign in to comment.