Skip to content

Commit

Permalink
#95 Arguments for update data and pass QIDs in formatting scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewtavis committed Jan 12, 2022
1 parent c1f28b8 commit 3713d00
Show file tree
Hide file tree
Showing 9 changed files with 99 additions and 37 deletions.
6 changes: 3 additions & 3 deletions Data/French/nouns/format_nouns.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ def map_genders(wikidata_gender):
"""
Maps those genders from Wikidata to succinct versions.
"""
if wikidata_gender == "masculine":
if wikidata_gender in ["masculine", "Q499327"]:
return "M"
elif wikidata_gender == "feminine":
if wikidata_gender in ["feminine", "Q1775415"]:
return "F"
else:
return "" # necessary as French has words that are incorrectly marked common
return "" # nouns could have a gender that is not valid as an attribute


def order_annotations(annotation):
Expand Down
8 changes: 5 additions & 3 deletions Data/German/nouns/format_nouns.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@ def map_genders(wikidata_gender):
wikidata_gender : str
The gender of the noun that was queried from Wikidata
"""
if wikidata_gender == "masculine":
if wikidata_gender in ["masculine", "Q499327"]:
return "M"
if wikidata_gender == "feminine":
if wikidata_gender in ["feminine", "Q1775415"]:
return "F"
if wikidata_gender == "neuter":
if wikidata_gender in ["neuter", "Q1775461"]:
return "N"
else:
return "" # nouns could have a gender that is not valid as an attribute


def order_annotations(annotation):
Expand Down
8 changes: 5 additions & 3 deletions Data/German/prepositions/format_prepositions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ def convert_cases(case):
Converts cases as found on Wikidata to more succinct versions.
"""
case = case.split(" case")[0]
if case == "accusative":
if case in ["accusative", "Q146078"]:
return "Akk"
if case == "dative":
elif case in ["dative", "Q145599"]:
return "Dat"
if case == "genitive":
elif case in ["genitive", "Q146233"]:
return "Gen"
else:
return ""


def order_annotations(annotation):
Expand Down
6 changes: 3 additions & 3 deletions Data/Portuguese/nouns/format_nouns.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ def map_genders(wikidata_gender):
"""
Maps those genders from Wikidata to succinct versions.
"""
if wikidata_gender == "masculine":
if wikidata_gender in ["masculine", "Q499327"]:
return "M"
elif wikidata_gender == "feminine":
if wikidata_gender in ["feminine", "Q1775415"]:
return "F"
else:
return ""
return "" # nouns could have a gender that is not valid as an attribute


def order_annotations(annotation):
Expand Down
8 changes: 4 additions & 4 deletions Data/Russian/nouns/format_nouns.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ def map_genders(wikidata_gender):
wikidata_gender : str
The gender of the noun that was queried from Wikidata
"""
if wikidata_gender == "masculine":
if wikidata_gender in ["masculine", "Q499327"]:
return "M"
elif wikidata_gender == "feminine":
if wikidata_gender in ["feminine", "Q1775415"]:
return "F"
elif wikidata_gender == "neuter":
if wikidata_gender in ["neuter", "Q1775461"]:
return "N"
else:
return ""
return "" # nouns could have a gender that is not valid as an attribute


def order_annotations(annotation):
Expand Down
14 changes: 6 additions & 8 deletions Data/Russian/prepositions/format_prepositions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,18 @@ def convert_cases(case):
Converts cases as found on Wikidata to more succinct versions.
"""
case = case.split(" case")[0]
if case == "accusative":
if case in ["accusative", "Q146078"]:
return "Akk"
elif case == "dative":
elif case in ["dative", "Q145599"]:
return "Dat"
elif case == "genitive":
elif case in ["genitive", "Q146233"]:
return "Gen"
elif case == "instrumental":
elif case in ["instrumental", "Q192997"]:
return "Ins"
elif case == "prepositional":
elif case in ["prepositional", "Q2114906"]:
return "Pre"
elif case == "locative":
elif case in ["locative", "Q202142"]:
return "Loc"
elif case == "nominative":
return "Nom"
else:
return ""

Expand Down
6 changes: 3 additions & 3 deletions Data/Spanish/nouns/format_nouns.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ def map_genders(wikidata_gender):
"""
Maps those genders from Wikidata to succinct versions.
"""
if wikidata_gender == "masculine":
if wikidata_gender in ["masculine", "Q499327"]:
return "M"
elif wikidata_gender == "feminine":
if wikidata_gender in ["feminine", "Q1775415"]:
return "F"
else:
return ""
return "" # nouns could have a gender that is not valid as an attribute


def order_annotations(annotation):
Expand Down
6 changes: 4 additions & 2 deletions Data/Swedish/nouns/format_nouns.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ def map_genders(wikidata_gender):
"""
Maps those genders from Wikidata to succinct versions.
"""
if wikidata_gender == "common gender":
if wikidata_gender in ["common gender", "Q1305037"]:
return "C"
if wikidata_gender == "neuter":
if wikidata_gender in ["neuter", "Q1775461"]:
return "N"
else:
return "" # nouns could have a gender that is not valid as an attribute


def order_annotations(annotation):
Expand Down
74 changes: 66 additions & 8 deletions Data/update_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,42 @@

import json
import os
import sys

from tqdm.auto import tqdm
from wikidataintegrator import wdi_core

# Load the existing data; its top-level keys are the languages that can be updated.
with open("total_data.json") as f:
    current_data = json.load(f)

current_languages = list(current_data.keys())
word_types = ["nouns", "verbs", "prepositions"]

# Optional command line arguments:
#   - one argument: either a language (a key of total_data.json) or a word type;
#   - two arguments: a language followed by a word type.
# With no arguments both stay None, meaning no subsetting is requested.
language = None
word_type = None
if len(sys.argv) == 2:
    arg = sys.argv[1]
    if arg in current_languages:
        language = arg
    elif arg in word_types:
        word_type = arg
    else:
        # The exception has to be raised: merely constructing it would let the
        # script carry on as if no argument had been passed at all.
        raise ValueError(
            "An invalid argument was specified.\n"
            "For languages, please choose from those found as keys in total_data.json.\n"
            "For grammatical types, please choose from nouns, verbs or prepositions."
        )

elif len(sys.argv) == 3:
    # Positional order is fixed: language first, then word type.
    language = sys.argv[1]
    word_type = sys.argv[2]

print(language)
print(word_type)

# Derive Data directory elements for potential queries.
data_dir_elements = []

for path, _, files in os.walk("."):
Expand All @@ -28,11 +60,28 @@
}
)

word_types = ["nouns", "verbs", "prepositions"]

with open("total_data.json") as f:
current_data = json.load(f)
current_languages = list(current_data.keys())
# Subset current_languages and word_types if arguments have been passed.
# Invalid values abort the run: the errors must be raised, otherwise the
# script would silently fall back to processing every language / word type.
if language is not None:
    if language not in current_languages:
        raise ValueError(
            "An invalid language was specified.\n"
            "Please choose from those found as keys in total_data.json."
        )
    current_languages = [language]

if word_type is not None:
    if word_type not in word_types:
        raise ValueError(
            "An invalid grammatical type was specified.\n"
            "Please choose from nouns, verbs or prepositions."
        )
    word_types = [word_type]

possible_queries = []
for d in data_dir_dirs:
Expand All @@ -58,9 +107,18 @@
query_lines = file.readlines()

# First format the lines into a multi-line string and then pass this to wikidataintegrator.
query_results = wdi_core.WDFunctionsEngine.execute_sparql_query(
print(f"Querying {q.split('/')[0]} {q.split('/')[1]}")
query = wdi_core.WDFunctionsEngine.execute_sparql_query(
"""{}""".format("".join(query_lines))
)

print(query_results)
print(query_path)
query_results = query["results"]["bindings"]

results_formatted = []
for r in query_results: # query_results is also a list
r_dict = {k: r[k]["value"] for k in r.keys()}

results_formatted.append(r_dict)

with open("./example.json", "w", encoding="utf-8",) as f:
json.dump(results_formatted, f, ensure_ascii=False, indent=2)

0 comments on commit 3713d00

Please sign in to comment.