@@ -8,10 +8,13 @@
 import json
 import os
 import sys
-from subprocess import call
 
+from requests.exceptions import HTTPError
 from tqdm.auto import tqdm
 from wikidataintegrator import wdi_core
+from wikidataintegrator.wdi_config import config as wdi_config
+
+wdi_config["BACKOFF_MAX_TRIES"] = 1
 
 with open("total_data.json") as f:
     current_data = json.load(f)
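Setting `BACKOFF_MAX_TRIES` to 1 disables wikidataintegrator's retry backoff, so a failed Wikidata Query Service request raises at once and the script's own error handling decides what happens next. A minimal sketch of the resulting fail-fast pattern, with `run_query` as a hypothetical helper name that is not part of the script:

```python
from requests.exceptions import HTTPError

from wikidataintegrator import wdi_core
from wikidataintegrator.wdi_config import config as wdi_config

# With a single try, wdi_core raises instead of retrying with backoff,
# leaving the caller to decide what a server error means for the run.
wdi_config["BACKOFF_MAX_TRIES"] = 1


def run_query(sparql_str):
    """Return WDQS results as a dict, or None if the server errors out."""
    try:
        return wdi_core.WDFunctionsEngine.execute_sparql_query(sparql_str)
    except HTTPError as err:
        print(f"HTTPError: {err}")
        return None
```

The update loop in the last hunk of this diff applies the same pattern inline.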
@@ -82,11 +85,11 @@
 
 possible_queries = []
 for d in data_dir_dirs:
-    for wt in word_types:
-        if "./" + d + "/" + wt in [
-            e[: len("./" + d + "/" + wt)] for e in data_dir_elements
+    for target_type in word_types:
+        if "./" + d + "/" + target_type in [
+            e[: len("./" + d + "/" + target_type)] for e in data_dir_elements
         ]:
-            possible_queries.append(d + "/" + wt)
+            possible_queries.append(d + "/" + target_type)
 
 queries_to_run_lists = [
     [q for q in possible_queries if q[: len(lang)] in current_languages]
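The renamed loop variable makes the prefix check easier to read: a `language/word_type` pair becomes a candidate query only when some scanned path starts with `./language/word_type`. A worked example with assumed sample paths, not taken from the repository:

```python
# Hypothetical stand-ins for the script's scan of the data directory.
data_dir_elements = ["./German/nouns/queryNouns.sparql", "./German/verbs"]
word_types = ["nouns", "verbs", "prepositions"]

possible_queries = []
for d in ["German"]:
    for target_type in word_types:
        # Truncate each element to the candidate path's length before comparing.
        if "./" + d + "/" + target_type in [
            e[: len("./" + d + "/" + target_type)] for e in data_dir_elements
        ]:
            possible_queries.append(d + "/" + target_type)

print(possible_queries)  # ['German/nouns', 'German/verbs']
```

`prepositions` is skipped because no path begins with `./German/prepositions`.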
@@ -98,66 +101,73 @@
 data_added_dict = {}
 
 for q in tqdm(queries_to_run, desc="Data updated", unit="dirs",):
+    lang = q.split("/")[0]
     target_type = q.split("/")[1]
     query_name = "query" + target_type.title() + ".sparql"
     query_path = "./" + q + "/" + query_name
 
     with open(query_path) as file:
         query_lines = file.readlines()
 
     # First format the lines into a multi-line string and then pass this to wikidataintegrator.
-    print(f"Querying {q.split('/')[0]} {q.split('/')[1]}")
-    query = wdi_core.WDFunctionsEngine.execute_sparql_query("".join(query_lines))
-
-    query_results = query["results"]["bindings"]
-
-    # Format and save the resulting JSON.
-    results_formatted = []
-    for r in query_results:  # query_results is also a list
-        r_dict = {k: r[k]["value"] for k in r.keys()}
-
-        results_formatted.append(r_dict)
-
-    with open(
-        f"./{q.split('/')[0]}/{q.split('/')[1]}/{q.split('/')[1]}Queried.json",
-        "w",
-        encoding="utf-8",
-    ) as f:
-        json.dump(results_formatted, f, ensure_ascii=False, indent=2)
-
-    # Call the corresponding formatting file and update data changes.
-    call(
-        ["python", f"./{q.split('/')[0]}/{q.split('/')[1]}/format_{q.split('/')[1]}"],
-        shell=True,
-    )
-
-    with open(
-        f"./../Keyboards/LanguageKeyboards/{q.split('/')[0]}/{q.split('/')[1]}.json"
-    ) as f:
-        new_keyboard_data = json.load(f)
-
-    data_added_dict[q.split("/")[0]][q.split("/")[1]] = (
-        len(new_keyboard_data) - current_data[q.split("/")[0]][q.split("/")[1]]
-    )
-
-    current_data[q.split("/")[0]][q.split("/")[1]] = len(new_keyboard_data)
-
-# Update total_data.json
-with open("./total_data.json", "w", encoding="utf-8",) as f:
-    json.dump(current_data, f, ensure_ascii=False, indent=2)
-
-# Update data_updates.txt
-data_added_string = """"""
-for l in data_added_dict:
-    data_added_string += f"\n{l}"
-    for w in word_types:
-        if data_added_dict[l][w] == 0:
-            pass
-        elif data_added_dict[l][w] == 1:  # remove the s for label
-            data_added_string += f"{data_added_dict[l][w]} {w[:-1]},"
-        else:
-            data_added_string += f"{data_added_dict[l][w]} {w},"
-    data_added_string = data_added_string[:-1]  # remove the last comma
-
-with open("data_updates.txt", "w+") as f:
-    f.writelines(data_added_string)
+    print(f"Querying {lang} {target_type}")
+    query = None
+    try:
+        query = wdi_core.WDFunctionsEngine.execute_sparql_query("".join(query_lines))
+    except HTTPError as err:
+        print(f"HTTPError with {query_name}: {err}")
+
+    if query is None:
+        print(f"Nothing returned by the WDQS server for {query_name}")
+
+    else:
+        query_results = query["results"]["bindings"]
+
+        # Format and save the resulting JSON.
+        results_formatted = []
+        for r in query_results:  # query_results is also a list
+            r_dict = {k: r[k]["value"] for k in r.keys()}
+
+            results_formatted.append(r_dict)
+
+        with open(
+            f"./{lang}/{target_type}/{target_type}Queried.json", "w", encoding="utf-8",
+        ) as f:
+            json.dump(results_formatted, f, ensure_ascii=False, indent=2)
+
+        # Call the corresponding formatting file and update data changes.
+        os.system(f"python ./{lang}/{target_type}/format_{target_type}.py")
+
+        with open(
+            f"./../Keyboards/LanguageKeyboards/{lang}/Data/{target_type}.json"
+        ) as f:
+            new_keyboard_data = json.load(f)
+
+        if lang not in data_added_dict.keys():
+            data_added_dict[lang] = {}
+        data_added_dict[lang][target_type] = (
+            len(new_keyboard_data) - current_data[lang][target_type]
+        )
+
+        current_data[lang][target_type] = len(new_keyboard_data)
+
+    # Update total_data.json.
+    with open("./total_data.json", "w", encoding="utf-8",) as f:
+        json.dump(current_data, f, ensure_ascii=False, indent=2)
+
+    # Update data_updates.txt.
+    data_added_string = ""
+    language_keys = list(data_added_dict.keys())
+    for l in language_keys:
+        data_added_string += f"- {l} " if l == language_keys[0] else f"\n- {l} "
+        for w in word_types:
+            if data_added_dict[l][w] == 0:
+                pass
+            elif data_added_dict[l][w] == 1:  # remove the s for label
+                data_added_string += f"{data_added_dict[l][w]} {w[:-1]},"
+            else:
+                data_added_string += f"{data_added_dict[l][w]} {w},"
+        data_added_string = data_added_string[:-1]  # remove the last comma
+
+    with open("data_updates.txt", "w+") as f:
+        f.writelines(data_added_string)
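The rewritten report string produces one bulleted line per language, singularizes a word type when exactly one entry was added, and strips each line's trailing comma. A self-contained sketch of the formatting with assumed counts; the language and figures are illustrative only:

```python
word_types = ["nouns", "verbs", "prepositions"]
data_added_dict = {"German": {"nouns": 5, "verbs": 1, "prepositions": 0}}

data_added_string = ""
language_keys = list(data_added_dict.keys())
for l in language_keys:
    # The first language starts the string; later ones start a new line.
    data_added_string += f"- {l} " if l == language_keys[0] else f"\n- {l} "
    for w in word_types:
        if data_added_dict[l][w] == 0:
            pass
        elif data_added_dict[l][w] == 1:  # remove the s for label
            data_added_string += f"{data_added_dict[l][w]} {w[:-1]},"
        else:
            data_added_string += f"{data_added_dict[l][w]} {w},"
    data_added_string = data_added_string[:-1]  # remove the last comma

print(data_added_string)  # - German 5 nouns,1 verb
```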