utils.py
"""Utility helpers for LogIE: configuration printing, result saving,
triple post-processing, and timing."""
import json
import os
import re
import shutil
import time
from contextlib import ContextDecorator

import pandas as pd


def print_params(params):
    """Print the extraction configuration inside an 80-column banner."""
    print("{:-^80}".format("params"))
    print("Beginning extraction "
          + "using the following configuration:\n")
    for param, value in params.items():
        print("\t{:>13}: {}".format(param, value))
    print()
    print("-" * 80)


def combine_extractions(one, two):
    """Merge two {key: list} extraction dicts, concatenating the lists per key."""
    all_keys = set(one).union(set(two))
    combined = {}
    for key in all_keys:
        combined[key] = one.get(key, []) + two.get(key, [])
    return combined
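
# Hedged usage sketch (the keys and values below are hypothetical, not from
# the repository): missing keys simply contribute an empty list.
#   combine_extractions({"0": ["t1"]}, {"0": ["t2"], "1": ["t3"]})
#   returns {"0": ["t1", "t2"], "1": ["t3"]}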


def save_global_output_triples(triples, params):
    """Dump every extracted triple next to its source sentence as JSON."""
    with open(params["templates"], 'r') as f:
        gt = json.load(f)
    # Output layout: "index": ["sentence", ["triple1", "triple2", ...]]
    result = {
        idx: [gt[idx][0], [str(triple) for triple in triples[idx]]]
        for idx in triples
    }
    output_file_name = f"{params['log_type']}_{params['openie']}.json"
    output_file = os.path.join(
        params['results_dir'],
        output_file_name,
    )
    with open(output_file, 'w') as out:
        json.dump(result, out, indent=4, sort_keys=False)


def save_log_triples(log_idx, triples, params):
    """Append one TSV row per triple: experiment id, log index, triple."""
    output_file_name = f"{params['log_type']}_{params['openie']}.tsv"
    output_file = os.path.join(
        params['results_dir'],
        output_file_name,
    )
    append_write = 'a' if os.path.exists(output_file) else 'w'
    with open(output_file, append_write) as f:
        for triple in triples:
            f.write(f'{params["id"]}\t{log_idx}\t{triple}\n')
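
# Hedged example of one resulting TSV row (the triple text depends on the
# triple class's __str__, so the exact rendering below is an assumption):
#   exp-01 <tab> 3 <tab> (user; sent; VARX bytes)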


def save_results(evaluators, params):
    """Collect metrics from every evaluator and append them to a CSV file."""
    results = {'Logs': params['log_type'], 'OIE': params['openie']}
    for eval_metric in evaluators:
        eval_result = evaluators[eval_metric].metrics()
        update_keys = {f'{eval_metric} {k}': v for k, v in eval_result.items()}
        results.update(update_keys)
    file_name = f"metrics_{params['log_type']}_{params['openie']}.csv"
    results_file_path = os.path.join(
        params['results_dir'],
        file_name,
    )
    df = pd.DataFrame.from_records([results])
    if not os.path.isfile(results_file_path):
        df.to_csv(results_file_path, header=True)
    else:
        df.to_csv(results_file_path, mode='a', header=False)


# Pre-compiled pattern matching numbered variable placeholders such as VAR1.
varx_pattern = re.compile(r'VAR\d+')


def remove_varx(triples_result):
    """Normalize numbered placeholders (VAR1, VAR2, ...) to a generic VARX."""
    for idx in triples_result:
        for triple in triples_result[idx]:
            triple.pred = varx_pattern.sub('VARX', triple.pred)
            if hasattr(triple, 'arg1'):
                triple.arg1 = varx_pattern.sub('VARX', triple.arg1)
                triple.arg2 = varx_pattern.sub('VARX', triple.arg2)
            else:
                # Materialize a list; a bare map() would be a one-shot iterator.
                triple.args = [
                    varx_pattern.sub('VARX', arg) for arg in triple.args
                ]
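
# Hedged example (the triple text is hypothetical): "VAR1 sent VAR2 bytes"
# becomes "VARX sent VARX bytes", so triples that differ only in placeholder
# numbering end up with identical text.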


def file_handling(params):
    """Validate input paths and prepare the output directories."""
    if "templates" in params:
        if not os.path.exists(params['templates']):
            raise FileNotFoundError(
                f"File {params['templates']} doesn't exist. "
                + "Please provide the templates path."
            )
    else:
        raise FileNotFoundError(
            "Input templates are necessary to run LogIE. "
            + "Please provide the log templates path."
        )
    if "raw_logs" in params:
        if not os.path.exists(params['raw_logs']):
            raise FileNotFoundError(
                f"File {params['raw_logs']} doesn't exist. "
                + "Please provide the raw logs path."
            )
    if params['save_output'] or params['evaluation']:
        # Check whether the experiment id already exists.
        if os.path.exists(params["id_dir"]) and not params["force"]:
            raise FileExistsError(
                f"Directory '{params['id_dir']}' already exists. "
                + "Run with --force to overwrite. "
                + "If --force is used, you could lose your training results."
            )
        if os.path.exists(params["id_dir"]):
            shutil.rmtree(params["id_dir"])
        for target_dir in ['id_dir', 'results_dir']:
            os.makedirs(params[target_dir])
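
# Hedged sketch of the params dict this module expects. The key names are
# taken from the code above; every value shown is hypothetical.
#   params = {
#       "id": "exp-01",
#       "id_dir": "output/exp-01",
#       "results_dir": "output/exp-01/results",
#       "templates": "data/templates.json",
#       "raw_logs": "data/raw.log",
#       "log_type": "hdfs",
#       "openie": "stanford",
#       "save_output": True,
#       "evaluation": True,
#       "force": False,
#   }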


class Timing(ContextDecorator):
    """Context manager / decorator that prints how long a named section took."""

    def __init__(self, name):
        self.name = name

    def __enter__(self):
        self.ini = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        fin = time.perf_counter()
        result = fin - self.ini
        print(f"Section {self.name} took {result} seconds.")