forked from sinaghiassian/OffpolicyAlgorithms
-
Notifications
You must be signed in to change notification settings - Fork 0
/
process_data.py
101 lines (87 loc) · 4.78 KB
/
process_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import json
import os
import numpy as np
from Learning import learn
from Plotting.plot_params import EXP_ATTRS
from Plotting.plot_utils import make_params, make_current_params, load_and_replace_large_nan_inf, \
load_best_perf_json, load_best_rerun_params, make_res_path
from utils import create_name_for_save_load, Configuration
def save_perf_over_alpha(alg, exp, auc_or_final, sp, rerun=False):
    """Aggregate per-alpha performance for one algorithm/experiment and save it.

    For every (tp, fop) meta-parameter combination, loads the saved
    mean/stderr pair for each step size (alpha) and writes two arrays
    (`*_mean_*_over_alpha*.npy` and `*_stderr_*_over_alpha*.npy`) indexed
    by the position of alpha in `fp_list`.

    Args:
        alg: algorithm name (used to locate result files).
        exp: experiment name.
        auc_or_final: which performance measure to aggregate ('auc' or 'final').
        sp: second meta-parameter value (e.g. lambda or zeta) fixed for this pass.
        rerun: if True, substitute the '_rerun' result files for the best
            parameter setting found previously, and tag the saved arrays
            with a '_rerun' postfix for that setting.
    """
    fp_list, sp_list, tp_list, fop_list, _ = make_params(alg, exp)
    res_path = make_res_path(alg, exp)
    mean_over_alpha = np.zeros(len(fp_list))
    stderr_over_alpha = np.zeros(len(fp_list))
    # Best params only matter in rerun mode; zeros can never match real params.
    best_fp, best_tp, best_fop = load_best_rerun_params(alg, exp, auc_or_final, sp) if rerun else (0, 0, 0)
    for tp in tp_list:
        for fop in fop_list:
            current_params = make_current_params(alg, sp, tp, fop)
            for i, fp in enumerate(fp_list):
                current_params['alpha'] = fp
                load_name = os.path.join(res_path, create_name_for_save_load(current_params))
                # Load exactly one file: the rerun result for the best setting,
                # the baseline result otherwise. (Previously the baseline file
                # was always loaded and then discarded in the rerun case.)
                is_best = rerun and fp == best_fp and tp == best_tp and fop == best_fop
                load_postfix = '_rerun' if is_best else ''
                perf = np.load(f"{load_name}_mean_stderr_{auc_or_final}{load_postfix}.npy")
                mean_over_alpha[i], stderr_over_alpha[i] = perf[0], perf[1]
            save_name = os.path.join(res_path, create_name_for_save_load(current_params, excluded_params=['alpha']))
            postfix = ''
            if rerun and tp == best_tp and fop == best_fop:
                postfix = '_rerun'
            np.save(f"{save_name}_mean_{auc_or_final}_over_alpha{postfix}", mean_over_alpha)
            np.save(f"{save_name}_stderr_{auc_or_final}_over_alpha{postfix}", stderr_over_alpha)
def find_best_perf(alg, exp, auc_or_final, sp):
    """Search all meta-parameter combinations for the best performance.

    Scans every (tp, fop) combination's per-alpha performance curve (as
    produced by `save_perf_over_alpha`) and returns the parameter dict of
    the combination/alpha with the lowest performance value (lower is
    better here).

    Args:
        alg: algorithm name.
        exp: experiment name.
        auc_or_final: performance measure to rank by ('auc' or 'final').
        sp: fixed second meta-parameter value.

    Returns:
        dict of the winning meta-parameters, including the best 'alpha'.
        If no finite performance is found, returns {'alpha': np.inf}.
    """
    exp_attrs = EXP_ATTRS[exp](exp)
    fp_list, _, tp_list, fop_list, res_path = make_params(alg, exp)
    best_params = {}
    best_perf, best_fp = np.inf, np.inf
    for fop in fop_list:
        for tp in tp_list:
            current_params = make_current_params(alg, sp, tp, fop)
            load_name = os.path.join(res_path, create_name_for_save_load(current_params, excluded_params=[
                'alpha']) + f'_mean_{auc_or_final}_over_alpha.npy')
            current_perf = load_and_replace_large_nan_inf(
                load_name, large=exp_attrs.learning_starting_point, replace_with=exp_attrs.over_limit_replacement)
            # nanmin matches nanargmin below: a residual NaN entry would make
            # plain min() return NaN and silently disqualify every later
            # candidate (NaN < best_perf is always False).
            min_perf = np.nanmin(current_perf)
            if min_perf < best_perf:
                best_perf = min_perf
                best_perf_idx = int(np.nanargmin(current_perf))
                best_fp = fp_list[best_perf_idx]
                best_params = current_params
    best_params['alpha'] = best_fp
    return best_params
def save_best_perf_in_json(alg, exp, best_params, auc_or_final, sp):
    """Store the winning meta-parameters in a json file under the results path.

    Reads the algorithm's experiment spec from the mirrored 'Experiments'
    directory, injects `best_params` under 'meta_parameters', and writes
    the result as '<auc_or_final>_<sp>.json' next to the result files.
    """
    *_, res_path = make_params(alg, exp)
    exp_dir = res_path.replace('Results', 'Experiments')
    spec_path = os.path.join(exp_dir, f"{alg}.json")
    with open(spec_path, 'r') as spec_file:
        spec = json.load(spec_file)
    spec['meta_parameters'] = best_params
    out_path = os.path.join(res_path, f"{auc_or_final}_{sp}.json")
    with open(out_path, 'wt') as out_file:
        json.dump(spec, out_file, indent=4)
def run_learning_with_best_perf(alg, exp, auc_or_final, sp):
    """Launch a fresh learning run configured with the stored best parameters.

    Loads the best-performance json written by `save_best_perf_in_json`,
    augments its meta-parameters with run-level settings (paths, run
    counts, rerun flags), and hands the resulting Configuration to
    `learn`.
    """
    save_dir = os.path.join(os.getcwd(), 'Results', exp, alg)
    best = load_best_perf_json(alg, exp, sp, auc_or_final)
    settings = best['meta_parameters']
    settings.update({
        'algorithm': alg,
        'task': best['task'],
        'environment': best['environment'],
        'num_steps': best['number_of_steps'],
        'num_of_runs': best['number_of_runs'],
        'sub_sample': best['sub_sample'],
        'save_path': save_dir,
        'save_value_function': False,
        'rerun': True,
        'render': False,
    })
    learn(Configuration(settings))
def process_data(**kwargs):
    """Run the full post-processing pipeline for every requested combination.

    For each (experiment, algorithm, measure, second-parameter) tuple:
    aggregate per-alpha results, find and persist the best parameters,
    re-run learning with them, then re-aggregate including the rerun.

    Expected kwargs: 'exps', 'algs', 'auc_or_final', 'sp_list' (iterables).
    """
    for experiment in kwargs['exps']:
        for algorithm in kwargs['algs']:
            for measure in kwargs['auc_or_final']:
                for second_param in kwargs['sp_list']:
                    print(f"\nStarted re-running {experiment}, {algorithm} "
                          f"lmbda_or_zeta: {second_param}, {measure} ...")
                    save_perf_over_alpha(algorithm, experiment, measure, second_param)
                    winners = find_best_perf(algorithm, experiment, measure, second_param)
                    save_best_perf_in_json(algorithm, experiment, winners, measure, second_param)
                    run_learning_with_best_perf(algorithm, experiment, measure, second_param)
                    save_perf_over_alpha(algorithm, experiment, measure, second_param, rerun=True)
                    print(f"Finished re-running {experiment}, {algorithm} {winners}")