# optimization_function.py
import csv
import os
import json
import pandas as pd
executed_configurations = set()
error_log = []
# This log function maintains a single log file shared by all kernels
def log_configuration_single_file(log_file, C, kernel_value, gamma, epsilon, degree=None, coef0=None, test_split=None, mean_error=None, base_name=None):
    with open(log_file, 'a', newline='') as f:
        writer = csv.writer(f)
        # Create a list to hold the parameters
        params = [C, kernel_value, gamma, epsilon]
        # Add the optional parameters only if they are provided (not None)
        if degree is not None:
            params.append(degree)
        if coef0 is not None:
            params.append(coef0)
        if test_split is not None:
            params.append(test_split)
        if mean_error is not None:
            params.append(mean_error)
        if base_name is not None:
            params.append(base_name)
        writer.writerow(params)
# Example call:
# log_configuration_single_file(log_file, C, kernel, gamma, epsilon, degree if 'degree' in config else None, coef0 if 'coef0' in config else None, test_size, mean_3d_error_testing, base_name)
# Load previously executed configurations from the log file.
# Earlier JSON-based version, kept for reference:
# def load_executed_configurations(log_file):
#     if os.path.exists(log_file):
#         with open(log_file, 'r') as file:
#             return json.load(file)
#     return {}
def load_executed_configurations(log_file):
    executed_configs = set()  # Using a set for fast lookups
    if not os.path.exists(log_file):
        return executed_configs  # No log yet, so nothing has been executed
    try:
        with open(log_file, 'r') as file:
            reader = csv.reader(file)
            for row in reader:
                if row:  # Skip empty rows
                    executed_configs.add(tuple(row))  # Tuples are hashable, so they can live in the set
    except Exception as e:
        print(f"Error reading the log file {log_file}: {e}")
    return executed_configs
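# A minimal usage sketch (the file name is illustrative), assuming the log was
# written by one of the CSV loggers in this module: csv.reader returns every
# field as a string, so lookups must compare string values.
#
# configs = load_executed_configurations('executed_configs_rbf_log.csv')
# if ('10', 'rbf', 'scale', '0.1') in configs:
#     print('This configuration has already been run; skipping.')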
# Earlier list-based version, kept for reference:
# def load_executed_configurations(log_file):
#     # Check if file exists
#     if not os.path.exists(log_file):
#         print(f"Log file '{log_file}' does not exist.")
#         return {}
#     configurations = []
#     try:
#         with open(log_file, 'r') as file:
#             reader = csv.reader(file)
#             for row in reader:
#                 if row:  # Check if the row is not empty
#                     configurations.append(row)  # Append the valid row
#     except Exception as e:
#         print(f"Error reading the log file {log_file}: {e}")
#     return configurations
# Save executed configurations to the log file (JSON-based workflow)
def save_executed_configurations(log_file, executed_configurations):
    with open(log_file, 'w') as file:
        # Sets are not JSON-serializable, so convert to a list first;
        # dicts (as built by mark_executed) are written as-is
        if isinstance(executed_configurations, set):
            executed_configurations = list(executed_configurations)
        json.dump(executed_configurations, file, indent=4)
def log_configuration(log_file, C, kernel_value, gamma, epsilon, degree, coef0, test_split, mean_error, dataset_name):
    with open(log_file, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([dataset_name, C, kernel_value, gamma, epsilon, degree, coef0, test_split, mean_error])  # Include dataset_name
# Check if this configuration has already been executed
def is_executed(config, executed_configurations):
    config_str = json.dumps(config, sort_keys=True)  # Convert dict to string for comparison
    return config_str in executed_configurations
# Mark this configuration as executed
def mark_executed(config, executed_configurations, val_error, test_error):
    config_str = json.dumps(config, sort_keys=True)
    executed_configurations[config_str] = {
        "val_error": val_error,
        "test_error": test_error
    }
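# Sketch of the dict-based JSON workflow that is_executed/mark_executed assume
# (the values are placeholders). Note that load_executed_configurations above
# returns a set of CSV rows, so this workflow should start from a fresh dict
# or from a JSON log read back with json.load.
#
# executed = {}
# config = {'C': 10, 'kernel': 'rbf', 'gamma': 'scale', 'epsilon': 0.1}
# if not is_executed(config, executed):
#     val_error, test_error = 0.42, 0.51  # placeholder results from a run
#     mark_executed(config, executed, val_error, test_error)
#     save_executed_configurations('executed_log.json', executed)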
def update_executed_configurations(log_file, config):
    # Load existing configurations; convert the set to a list so the new
    # (dict) configuration can be appended and serialized as JSON
    executed_configurations = list(load_executed_configurations(log_file))
    # Add the new configuration
    executed_configurations.append(config)
    # Save the updated list back to the log file. Note: this writes JSON even
    # though load_executed_configurations reads CSV, so the two formats should
    # not be mixed on the same file.
    with open(log_file, 'w') as file:
        json.dump(executed_configurations, file)
def is_configuration_executed(log_file, base_name, config):
    # If the log file does not exist, nothing has been executed yet
    if not os.path.exists(log_file):
        print(f"Log file {log_file} does not exist.")
        return False
    # Load the log file into a DataFrame
    try:
        log_df = pd.read_csv(log_file)
    except Exception as e:
        print(f"Error reading log file {log_file}: {e}")
        return False
    # Print the columns and a few rows for debugging
    #print("Columns in log DataFrame:", log_df.columns)
    #print("First few rows in log DataFrame:", log_df.head())
    # Check that the required columns exist in the log file
    required_columns = ['Dataset', 'C', 'kernel', 'gamma', 'epsilon', 'degree', 'coef0']
    missing_columns = [col for col in required_columns if col not in log_df.columns]
    if missing_columns:
        print(f"Missing columns in the log file: {missing_columns}")
        return False
    # Build a condition that matches the specific configuration.
    # Note: rows where degree/coef0 were logged as empty become NaN in pandas
    # and will not match config.get(..., None), so such rows never count as
    # duplicates here.
    condition = (
        (log_df['Dataset'] == base_name) &
        (log_df['C'] == config.get('C')) &
        (log_df['kernel'] == config.get('kernel')) &
        (log_df['gamma'] == config.get('gamma')) &
        (log_df['epsilon'] == config.get('epsilon')) &
        (log_df['degree'] == config.get('degree', None)) &
        (log_df['coef0'] == config.get('coef0', None))
    )
    # Check if any rows match the condition
    config_exists = log_df[condition].shape[0] > 0
    if config_exists:
        print(f"Configuration for dataset '{base_name}' with these parameters already exists in the log.")
    else:
        print(f"Configuration for dataset '{base_name}' with these parameters does not exist in the log.")
    return config_exists
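# Example guard (hypothetical values): skip a run when the pandas-based check
# finds a matching row. Assumes the CSV log has a header row with the columns
# listed in required_columns.
#
# config = {'C': 10, 'kernel': 'poly', 'gamma': 'scale',
#           'epsilon': 0.1, 'degree': 3, 'coef0': 1.0}
# if not is_configuration_executed(log_file, base_name, config):
#     pass  # run the SVR experiment here, then call log_configuration(...)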
# The two helpers below were working before the function above was added;
# revert to them if it does not behave the same way.
def is_executedc1(config, base_name):
    # Convert lists within the config to tuples to make them hashable
    config_tuple = tuple((key, tuple(value) if isinstance(value, list) else value) for key, value in config.items())
    # Include the base_name to make the configuration unique to the dataset
    return (base_name, config_tuple) in executed_configurations
def mark_executedc1(config, base_name):
    # Convert any lists in config to tuples to make config_tuple hashable
    config_tuple = tuple((key, tuple(value) if isinstance(value, list) else value) for key, value in config.items())
    # Add the hashable config_tuple and base_name to the set
    executed_configurations.add((base_name, config_tuple))
def save_error_log(config, base_name, mean_error_val, mean_error_testing):
    error_log.append({
        'config': config,
        'dataset': base_name,
        'validation_error': mean_error_val,
        'testing_error': mean_error_testing
    })
def compare_errors():
    """Compare all logged errors and report the configuration with the minimum testing error."""
    if not error_log:
        print("No errors logged yet.")
        return
    min_error_config = min(error_log, key=lambda x: x['testing_error'])
    print(f"Best configuration so far: {min_error_config['config']}")
    print(f"Best testing error: {min_error_config['testing_error']:.5f} m")
# This log function creates an individual log file for each kernel
def log_configuration_individual_file(log_file, C, kernel_value, gamma, epsilon, degree=None, coef0=None, test_split=None, mean_error=None, base_name=None):
    with open(log_file, 'a', newline='') as f:
        writer = csv.writer(f)
        # Write every parameter in a fixed order, including None placeholders
        writer.writerow([C, kernel_value, gamma, epsilon, degree, coef0, test_split, mean_error, base_name])
def get_log_file(kernel, results_directory):
    return os.path.join(results_directory, f'executed_configs_{kernel}_log.csv')
# In the runSVR function, call it like this:
# log_file = get_log_file(kernel, results_directory)
# log_configuration(log_file, C, kernel, gamma, epsilon, degree, coef0, test_size, mean_3d_error_testing, base_name)
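# Minimal smoke test (a sketch; assumes write access to the current directory):
# logs one hypothetical configuration, reads it back, and removes the file.
if __name__ == '__main__':
    demo_log = get_log_file('rbf', '.')
    log_configuration_single_file(demo_log, 10, 'rbf', 'scale', 0.1,
                                  test_split=0.2, mean_error=0.42,
                                  base_name='demo_dataset')
    print(load_executed_configurations(demo_log))
    os.remove(demo_log)  # Clean up the demo log file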