@@ -10,47 +10,7 @@ using Distributed
10
10
11
11
12
12
13
"""
    IN_SLURM()

Return `true` when both `SLURM_JOBID` and `SLURM_NTASKS` are present in `ENV`,
and `false` otherwise.
"""
IN_SLURM() = haskey(ENV, "SLURM_JOBID") && haskey(ENV, "SLURM_NTASKS")
14
-
15
"""
    get_comp_env()

Build the computation-environment descriptor from the process environment.

The checks run in this order and the first match wins:

1. `SLURM_JOBID` and `SLURM_NTASKS` both set -> `SlurmParallel(SLURM_NTASKS)`
2. `SLURM_ARRAY_TASK_ID` set                -> `SlurmTaskArray(SLURM_ARRAY_TASK_ID)`
3. `RP_TASK_ID` set                         -> `LocalTask(RP_TASK_ID)`
4. otherwise                                -> `LocalParallel(RP_NTASKS)`, or
   `LocalParallel(0)` when `RP_NTASKS` is not set.

Every value is converted with `parse(Int, ...)`, so a non-integer value raises
an `ArgumentError`.
"""
function get_comp_env()
    if haskey(ENV, "SLURM_JOBID") && haskey(ENV, "SLURM_NTASKS")
        return SlurmParallel(parse(Int, ENV["SLURM_NTASKS"]))
    elseif haskey(ENV, "SLURM_ARRAY_TASK_ID")
        # this needs to be fixed.
        return SlurmTaskArray(parse(Int, ENV["SLURM_ARRAY_TASK_ID"]))
    elseif haskey(ENV, "RP_TASK_ID")
        return LocalTask(parse(Int, ENV["RP_TASK_ID"]))
    else
        # A missing RP_NTASKS falls back to "0", i.e. LocalParallel(0).
        return LocalParallel(parse(Int, get(ENV, "RP_NTASKS", "0")))
    end
end
30
-
31
-
32
"""
    SlurmTaskArray(id)

Computation environment carrying a single integer `id`.
"""
struct SlurmTaskArray
    id::Int
end

"""
    SlurmParallel(num_procs)

Computation environment carrying an integer process count `num_procs`.
"""
struct SlurmParallel
    num_procs::Int
end

"""
    LocalTask(id)

Computation environment carrying a single integer `id`.
"""
struct LocalTask
    id::Int
end

"""
    LocalParallel(num_procs)

Computation environment carrying an integer process count `num_procs`.
"""
struct LocalParallel
    num_procs::Int
end

"""
    get_task_id(comp_env)

Return the `id` field of `comp_env`.
"""
get_task_id(comp_env) = comp_env.id

"""
    is_task_env(comp_env)

Trait: `true` for `SlurmTaskArray` and `LocalTask`, `false` for anything else.
"""
is_task_env(::Any) = false
is_task_env(::SlurmTaskArray) = true
is_task_env(::LocalTask) = true
54
14
55
15
56
16
# what does experiment do? Can it be simplified? Can parts of it be decomposed?
@@ -62,13 +22,22 @@ struct JobMetadata
62
22
end
63
23
64
24
"""
    Metadata{ST, CE}

Immutable record describing one experiment.

# Fields
- `name::String`: job name.
- `save_type::ST`: value handed to `experiment_save_init` by `pre_experiment`.
- `comp_env::CE`: computation environment; `experiment_dir_setup` dispatches on it.
- `details_loc::String`: experiment directory.
- `hash::UInt64`: experiment hash.
- `config::Union{String, Nothing}`: a string, or `nothing` when there is none
  (presumably the path of a config file -- confirm against the caller).
- `job_log_dir::String`: directory created by `create_jobs_folder`.
"""
struct Metadata{ST, CE}
    name::String
    save_type::ST
    comp_env::CE
    details_loc::String
    hash::UInt64
    config::Union{String, Nothing}
    job_log_dir::String
end
71
33
34
"""
    get_jobs_dir(comp_env, details_loc)

Return the `jobs` sub-directory of `details_loc`.

`comp_env` is accepted but currently unused: the path does not depend on it.
"""
function get_jobs_dir(comp_env, details_loc)
    return joinpath(details_loc, "jobs")
end
35
+
36
"""
    Metadata(save_type, comp_env, dir, exp_hash, config)

Convenience constructor that derives the two remaining fields from `comp_env`
and `dir`: `name` comes from `get_job_name(comp_env)` and `job_log_dir` from
`get_jobs_dir(comp_env, dir)`. `dir` is stored as `details_loc`.
"""
function Metadata(save_type, comp_env, dir, exp_hash, config)
    return Metadata(
        get_job_name(comp_env),
        save_type,
        comp_env,
        dir,
        exp_hash,
        config,
        get_jobs_dir(comp_env, dir),
    )
end
72
41
73
42
struct Experiment{MD<: Metadata , I}
74
43
job_metadata:: JobMetadata
@@ -116,7 +85,7 @@ This function:
116
85
function pre_experiment(exp::Experiment; kwargs...)
    metadata = exp.metadata
    # Step 1: create the experiment directory at `details_loc`.
    create_experiment_dir(metadata.details_loc)
    # Step 2: initialise the save backend; keyword arguments are forwarded as-is.
    experiment_save_init(metadata.save_type, exp; kwargs...)
    # Step 3: environment-specific directory setup; its result is returned.
    return experiment_dir_setup(exp)
end
121
90
122
91
"""
@@ -201,36 +170,55 @@ end
201
170
"""
    get_settings_dir(details_loc)

Return the `settings` sub-directory of `details_loc`.
"""
function get_settings_dir(details_loc)
    return joinpath(details_loc, "settings")
end
202
171
"""
    get_settings_file(hash::UInt)

Return the settings file name for `hash`: `settings_0x<hex>.jld2`, where
`<hex>` is `hash` written in base 16.
"""
function get_settings_file(hash::UInt)
    hex = string(hash; base=16)
    return string("settings_0x", hex, ".jld2")
end
203
172
"""
    get_config_copy_file(hash::UInt)

Return the config-copy file name for `hash`: `config_0x<hex>.jld2`, where
`<hex>` is `hash` written in base 16.
"""
function get_config_copy_file(hash::UInt)
    hex = string(hash; base=16)
    return string("config_0x", hex, ".jld2")
end
204
"""
    get_jobs_dir(details_loc)

Return the `jobs` sub-directory of `details_loc`.
"""
function get_jobs_dir(details_loc)
    return joinpath(details_loc, "jobs")
end
205
173
206
- """
207
- add_experiment
174
"""
    experiment_dir_setup(exp::Experiment)

Entry point for directory setup: forwards to the two-argument method selected
by the type of `exp.metadata.comp_env`.
"""
experiment_dir_setup(exp::Experiment) = experiment_dir_setup(exp.metadata.comp_env, exp)
208
177
209
- This adds the experiment to the directory (remember directories can contain multiple experiments).
210
- """
211
- function add_experiment (exp:: Experiment )
212
-
213
- comp_env = exp. metadata. comp_env
214
- if is_task_env (comp_env)
215
- if get_task_id (comp_env) != 1
216
- task_id = comp_env. id
217
- @info " Only add experiment for task id == 1... id : $(task_id) $(task_id == 1 ) "
218
- return
219
- end
178
"""
    experiment_dir_setup(comp_env::LocalParallel, exp::Experiment)

Directory setup for a `LocalParallel` environment: create the jobs folder, then
save the experiment settings. Returns whatever `save_experiment_settings`
returns.
"""
function experiment_dir_setup(comp_env::LocalParallel, exp::Experiment)
    create_jobs_folder(exp)
    return save_experiment_settings(exp)
end
183
+
184
"""
    experiment_dir_setup(comp_env::SlurmParallel, exp::Experiment)

Directory setup for a `SlurmParallel` environment: create the jobs folder, then
save the experiment settings. Returns whatever `save_experiment_settings`
returns.
"""
function experiment_dir_setup(comp_env::SlurmParallel, exp::Experiment)
    create_jobs_folder(exp)
    return save_experiment_settings(exp)
end
189
+
190
"""
    experiment_dir_setup(comp_env::SlurmTaskArray, exp::Experiment)

Directory setup for a `SlurmTaskArray` environment.

The jobs folder is created on every call. The experiment settings are saved
only when `comp_env.array_idx == 1`; for any other index an `@info` message is
logged and `nothing` is returned.
"""
function experiment_dir_setup(comp_env::SlurmTaskArray, exp::Experiment)
    create_jobs_folder(exp)

    array_idx = comp_env.array_idx
    if array_idx != 1
        @info "Only save settings for array index == 1: array index = $(array_idx)"
        return nothing
    end

    return save_experiment_settings(exp)
end
221
200
201
"""
    experiment_dir_setup(comp_env::TaskJob, exp::Experiment)

Directory setup for a `TaskJob` environment.

The experiment settings are saved only when `comp_env.id == 1`; for any other
id an `@info` message is logged and `nothing` is returned.

NOTE(review): unlike the other `experiment_dir_setup` methods, this one does
not call `create_jobs_folder` -- confirm that this is intentional.
"""
function experiment_dir_setup(comp_env::TaskJob, exp::Experiment)
    task_id = comp_env.id
    if task_id != 1
        @info "Only add experiment for task id == 1... id : $(task_id) $(task_id == 1)"
        return nothing
    end

    return save_experiment_settings(exp)
end
210
+
211
"""
    create_jobs_folder(exp::Experiment)

Create the directory stored in `exp.metadata.job_log_dir` by passing it to
`_safe_mkpath`.
"""
create_jobs_folder(exp::Experiment) = _safe_mkpath(exp.metadata.job_log_dir)
223
214
224
- @info " Adding Experiment to $(exp_dir) "
215
+ function save_experiment_settings (exp:: Experiment )# exp_dir, exp_hash)
216
+ exp_dir = exp. metadata. details_loc
217
+ exp_hash = exp. metadata. hash
225
218
226
219
settings_dir = get_settings_dir (exp_dir)
227
220
_safe_mkdir (settings_dir)
228
221
229
- if comp_env isa SlurmParallel
230
- _safe_mkdir (get_jobs_dir (exp_dir))
231
- end
232
-
233
- exp_hash = exp. metadata. hash
234
222
settings_file = joinpath (settings_dir, " settings_0x" * string (exp_hash, base= 16 )* " .jld2" )
235
223
236
224
args_iter = exp. args_iter
@@ -245,7 +233,7 @@ function add_experiment(exp::Experiment)
245
233
config_file = joinpath (settings_dir, " config_0x" * string (exp_hash, base= 16 )* splitext (config)[end ])
246
234
cp (config, config_file; force= true )
247
235
end
248
-
236
+
249
237
end
250
238
251
239
function post_experiment (exp:: Experiment , job_ret)
0 commit comments