mkschleg · mkschleg · Apr 21, 2023 · Apr 20, 2023 · Apr 20, 2023 · Apr 20, 2023
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "Reproduce"
 uuid = "560a9c3a-0b8c-11e9-0329-d39dfcb85ed2"
 authors = ["Matt <[email protected]>"]
-version = "0.12.3"
+version = "0.13.0-dev"
 
 [deps]
 BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
@@ -16,6 +16,8 @@ HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
+MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
+Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
 MySQL = "39abe10b-433b-5dbd-92d4-e302a9df00cd"
 Parallelism = "c8c83da1-e5f9-4e2c-a857-b8617bac3554"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"

diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@ A framework for managing hyper-parameter settings, and running experiments. Ligh
 
 ## What is this?
 
-This repository is for giving you the tools you need to make experiments reproducible. This repository is mostly built around machine learning and reinforcement learning projects, but there is no reason it is restricted to these types of projects. I've developed this around my own tastes (specifically using )
+This repository is for giving you the tools you need to make experiments reproducible. This repository is mostly built around machine learning and reinforcement learning projects, but there is no reason it is restricted to these types of projects. I've developed this around my own tastes and needs, but it should be generally usable for any style of experiment that needs to run massively parallel parameter sweeps over a set of functions.
 
 ## How To use
 

diff --git a/docs/make.jl b/docs/make.jl
@@ -12,9 +12,13 @@ makedocs(
             "Parallel Jobs"=>"manual/parallel.md"
         ],
         "Documentation" => [
+            "Parser"=>"docs/parse.md",
+            "Iterators"=>"docs/iterators.md",
             "Experiment"=>"docs/experiment.md",
             "Parallel"=>"docs/parallel.md",
-            "Data Structure"=>"docs/parse.md"
+            "Experiment Utilities"=>"docs/exp_utils.md",
+            "Misc Utilities"=>"docs/misc.md"
+
         ]
     ]
 )
@@ -28,6 +32,6 @@ makedocs(
 
 deploydocs(
     repo = "github.com/mkschleg/Reproduce.jl.git",
-    devbranch = "master",
+    devbranch = "main",
     versions = ["stable" => "v^"]
 )
diff --git a/docs/src/docs/exp_utils.md b/docs/src/docs/exp_utils.md
@@ -0,0 +1,11 @@
+# Experiment Utilities
+
+
+```@meta
+CurrentModule = Reproduce
+```
+
+```@autodocs
+Modules = [Reproduce]
+Pages   = ["utils/exp_util.jl", "macros.jl"]
+```
diff --git a/docs/src/docs/iterators.md b/docs/src/docs/iterators.md
@@ -0,0 +1,14 @@
+# Iterators
+
+
+```@meta
+CurrentModule = Reproduce
+```
+
+```@autodocs
+Modules = [Reproduce]
+Pages   = ["iterators.jl", 
+           "iterators/args_iter.jl", 
+           "iterators/args_iter_v2.jl", 
+           "iterators/args_looper.jl"]
+```
diff --git a/docs/src/docs/misc.md b/docs/src/docs/misc.md
@@ -0,0 +1,12 @@
+# Misc Utilities
+
+
+```@meta
+CurrentModule = Reproduce
+```
+
+```@docs
+_safe_fileop
+_safe_mkdir
+_safe_mkpath
+```
diff --git a/docs/src/manual/experiment.md b/docs/src/manual/experiment.md
@@ -6,16 +6,9 @@ This page will be dedicated to introducing the user to building and running expe
 
 ## Experiment Struct
 
-
 ## Argument Iterators
 
-### ArgIter
-### ArgLooper
-
 ## Config Files
 
 ## Running experiments
 
-
-
-# Config.jl
diff --git a/src/Reproduce.jl b/src/Reproduce.jl
@@ -1,7 +1,14 @@
 
 module Reproduce
 
+"""
+    _safe_fileop
 
+Not entirely safe, but manages the case where a folder may already have been created by
+another process. Kinda important for a multi-process workflow.
+
+Can't really control what the user will do...
+"""
 function _safe_fileop(f::Function, check::Function)
     if check()
         try
@@ -16,9 +23,19 @@ function _safe_fileop(f::Function, check::Function)
     end
 end
 
+"""
+    _safe_mkdir
+
+`mkdir` guarded by [`_safe_fileop`](@ref).
+"""
 _safe_mkdir(exp_dir) = 
     _safe_fileop(()->mkdir(exp_dir), ()->!isdir(exp_dir))
 
+"""
+    _safe_mkpath
+
+`mkpath` guarded by [`_safe_fileop`](@ref).
+"""
 _safe_mkpath(exp_dir) = 
     _safe_fileop(()->mkpath(exp_dir), ()->!isdir(exp_dir))
 
@@ -38,67 +55,29 @@ export ItemCollection, search, details
 include("search.jl")
 
 # Saving utils in Config.jl are really nice. Just reusing and pirating a new type until I figure out what FileIO can and can't do.
-export HDF5Manager, BSONManager, JLD2Manager, TOMLManager, save, save!, load
-include("data_manager.jl")
-
-# SQL Management...
-include("sql_utils.jl")
-include("sql_manager.jl")
-
+# export HDF5Manager, BSONManager, JLD2Manager, TOMLManager, save, save!, load
 include("save.jl")
 
-abstract type AbstractArgIter end
 
-export ArgIterator, ArgLooper
-include("args_iter.jl")
-include("args_iter_v2.jl")
-include("args_looper.jl")
+include("iterators.jl")
 
 
-export Experiment, create_experiment_dir, add_experiment, pre_experiment, post_experiment
+export Experiment,
+    create_experiment_dir,
+    add_experiment,
+    pre_experiment,
+    post_experiment
 include("experiment.jl")
 
-import Git
-
-function git_head()
-    try
-        s = if IN_SLURM()
-            read(`git rev-parse HEAD`, String)
-        else
-            try
-                read(`$(Git.git()) rev-parse HEAD`, String)
-            catch
-                read(`git rev-parse HEAD`, String)
-            end
-        end
-        s[1:end-1]
-    catch
-        "0"
-    end
-end
-
-function git_branch()
-    try
-        s = if IN_SLURM()
-            read(`git rev-parse --symbolic-full-name --abbrev-ref HEAD`, String)
-        else
-            try
-                read(`$(Git.git()) rev-parse --symbolic-full-name --abbrev-ref HEAD`, String)
-            catch
-                read(`git rev-parse --symbolic-full-name --abbrev-ref HEAD`, String)
-            end
-        end
-        s[1:end-1]
-    catch
-        "0"
-    end
-end
+include("git_utils.jl")
 
 include("parse.jl")
 
 export job
-include("job.jl")
+include("parallel.jl")
+
+include("utils/exp_util.jl")
 
-include("exp_util.jl")
+include("macros.jl")
 
 end # module
diff --git a/src/experiment.jl b/src/experiment.jl
@@ -16,7 +16,7 @@ function get_comp_env()
     if "SLURM_JOBID" ∈ keys(ENV) && "SLURM_NTASKS" ∈ keys(ENV)
         SlurmParallel(parse(Int, ENV["SLURM_NTASKS"]))
     elseif "SLURM_ARRAY_TASK_ID" ∈ keys(ENV)
-        SlurmTaskArray(parse(Int, ENV["SLURM_ARRAY_TASK_ID"]))
+        SlurmTaskArray(parse(Int, ENV["SLURM_ARRAY_TASK_ID"])) # this needs to be fixed.
     elseif "RP_TASK_ID" ∈ keys(ENV)
         LocalTask(parse(Int, ENV["RP_TASK_ID"]))
     else
@@ -69,12 +69,28 @@ struct Metadata{ST, CE}
     config::Union{String, Nothing}
 end
 
+
 struct Experiment{MD<:Metadata, I}
     job_metadata::JobMetadata
     metadata::MD
     args_iter::I
 end
 
+"""
+    Experiment
+
+The structure used to embody a Reproduce experiment. This is usually constructed through [`parse_experiment_from_config`](@ref), but can be used without config files.
+
+- `dir`: The base directory of the experiment (where the info files are saved).
+- `file`: The file containing the experiment function described by `func_name` and `module_name`.
+- `module_name`: Module name containing the experiment function.
+- `func_name`: Function name of the experiment.
+- `save_type`: The save structure to deal with saving data passed by the experiment.
+- `args_iter`: The args iterator which contains the configs to pass to the experiment.
+- `[config]`: The config file parsed to create the experiment (optional).
+# Keywords
+- `[comp_env]`: The computational environment used by the experiment (defaults to `get_comp_env()`).
+"""
 function Experiment(dir, file, module_name, func_name, save_type, args_iter, config=nothing; comp_env=get_comp_env())
 
     job_comp = JobMetadata(file, Symbol(module_name), Symbol(func_name))
@@ -85,20 +101,35 @@ function Experiment(dir, file, module_name, func_name, save_type, args_iter, con
 end
 
 
-function pre_experiment(exp::Experiment; kwargs...)
-    pre_experiment(exp.metadata.save_type, exp; kwargs...)
-end
+"""
+    pre_experiment(exp::Experiment; kwargs...)
+
+This function does all the setup required to successfully run an experiment. The setup specific
+to each save type is delegated to [`experiment_save_init`](@ref), which is dispatched on the
+save structure in the experiment.
 
-function pre_experiment(file_save::FileSave, exp; kwargs...)
+This function:
+- Creates the base experiment directory.
+- Runs [`experiment_save_init`](@ref) to initialize the details for each save type.
+- Runs [`add_experiment`](@ref).
+"""
+function pre_experiment(exp::Experiment; kwargs...)
     create_experiment_dir(exp.metadata.details_loc)
-    create_data_dir(file_save.save_dir)
+    experiment_save_init(exp.metadata.save_type, exp; kwargs...)
     add_experiment(exp)
 end
 
-function pre_experiment(sql_save::SQLSave, exp; kwargs...)
-    create_experiment_dir(exp.metadata.details_loc)
+"""
+    experiment_save_init(save::FileSave, exp::Experiment; kwargs...)
+    experiment_save_init(save::SQLSave, exp::Experiment; kwargs...)
+
+Sets up the necessary components to save data for the jobs. This is run by [`pre_experiment`](@ref). The `FileSave` method creates the data directory where all the data is stored for an experiment. The `SQLSave` method ensures that the databases and tables necessary to successfully run an experiment are created.
+"""
+function experiment_save_init(file_save::FileSave, exp; kwargs...)
+    create_data_dir(file_save.save_dir)
+end
+function experiment_save_init(sql_save::SQLSave, exp; kwargs...)
     create_database_and_tables(sql_save, exp)
-    add_experiment(exp)
 end
 
 function create_experiment_dir(exp_dir)
@@ -125,13 +156,9 @@ end
 
 function create_database_and_tables(sql_save::SQLSave, exp::Experiment)
 
-    # if :sql_infofile ∈ keys(kwargs)
-    # else
-    #     dbm = DBManager()
-    # end
     dbm = DBManager(sql_save.connection_file)
-
     db_name = get_database_name(sql_save)
+
     # Create and switch to database. This checks to see if database exists before creating
     create_and_switch_to_database(dbm, db_name)
 
@@ -176,6 +203,11 @@ get_settings_file(hash::UInt) = "settings_0x"*string(hash, base=16)*".jld2"
 get_config_copy_file(hash::UInt) = "config_0x"*string(hash, base=16)*".jld2"
 get_jobs_dir(details_loc) = joinpath(details_loc, "jobs")
 
+"""
+    add_experiment
+
+This adds the experiment to the directory (remember directories can contain multiple experiments).
+"""
 function add_experiment(exp::Experiment)
 
     comp_env = exp.metadata.comp_env
@@ -217,46 +249,8 @@ function add_experiment(exp::Experiment)
 end
 
 function post_experiment(exp::Experiment, job_ret)
-    # post_experiment(exp.comp_env, exp, job_ret)
+    # I'm not sure what to put here.
 end
 
-@deprecate exception_file(args...) save_exception(args...)
-
-
-function save_exception(config, exc_file, job_id, exception, trace)
 
-    if isfile(exc_file)
-        @warn "$(exc_file) already exists. Overwriting..."
-    end
-
-    open(exc_file, "w") do f
-        exception_string = "Exception for job_id: $(job_id)\n\n"
-        exception_string *= "Config: \n" * string(config) * "\n\n"
-        exception_string *= "Exception: \n" * string(exception) * "\n\n"
-
-        write(f, exception_string)
-        Base.show_backtrace(f, trace)
-    end
-
-    return
-end
-
-function save_exception(exc_file, job_id, exception, trace)
-
-    @warn "Please pass config to exception." maxlog=1
-    if isfile(exc_file)
-        @warn "$(exc_file) already exists. Overwriting..."
-    end
-
-    open(exc_file, "w") do f
-        exception_string =
-            "Exception for job_id: $(job_id)\n\n" * string(exception) * "\n\n"
-
-        write(f, exception_string)
-        Base.show_backtrace(f, trace)
-    end
-
-    return
-
-end