Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Doc and refactor #97

Merged
merged 6 commits into from
Apr 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Reproduce"
uuid = "560a9c3a-0b8c-11e9-0329-d39dfcb85ed2"
authors = ["Matt <[email protected]>"]
version = "0.12.3"
version = "0.13.0-dev"

[deps]
BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
Expand All @@ -16,6 +16,8 @@ HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
MySQL = "39abe10b-433b-5dbd-92d4-e302a9df00cd"
Parallelism = "c8c83da1-e5f9-4e2c-a857-b8617bac3554"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ A framework for managing hyper-parameter settings, and running experiments. Ligh

## What is this?

This repository is for giving you the tools you need to make experiments reproducible. This repository is mostly built around machine learning and reinforcement learning projects, but there is no reason it is restricted to these types of projects. I've developed this around my own tastes (specifically using )
This repository is for giving you the tools you need to make experiments reproducible. This repository is mostly built around machine learning and reinforcement learning projects, but there is no reason it is restricted to these types of projects. I've developed this around my own tastes and needs, but it should be generally usable for any style of experiment which needs to do massively parallel parameter sweeps of a set of functions.

## How To use

Expand Down
8 changes: 6 additions & 2 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@ makedocs(
"Parallel Jobs"=>"manual/parallel.md"
],
"Documentation" => [
"Parser"=>"docs/parse.md",
"Iterators"=>"docs/iterators.md",
"Experiment"=>"docs/experiment.md",
"Parallel"=>"docs/parallel.md",
"Data Structure"=>"docs/parse.md"
"Experiment Utilities"=>"docs/exp_utils.md",
"Misc Utilities"=>"docs/misc.md"

]
]
)
Expand All @@ -28,6 +32,6 @@ makedocs(

deploydocs(
repo = "github.com/mkschleg/Reproduce.jl.git",
devbranch = "master",
devbranch = "main",
versions = ["stable" => "v^"]
)
11 changes: 11 additions & 0 deletions docs/src/docs/exp_utils.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Experiment Utilities


```@meta
CurrentModule = Reproduce
```

```@autodocs
Modules = [Reproduce]
Pages = ["utils/exp_util.jl", "macros.jl"]
```
14 changes: 14 additions & 0 deletions docs/src/docs/iterators.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Iterators


```@meta
CurrentModule = Reproduce
```

```@autodocs
Modules = [Reproduce]
Pages = ["iterators.jl",
"iterators/args_iter.jl",
"iterators/args_iter_v2.jl",
"iterators/args_looper.jl"]
```
12 changes: 12 additions & 0 deletions docs/src/docs/misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Misc Utilities


```@meta
CurrentModule = Reproduce
```

```@docs
_safe_fileop
_safe_mkdir
_safe_mkpath
```
7 changes: 0 additions & 7 deletions docs/src/manual/experiment.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,9 @@ This page will be dedicated to introducing the user to building and running expe

## Experiment Struct


## Argument Iterators

### ArgIter
### ArgLooper

## Config Files

## Running experiments



# Config.jl
79 changes: 29 additions & 50 deletions src/Reproduce.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@

module Reproduce

"""
_safe_fileop

Not entirely safe, but manages the case where a folder has already been created by
another process. Kinda important for a multi-process workflow.

Can't really control what the user will do...
"""
function _safe_fileop(f::Function, check::Function)
if check()
try
Expand All @@ -16,9 +23,19 @@ function _safe_fileop(f::Function, check::Function)
end
end

"""
    _safe_mkdir(exp_dir)

Create the directory `exp_dir` with `mkdir`, guarded by [`_safe_fileop`](@ref).

The check handed to `_safe_fileop` is `!isdir(exp_dir)`.
"""
function _safe_mkdir(exp_dir)
    # The do-block becomes the first (operation) argument of `_safe_fileop`;
    # the explicit closure is the check.
    return _safe_fileop(() -> !isdir(exp_dir)) do
        mkdir(exp_dir)
    end
end

"""
    _safe_mkpath(exp_dir)

Create `exp_dir` with `mkpath`, guarded by [`_safe_fileop`](@ref).

The check handed to `_safe_fileop` is `!isdir(exp_dir)`.
"""
function _safe_mkpath(exp_dir)
    make_path() = mkpath(exp_dir)
    path_missing() = !isdir(exp_dir)
    return _safe_fileop(make_path, path_missing)
end

Expand All @@ -38,67 +55,29 @@ export ItemCollection, search, details
include("search.jl")

# Saving utils in Config.jl are really nice. Just reusing and pirating a new type until I figure out what FileIO can and can't do.
export HDF5Manager, BSONManager, JLD2Manager, TOMLManager, save, save!, load
include("data_manager.jl")

# SQL Management...
include("sql_utils.jl")
include("sql_manager.jl")

# export HDF5Manager, BSONManager, JLD2Manager, TOMLManager, save, save!, load
include("save.jl")

abstract type AbstractArgIter end

export ArgIterator, ArgLooper
include("args_iter.jl")
include("args_iter_v2.jl")
include("args_looper.jl")
include("iterators.jl")


export Experiment, create_experiment_dir, add_experiment, pre_experiment, post_experiment
export Experiment,
create_experiment_dir,
add_experiment,
pre_experiment,
post_experiment
include("experiment.jl")

import Git

"""
    git_head()

Return the output of `git rev-parse HEAD` with its last character (normally the
trailing newline) removed.

When `IN_SLURM()` is true the `git` executable on the `PATH` is used directly. Otherwise
the executable from `Git.git()` is tried first, falling back to the `git` on the `PATH`
if that attempt throws. If everything fails, the string `"0"` is returned.
"""
function git_head()
    # Query using whichever `git` is found on the PATH.
    from_path() = read(`git rev-parse HEAD`, String)

    try
        output = if IN_SLURM()
            from_path()
        else
            try
                read(`$(Git.git()) rev-parse HEAD`, String)
            catch
                from_path()
            end
        end
        return output[1:end-1]
    catch
        # Best effort: any failure is reported as "0".
        return "0"
    end
end

"""
    git_branch()

Return the output of `git rev-parse --symbolic-full-name --abbrev-ref HEAD` with its
last character (normally the trailing newline) removed.

When `IN_SLURM()` is true the `git` executable on the `PATH` is used directly. Otherwise
the executable from `Git.git()` is tried first, falling back to the `git` on the `PATH`
if that attempt throws. If everything fails, the string `"0"` is returned.
"""
function git_branch()
    # Query using whichever `git` is found on the PATH.
    from_path() = read(`git rev-parse --symbolic-full-name --abbrev-ref HEAD`, String)

    try
        output = if IN_SLURM()
            from_path()
        else
            try
                read(`$(Git.git()) rev-parse --symbolic-full-name --abbrev-ref HEAD`, String)
            catch
                from_path()
            end
        end
        return output[1:end-1]
    catch
        # Best effort: any failure is reported as "0".
        return "0"
    end
end
include("git_utils.jl")

include("parse.jl")

export job
include("job.jl")
include("parallel.jl")

include("utils/exp_util.jl")

include("exp_util.jl")
include("macros.jl")

end # module
100 changes: 47 additions & 53 deletions src/experiment.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ function get_comp_env()
if "SLURM_JOBID" ∈ keys(ENV) && "SLURM_NTASKS" ∈ keys(ENV)
SlurmParallel(parse(Int, ENV["SLURM_NTASKS"]))
elseif "SLURM_ARRAY_TASK_ID" ∈ keys(ENV)
SlurmTaskArray(parse(Int, ENV["SLURM_ARRAY_TASK_ID"]))
SlurmTaskArray(parse(Int, ENV["SLURM_ARRAY_TASK_ID"])) # this needs to be fixed.
elseif "RP_TASK_ID" ∈ keys(ENV)
LocalTask(parse(Int, ENV["RP_TASK_ID"]))
else
Expand Down Expand Up @@ -69,12 +69,28 @@ struct Metadata{ST, CE}
config::Union{String, Nothing}
end


# Bundles what is stored for a single experiment. `MD` is the concrete `Metadata`
# type and `I` is the type of the argument iterator.
struct Experiment{MD<:Metadata, I}
# Built by the `Experiment(dir, file, ...)` constructor from the experiment file,
# module name and function name.
job_metadata::JobMetadata
# Experiment metadata; fields such as `details_loc`, `save_type` and `comp_env`
# are read from it elsewhere in this file.
metadata::MD
# The args iterator holding the configs passed to the experiment.
args_iter::I
end

"""
Experiment

The structure used to embody a reproduce experiment. This is usually constructed through [`parse_experiment_from_config`](@ref), but can be used without config files.

- `dir`: the base directory of the experiment (where the info files are saved).
- `file`: The file containing the experiment function described by `func_name` and `module_name`
- `module_name`: Module name containing the experiment function.
- `func_name`: Function name of the experiment.
- `save_type`: The save structure to deal with saving data passed by the experiment.
- `args_iter`: The args iterator which contains the configs to pass to the experiment.
- `[config]`: The config file parsed to create the experiment (optional)
# kwarg
- `[comp_env]`: The computational environment used by the experiment.
"""
function Experiment(dir, file, module_name, func_name, save_type, args_iter, config=nothing; comp_env=get_comp_env())

job_comp = JobMetadata(file, Symbol(module_name), Symbol(func_name))
Expand All @@ -85,20 +101,35 @@ function Experiment(dir, file, module_name, func_name, save_type, args_iter, con
end


function pre_experiment(exp::Experiment; kwargs...)
pre_experiment(exp.metadata.save_type, exp; kwargs...)
end
"""
pre_experiment(exp::Experiment; kwargs...)
pre_experiment(file_save::FileSave, exp; kwargs...)
pre_experiment(sql_save::SQLSave, exp; kwargs...)

This function does all the setup required to successfully run an experiment. It is dispatched on the save structure in the experiment.

function pre_experiment(file_save::FileSave, exp; kwargs...)
This function:
- Creates the base experiment directory.
- Runs [`experiment_save_init`](@ref) to initialize the details for each save type.
- Runs [`add_experiment`](@ref).
"""
function pre_experiment(exp::Experiment; kwargs...)
    # Base directory of the experiment, taken from the metadata's `details_loc`.
    create_experiment_dir(exp.metadata.details_loc)

    # Save-type specific setup, dispatched on the save structure. The data directory
    # of a `FileSave` is created by that method, so there is no direct
    # `create_data_dir(file_save.save_dir)` call here: no `file_save` binding exists
    # in this method, and referencing it raised an `UndefVarError`.
    experiment_save_init(exp.metadata.save_type, exp; kwargs...)

    # Finally add the experiment to the directory.
    add_experiment(exp)
end

function pre_experiment(sql_save::SQLSave, exp; kwargs...)
create_experiment_dir(exp.metadata.details_loc)
"""
experiment_save_init(save::FileSave, exp::Experiment; kwargs...)
experiment_save_init(save::SQLSave, exp::Experiment; kwargs...)

Sets up the necessary components to save data for the jobs. This is run by [`pre_experiment`](@ref). The `FileSave` creates the data directory where all the data is stored for an experiment. The `SQLSave` ensures the databases and tables necessary to successfully run an experiment are created.
"""
function experiment_save_init(file_save::FileSave, exp; kwargs...)
    # A file-based save only needs its data directory created.
    data_dir = file_save.save_dir
    return create_data_dir(data_dir)
end
function experiment_save_init(sql_save::SQLSave, exp; kwargs...)
    # Only the SQL side is prepared here: the database and tables for the experiment.
    # `add_experiment` is deliberately not called: `pre_experiment` calls it right
    # after this initialisation, so calling it here as well added the experiment twice.
    create_database_and_tables(sql_save, exp)
end

function create_experiment_dir(exp_dir)
Expand All @@ -125,13 +156,9 @@ end

function create_database_and_tables(sql_save::SQLSave, exp::Experiment)

# if :sql_infofile ∈ keys(kwargs)
# else
# dbm = DBManager()
# end
dbm = DBManager(sql_save.connection_file)

db_name = get_database_name(sql_save)

# Create and switch to database. This checks to see if database exists before creating
create_and_switch_to_database(dbm, db_name)

Expand Down Expand Up @@ -176,6 +203,11 @@ get_settings_file(hash::UInt) = "settings_0x"*string(hash, base=16)*".jld2"
"""
    get_config_copy_file(hash::UInt)

Return the file name `"config_0x<hex>.jld2"`, where `<hex>` is `hash` written in
base 16.
"""
function get_config_copy_file(hash::UInt)
    hex = string(hash, base=16)
    return string("config_0x", hex, ".jld2")
end
"""
    get_jobs_dir(details_loc)

Return the path of the `jobs` entry directly under `details_loc`.
"""
function get_jobs_dir(details_loc)
    return joinpath(details_loc, "jobs")
end

"""
add_experiment

This adds the experiment to the directory (remember directories can contain multiple experiments).
"""
function add_experiment(exp::Experiment)

comp_env = exp.metadata.comp_env
Expand Down Expand Up @@ -217,46 +249,8 @@ function add_experiment(exp::Experiment)
end

"""
    post_experiment(exp::Experiment, job_ret)

Counterpart to `pre_experiment`, taking the experiment and a job return value.

The body is currently empty, so this does nothing and returns `nothing`.
NOTE(review): presumably meant to run once the jobs have finished — confirm against
the callers.
"""
function post_experiment(exp::Experiment, job_ret)
# post_experiment(exp.comp_env, exp, job_ret)
# I'm not sure what to put here.
end

@deprecate exception_file(args...) save_exception(args...)


"""
    save_exception(config, exc_file, job_id, exception, trace)

Write a report for a failed job to `exc_file`.

The report contains `job_id`, the string form of `config`, the string form of
`exception`, and the backtrace `trace` as printed by `Base.show_backtrace`. An existing
`exc_file` is overwritten after a warning is logged. Returns `nothing`.
"""
function save_exception(config, exc_file, job_id, exception, trace)
    isfile(exc_file) && @warn "$(exc_file) already exists. Overwriting..."

    open(exc_file, "w") do io
        # Build the report only after the file is open, matching the order in which
        # the pieces are stringified and written.
        report = string(
            "Exception for job_id: $(job_id)\n\n",
            "Config: \n", string(config), "\n\n",
            "Exception: \n", string(exception), "\n\n",
        )
        write(io, report)
        Base.show_backtrace(io, trace)
    end

    return
end

"""
    save_exception(exc_file, job_id, exception, trace)

Variant of `save_exception` without a config.

Logs a warning (at most once) asking for the config to be passed, then writes `job_id`,
the string form of `exception`, and the backtrace `trace` (via `Base.show_backtrace`)
to `exc_file`. An existing `exc_file` is overwritten after a warning is logged.
Returns `nothing`.
"""
function save_exception(exc_file, job_id, exception, trace)
    @warn "Please pass config to exception." maxlog=1
    isfile(exc_file) && @warn "$(exc_file) already exists. Overwriting..."

    open(exc_file, "w") do io
        report = "Exception for job_id: $(job_id)\n\n$(exception)\n\n"
        write(io, report)
        Base.show_backtrace(io, trace)
    end

    return
end

Loading