diff --git a/.github/workflows/RunChecks.yaml b/.github/workflows/RunChecks.yaml index b208eaa..befc751 100644 --- a/.github/workflows/RunChecks.yaml +++ b/.github/workflows/RunChecks.yaml @@ -35,10 +35,32 @@ jobs: r-version: ${{ matrix.config.r }} use-public-rspm: true - - name: Install Slurm tools + - name: Download mocked binaries run: | - sudo apt-get update - sudo apt-get install -y slurm-client + mkdir -p /usr/local/bin + cd /tmp + gh release download "sinfo/v0.2.0" -R a2-ai-tech-training/slurm-mocks -p "*Linux_x86_64.tar.gz" -D /usr/local/bin/ + tar -xvf /usr/local/bin/sinfo_Linux_x86_64.tar.gz + mv ./sinfo /usr/local/bin/sinfo + chmod +x /usr/local/bin/sinfo + + gh release download "sacct/v0.2.0" -R a2-ai-tech-training/slurm-mocks -p "*Linux_x86_64.tar.gz" -D /usr/local/bin/ + tar -xvf /usr/local/bin/sacct_Linux_x86_64.tar.gz + mv ./sacct /usr/local/bin/sacct + chmod +x /usr/local/bin/sacct + + echo "/usr/local/bin" >> $GITHUB_PATH + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Grab outputs directory from mocked repo + run: | + cd /tmp + gh repo clone a2-ai-tech-training/slurm-mocks + mv /tmp/slurm-mocks/sinfo/outputs $GITHUB_WORKSPACE/ + rm -rf /tmp/slurm-mocks + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - uses: r-lib/actions/setup-r-dependencies@v2 with: @@ -52,5 +74,3 @@ jobs: - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true - - diff --git a/inst/doc/Running-nonmem.R b/inst/doc/Running-nonmem.R deleted file mode 100644 index 0f22cd3..0000000 --- a/inst/doc/Running-nonmem.R +++ /dev/null @@ -1,81 +0,0 @@ -## ----include = FALSE---------------------------------------------------------- -#removing generated files from running this vignette -nonmem <- file.path("model", "nonmem") - -unlink(file.path(nonmem, "1001"), recursive = TRUE) -unlink(file.path(nonmem, "1001.yaml")) -unlink(file.path(nonmem, "1001.toml")) -unlink(file.path(nonmem, "submission-log"), recursive = TRUE) -unlink(file.path(nonmem, "in_progress"), recursive = TRUE) - -## ----include = FALSE---------------------------------------------------------- -knitr::opts_chunk$set( - collapse = TRUE, - comment = "" -) - -## ----setup-------------------------------------------------------------------- -library(slurmtools) - -## ----------------------------------------------------------------------------- -Sys.which("bbi") - -## ----------------------------------------------------------------------------- -library(bbr) -library(here) - -nonmem = file.path(here::here(), "vignettes", "model", "nonmem") - -options('slurmtools.submission_root' = file.path(nonmem, "submission-log")) -options('slurmtools.bbi_config_path' = file.path(nonmem, "bbi.yaml")) - -## ----------------------------------------------------------------------------- -mod_number <- "1001" - -if (file.exists(file.path(nonmem, paste0(mod_number, ".yaml")))) { - mod <- bbr::read_model(file.path(nonmem, mod_number)) -} else { - mod <- bbr::new_model(file.path(nonmem, mod_number)) -} - -## ----------------------------------------------------------------------------- -submission <- slurmtools::submit_nonmem_model( - mod, - slurm_job_template_path = file.path(nonmem, "slurm-job-bbi.tmpl"), -) - -submission - -## ----------------------------------------------------------------------------- -slurmtools::get_slurm_jobs(user = 'matthews') - -## ----------------------------------------------------------------------------- -submission_ntfy <- slurmtools::submit_nonmem_model( - mod, - slurm_job_template_path = file.path(nonmem, 
"slurm-job-bbi-ntfy.tmpl"), - overwrite = TRUE, - slurm_template_opts = list( - ntfy = "ntfy_demo") -) - -submission_ntfy - -## ----include = FALSE---------------------------------------------------------- -#cancelling any running nonmem jobs -state <- slurmtools::get_slurm_jobs(user = "matthews") - -if (any(state$job_state %in% c("RUNNING", "CONFIGURING"))) { - for (job_id in state %>% dplyr::filter(job_state == "RUNNING") %>% dplyr::pull("job_id")) { - processx::run("scancel", args = paste0(job_id)) - } -} - -#removing generated files from running this vignette -nonmem <- file.path("model", "nonmem") - -unlink(file.path(nonmem, "1001"), recursive = TRUE) -unlink(file.path(nonmem, "1001.yaml")) -unlink(file.path(nonmem, "1001.toml")) -unlink(file.path(nonmem, "submission-log"), recursive = TRUE) -unlink(file.path(nonmem, "in_progress"), recursive = TRUE) - diff --git a/inst/doc/Running-nonmem.Rmd b/inst/doc/Running-nonmem.Rmd deleted file mode 100644 index ce7a020..0000000 --- a/inst/doc/Running-nonmem.Rmd +++ /dev/null @@ -1,269 +0,0 @@ ---- -title: "Running Nonmem with slurmtools" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Running Nonmem with slurmtools} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -#removing generated files from running this vignette -nonmem <- file.path("model", "nonmem") - -unlink(file.path(nonmem, "1001"), recursive = TRUE) -unlink(file.path(nonmem, "1001.yaml")) -unlink(file.path(nonmem, "1001.toml")) -unlink(file.path(nonmem, "submission-log"), recursive = TRUE) -unlink(file.path(nonmem, "in_progress"), recursive = TRUE) -``` - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "" -) -``` - -# Slurmtools for submitting NONMEM runs - -`slurmtools` is an R package for interacting with slurm (fka **S**imple **L**inux **U**tility for **R**esource **M**anagement) and submitting NONMEM jobs. You can submit a NONMEM job with `submit_nonmem_model`, you can view current jobs with `get_slurm_jobs`, and you can see the available partitions with `get_slurm_partitions`. - -## Installing `slurmtools` - -To install `slurmtools` use the following commands: - -``` r -options(repos = c( - "slurmtools" = "https://a2-ai.github.io/gh-pkg-mirror/slurmtools", - getOption("repos"))) -install.packages("slurmtools") -``` - -```{r setup} -library(slurmtools) -``` - -We are given a message when loading slurmtools that some options are not set and that default job submission will not work without them. These options are used for default arguments in the `submit_nonmem_model` function. Running `?submit_nonmem_model` we can see the documentation - -![Help view for `submit_nonmem_model` function](data/images/submit_nonmem_model_help.png) - -This function uses the inputs to populate a template Bash shell script that submits the NONMEM job to slurm. A default template file is supplied with the Project Starter and it can be modified to do additional tasks as long as they are possible within Bash. 
- -By default these values are provided to the slurm template file: - -``` r -default_template_list = list( - partition = partition, - parallel = parallel, - ncpu = ncpu, - job_name = sprintf("%s-nonmem-run", basename(.mod$absolute_model_path)), - project_path = project_path, - project_name = project_name, - bbi_exe_path = Sys.which("bbi"), - bbi_config_path = bbi_config_path, - model_path = .mod$absolute_model_path, - config_toml_path = config_toml_path, - nmm_exe_path = Sys.which("nmm") -) -``` - -- `partition` is an argument to `submit_nonmem_model` - -- `parallel` is `TRUE` if `ncpu > 1`, else `FALSE` - -- `ncpu` is an argument to `submit_nonmem_model` - -- `job_name` is created from the `.mod` argument supplied to `submit_nonmem_model` - -- `bbi_exe_path` is determined via \`Sys.which("bbi") - -- `bbi_config_path` is determined via getOption("slurmtools.bbi_config_path") - -- `model_path` is determined from the `.mod` argument supplied to `submit_nonmem_model` - -- `config_toml_path` is determined from the `.mod` argument supplied to `submit_nonmem_model` and is requried to use `nmm` (NONMEM monitor) - -- `nmm_exe_path` is determined via `Sys.which("nmm")` - -If you need to feed more arguments to the template you simply supply them in the `slurm_template_opts` argument as a list. More on that later. - -## Submitting a NONMEM job with `bbi` - -To submit a NONMEM job, we need to supply either the path to a mod file or create a model object from `bbr`, and supply a `slurm-template.tmpl` file. To use `bbi` we also need a `bbi.yaml` file, which I've also supplied in `/model/nonmem/bbi.yaml` (and is also supplied with the R project starter). - -Here is an example of a template file that will call `bbi`: - -``` slurm-job-bbi.tmpl -#!/bin/bash -#SBATCH --job-name="{{job_name}}" -#SBATCH --nodes=1 -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task={{ncpu}} -#SBATCH --partition={{partition}} -#SBATCH --account={{project_name}} - -# submit_nonmem_model uses the whisker package to populate template files -# https://github.com/edwindj/whisker - -{{#parallel}} -{{bbi_exe_path}} nonmem run local {{model_path}}.mod --parallel --threads={{ncpu}} --config {{bbi_config_path}} -{{/parallel}} - - -{{^parallel}} -{{bbi_exe_path}} nonmem run local {{model_path}}.mod --config {{bbi_config_path}} -{{/parallel}} -``` - -This file will call `bbi` to run our supplied model (`{{model_path}}.mod`) if `ncpu > 1` then parallel will be true and the code between `{{#parallel}}` and `{{/parallel}}` will be populated. if `ncpu = 1` then parallel will be false and the code between `{{^parallel}}` and `{{/parallel}}` will be populated. By default, `submit_nonmem_model` will inject `Sys.which("bbi")` into the template, so if `bbi` is not on your path we'll have to supply the `bbi_exe_path` for it to start the NONMEM run. - -```{r} -Sys.which("bbi") -``` - -We will use a few different template files with different functionality so we'll inject those template file paths to `submit_nonmem_model`. However, we'll use the `submission-log` directory for the output, so we'll set that option as well as `bbi_config_path` so `submit_nonmem_model` defaults can be used. The slurm template files are saved in `~/model/nonmem/` Additionally, there is a simple NONMEM control stream in `1001.mod` in the same directory that we can use for testing. 
- -```{r} -library(bbr) -library(here) - -nonmem = file.path(here::here(), "vignettes", "model", "nonmem") - -options('slurmtools.submission_root' = file.path(nonmem, "submission-log")) -options('slurmtools.bbi_config_path' = file.path(nonmem, "bbi.yaml")) -``` - -To create the `bbr` model object, we need to have both `1001.mod` and `1001.yaml` which contains metadata about the model in the supplied directory (`./model/nonmem/`). We'll check for mod_number.yaml and if it exists, read in the model otherwise create it and then read it. - -```{r} -mod_number <- "1001" - -if (file.exists(file.path(nonmem, paste0(mod_number, ".yaml")))) { - mod <- bbr::read_model(file.path(nonmem, mod_number)) -} else { - mod <- bbr::new_model(file.path(nonmem, mod_number)) -} -``` - -We can now submit the job and point to the template file in `model/nonmem/slurm-job-bbi.tmpl`. - -```{r} -submission <- slurmtools::submit_nonmem_model( - mod, - slurm_job_template_path = file.path(nonmem, "slurm-job-bbi.tmpl"), -) - -submission -``` - -We see a `status` with an exit code of 0 suggesting a successful command, and the `stdout` gives us the batch job number. We can use `slurmtools::get_slurm_jobs()` to monitor the status of the job. Here, we can supply the user = "matthews" argument to filter to just the jobs I've submitted. - -```{r} -slurmtools::get_slurm_jobs(user = 'matthews') -``` - -If we look in the `slurmtools.submisstion_root` directory we can see the shell script that was generated with `submit_nonmem_model`. Here is the whisker replaced call to bbi: - -``` 1001.sh -/usr/local/bin/bbi nonmem run local /cluster-data/user-homes/matthews/Projects/slurmtools_vignette/model/nonmem/1001.mod --config /cluster-data/user-homes/matthews/Projects/slurmtools_vignette/model/nonmem/bbi.yaml -``` - -## Extending templates - -Because the templates create a bash shell script there is an almost infinite number of things we can do with our template. **Anything you can do in bash you can do by appropriately updating the template file and injecting the needed information!** - -Let's add a notification feature that will send a notification when the job has started and finished. We can use [ntfy.sh](ntfy.sh) and add the necessary info to our template to achieve this. - -Here is a modified template file that adds a `JOBID=$SLURM_JOBID` and some ntfy calls. To get a notification we can supply `submit_nonmem_model` with `ntfy` variable to send notifications. I'll use `ntfy = ntfy_demo` for this. - -``` slurm-job-bbi-ntfy.tmpl -#!/bin/bash -#SBATCH --job-name="{{job_name}}" -#SBATCH --nodes=1 -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task={{ncpu}} -#SBATCH --partition={{partition}} -#SBATCH --account={{project_name}} - -JOBID=$SLURM_JOBID - -# submit_nonmem_model uses the whisker package to populate template files -# https://github.com/edwindj/whisker - -{{#ntfy}} -curl -d "Starting model run: {{job_name}} $JOBID" ntfy.sh/{{ntfy}} -{{/ntfy}} - -{{#parallel}} -{{bbi_exe_path}} nonmem run local {{model_path}}.mod --parallel --threads={{ncpu}} --config {{bbi_config_path}} -{{/parallel}} - -{{^parallel}} -{{bbi_exe_path}} nonmem run local {{model_path}}.mod --config {{bbi_config_path}} -{{/parallel}} - -{{#ntfy}} -curl -d "Finished model run: {{job_name}} $JOBID" ntfy.sh/{{ntfy}} -{{/ntfy}} -``` - -Since we've already run this model we will provide the `overwrite = TRUE` argument to force a new nonmem run. 
- -```{r} -submission_ntfy <- slurmtools::submit_nonmem_model( - mod, - slurm_job_template_path = file.path(nonmem, "slurm-job-bbi-ntfy.tmpl"), - overwrite = TRUE, - slurm_template_opts = list( - ntfy = "ntfy_demo") -) - -submission_ntfy -``` - -We again get a 0 exit code status and now instead of using `slurmtools::get_slurm_jobs()` to monitor the job, we can rely on the new notifications we just set up. ![NONMEM job starting ntfy alert](data/images/ntfy_starting.png) - -and when the run finished we get another notification: ![NONMEM Job finished ntfy alert](data/images/ntfy_finished.png) - -Note that the run number will match the run specified in `submission$stdout`. We can see the new shell script this updated template file generated - -``` 1001.sh -#!/bin/bash -#SBATCH --job-name="1001-nonmem-run" -#SBATCH --nodes=1 -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task=1 -#SBATCH --partition=cpu2mem4gb -#SBATCH --account=slurmtools - -JOBID=$SLURM_JOBID - -curl -d "Starting model run: 1001-nonmem-run $JOBID" ntfy.sh/ntfy_demo - -/usr/local/bin/bbi nonmem run local /cluster-data/user-homes/matthews/Projects/slurmtools_vignette/model/nonmem/1001.mod --config /cluster-data/user-homes/matthews/Projects/slurmtools_vignette/model/nonmem/bbi.yaml - -curl -d "Finished model run: 1001-nonmem-run $JOBID" ntfy.sh/ntfy_demo -``` - -To reiterate, this template file is run as a bash shell script so anything you can do in bash you can put into the template and pass the needed arguments and customize the behavior to your liking. - -```{r, include = FALSE} -#cancelling any running nonmem jobs -state <- slurmtools::get_slurm_jobs(user = "matthews") - -if (any(state$job_state %in% c("RUNNING", "CONFIGURING"))) { - for (job_id in state %>% dplyr::filter(job_state == "RUNNING") %>% dplyr::pull("job_id")) { - processx::run("scancel", args = paste0(job_id)) - } -} - -#removing generated files from running this vignette -nonmem <- file.path("model", "nonmem") - -unlink(file.path(nonmem, "1001"), recursive = TRUE) -unlink(file.path(nonmem, "1001.yaml")) -unlink(file.path(nonmem, "1001.toml")) -unlink(file.path(nonmem, "submission-log"), recursive = TRUE) -unlink(file.path(nonmem, "in_progress"), recursive = TRUE) -``` diff --git a/inst/doc/Running-nonmem.html b/inst/doc/Running-nonmem.html deleted file mode 100644 index 94827a2..0000000 --- a/inst/doc/Running-nonmem.html +++ /dev/null @@ -1,638 +0,0 @@ - - - - -
# Slurmtools for submitting NONMEM runs

`slurmtools` is an R package for interacting with slurm (fka **S**imple **L**inux **U**tility for **R**esource **M**anagement) and submitting NONMEM jobs. You can submit a NONMEM job with `submit_nonmem_model`, you can view current jobs with `get_slurm_jobs`, and you can see the available partitions with `get_slurm_partitions`.

## Installing `slurmtools`

To install `slurmtools` use the following commands:

``` r
options(repos = c(
  "slurmtools" = "https://a2-ai.github.io/gh-pkg-mirror/slurmtools",
  getOption("repos")))
install.packages("slurmtools")
```
We are given a message when loading slurmtools that some options are not set and that default job submission will not work without them. These options are used for default arguments in the `submit_nonmem_model` function. Running `?submit_nonmem_model` we can see the documentation.

This function uses the inputs to populate a template Bash shell script that submits the NONMEM job to slurm. A default template file is supplied with the Project Starter and it can be modified to do additional tasks as long as they are possible within Bash.
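Before going further it can be handy to set those options up front. A minimal sketch (the paths are placeholders for wherever your template, submission log, and `bbi.yaml` actually live):

``` r
# Placeholder paths: point these at your own project's files
options(
  slurmtools.slurm_job_template_path = "model/nonmem/slurm-job-bbi.tmpl",
  slurmtools.submission_root         = "model/nonmem/submission-log",
  slurmtools.bbi_config_path         = "model/nonmem/bbi.yaml"
)
```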
By default these values are provided to the slurm template file:

``` r
default_template_list = list(
  partition = partition,
  parallel = parallel,
  ncpu = ncpu,
  job_name = sprintf("%s-nonmem-run", basename(.mod$absolute_model_path)),
  project_path = project_path,
  project_name = project_name,
  bbi_exe_path = Sys.which("bbi"),
  bbi_config_path = bbi_config_path,
  model_path = .mod$absolute_model_path,
  config_toml_path = config_toml_path,
  nmm_exe_path = Sys.which("nmm")
)
```
- `partition` is an argument to `submit_nonmem_model`

- `parallel` is `TRUE` if `ncpu > 1`, else `FALSE`

- `ncpu` is an argument to `submit_nonmem_model`

- `job_name` is created from the `.mod` argument supplied to `submit_nonmem_model`

- `bbi_exe_path` is determined via `Sys.which("bbi")`

- `bbi_config_path` is determined via `getOption("slurmtools.bbi_config_path")`

- `model_path` is determined from the `.mod` argument supplied to `submit_nonmem_model`

- `config_toml_path` is determined from the `.mod` argument supplied to `submit_nonmem_model` and is required to use `nmm` (NONMEM monitor)

- `nmm_exe_path` is determined via `Sys.which("nmm")`

If you need to feed more arguments to the template you simply supply them in the `slurm_template_opts` argument as a list (see the sketch below). More on that later.
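To see how those values (and anything extra passed through `slurm_template_opts`) end up in the script, here is a small sketch using the whisker package directly; the toy template and the `extra_sbatch_line` value are made up for illustration:

``` r
library(whisker)

# A toy template using the same {{placeholder}} syntax as the slurm templates
template <- "#SBATCH --partition={{partition}}
{{#parallel}}# parallel run on {{ncpu}} cpus{{/parallel}}
{{extra_sbatch_line}}"

# Values submit_nonmem_model would normally assemble, plus one extra value of
# the kind you could pass via slurm_template_opts
values <- list(
  partition = "cpu2mem4gb",
  parallel = TRUE,
  ncpu = 4,
  extra_sbatch_line = "#SBATCH --time=02:00:00"
)

cat(whisker.render(template, values))
```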
## Submitting a NONMEM job with `bbi`

To submit a NONMEM job, we need to supply either the path to a mod file or create a model object from `bbr`, and supply a `slurm-template.tmpl` file. To use `bbi` we also need a `bbi.yaml` file, which I've also supplied in `/model/nonmem/bbi.yaml` (and is also supplied with the R project starter).

Here is an example of a template file that will call `bbi`:

``` slurm-job-bbi.tmpl
#!/bin/bash
#SBATCH --job-name="{{job_name}}"
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task={{ncpu}}
#SBATCH --partition={{partition}}
#SBATCH --account={{project_name}}

# submit_nonmem_model uses the whisker package to populate template files
# https://github.com/edwindj/whisker

{{#parallel}}
{{bbi_exe_path}} nonmem run local {{model_path}}.mod --parallel --threads={{ncpu}} --config {{bbi_config_path}}
{{/parallel}}

{{^parallel}}
{{bbi_exe_path}} nonmem run local {{model_path}}.mod --config {{bbi_config_path}}
{{/parallel}}
```
This file will call `bbi` to run our supplied model (`{{model_path}}.mod`). If `ncpu > 1`, parallel will be true and the code between `{{#parallel}}` and `{{/parallel}}` will be populated; if `ncpu = 1`, parallel will be false and the code between `{{^parallel}}` and `{{/parallel}}` will be populated. By default, `submit_nonmem_model` will inject `Sys.which("bbi")` into the template, so if `bbi` is not on your path we'll have to supply the `bbi_exe_path` for it to start the NONMEM run.
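If `Sys.which("bbi")` comes back empty, one option is to inject the path yourself through `slurm_template_opts`, the same way `nmm_exe_path` is injected later in this vignette. A sketch, using the `mod` object and `nonmem` path defined below and an example install location for bbi:

``` r
submission <- slurmtools::submit_nonmem_model(
  mod,
  slurm_job_template_path = file.path(nonmem, "slurm-job-bbi.tmpl"),
  slurm_template_opts = list(bbi_exe_path = "/usr/local/bin/bbi")  # example path
)
```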
We will use a few different template files with different functionality so we'll inject those template file paths to `submit_nonmem_model`. However, we'll use the `submission-log` directory for the output, so we'll set that option as well as `bbi_config_path` so `submit_nonmem_model` defaults can be used. The slurm template files are saved in `~/model/nonmem/`. Additionally, there is a simple NONMEM control stream in `1001.mod` in the same directory that we can use for testing.

``` r
library(bbr)
library(here)

nonmem = file.path(here::here(), "vignettes", "model", "nonmem")

options('slurmtools.submission_root' = file.path(nonmem, "submission-log"))
options('slurmtools.bbi_config_path' = file.path(nonmem, "bbi.yaml"))
```

To create the `bbr` model object, we need to have both `1001.mod` and `1001.yaml` which contains metadata about the model in the supplied directory (`./model/nonmem/`). We'll check for `mod_number.yaml` and if it exists, read in the model; otherwise create it and then read it.

``` r
mod_number <- "1001"

if (file.exists(file.path(nonmem, paste0(mod_number, ".yaml")))) {
  mod <- bbr::read_model(file.path(nonmem, mod_number))
} else {
  mod <- bbr::new_model(file.path(nonmem, mod_number))
}
```
We can now submit the job and point to the template file in `model/nonmem/slurm-job-bbi.tmpl`.

``` r
submission <- slurmtools::submit_nonmem_model(
  mod,
  slurm_job_template_path = file.path(nonmem, "slurm-job-bbi.tmpl"),
)

submission
```

```
$status
[1] 0

$stdout
[1] "Submitted batch job 878\n"

$stderr
[1] ""

$timeout
[1] FALSE
```

We see a `status` with an exit code of 0 suggesting a successful command, and the `stdout` gives us the batch job number. We can use `slurmtools::get_slurm_jobs()` to monitor the status of the job. Here, we can supply the `user = "matthews"` argument to filter to just the jobs I've submitted.

``` r
slurmtools::get_slurm_jobs(user = 'matthews')
```

```
# A tibble: 11 × 10
   job_id job_state  cpus partition  standard_input standard_output
    <int> <chr>     <int> <chr>      <chr>          <chr>
 1    868 CANCELLED     1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
 2    869 FAILED        1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
 3    870 CANCELLED     1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
 4    871 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
 5    872 COMPLETED     1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
 6    873 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
 7    874 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
 8    875 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
 9    876 CANCELLED     1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
10    877 FAILED        1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
11    878 PENDING       1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
# ℹ 4 more variables: submit_time <dttm>, start_time <dttm>, user_name <chr>,
#   current_working_directory <chr>
```
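Because `get_slurm_jobs()` returns a tibble, it can be filtered like any other data frame; a quick sketch using the columns shown above:

``` r
library(dplyr)

jobs <- slurmtools::get_slurm_jobs(user = "matthews")

# Keep only jobs that are still pending or running
jobs %>%
  filter(job_state %in% c("PENDING", "RUNNING")) %>%
  select(job_id, job_state, partition, submit_time)
```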
If we look in the `slurmtools.submission_root` directory we can see the shell script that was generated with `submit_nonmem_model`, containing the whisker-replaced call to `bbi`.
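A quick way to inspect that generated script from R (a sketch; it assumes the option set earlier points at the `submission-log` directory):

``` r
# List the generated sbatch scripts and print the most recent one
scripts <- list.files(
  getOption("slurmtools.submission_root"),
  pattern = "\\.sh$",
  full.names = TRUE
)
cat(readLines(scripts[1]), sep = "\n")
```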
## Extending templates

Because the templates create a bash shell script there is an almost infinite number of things we can do with our template. **Anything you can do in bash you can do by appropriately updating the template file and injecting the needed information!**

Let's add a notification feature that will send a notification when the job has started and finished. We can use [ntfy.sh](https://ntfy.sh) and add the necessary info to our template to achieve this.

Here is a modified template file that adds a `JOBID=$SLURM_JOBID` and some ntfy calls. To get a notification we can supply `submit_nonmem_model` with the `ntfy` variable to send notifications. I'll use `ntfy = "ntfy_demo"` for this.
``` slurm-job-bbi-ntfy.tmpl
#!/bin/bash
#SBATCH --job-name="{{job_name}}"
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task={{ncpu}}
#SBATCH --partition={{partition}}
#SBATCH --account={{project_name}}

JOBID=$SLURM_JOBID

# submit_nonmem_model uses the whisker package to populate template files
# https://github.com/edwindj/whisker

{{#ntfy}}
curl -d "Starting model run: {{job_name}} $JOBID" ntfy.sh/{{ntfy}}
{{/ntfy}}

{{#parallel}}
{{bbi_exe_path}} nonmem run local {{model_path}}.mod --parallel --threads={{ncpu}} --config {{bbi_config_path}}
{{/parallel}}

{{^parallel}}
{{bbi_exe_path}} nonmem run local {{model_path}}.mod --config {{bbi_config_path}}
{{/parallel}}

{{#ntfy}}
curl -d "Finished model run: {{job_name}} $JOBID" ntfy.sh/{{ntfy}}
{{/ntfy}}
```

Since we've already run this model we will provide the `overwrite = TRUE` argument to force a new nonmem run.

``` r
submission_ntfy <- slurmtools::submit_nonmem_model(
  mod,
  slurm_job_template_path = file.path(nonmem, "slurm-job-bbi-ntfy.tmpl"),
  overwrite = TRUE,
  slurm_template_opts = list(
    ntfy = "ntfy_demo")
)

submission_ntfy
```

```
$status
[1] 0

$stdout
[1] "Submitted batch job 879\n"

$stderr
[1] ""

$timeout
[1] FALSE
```
We again get a 0 exit code status and now instead of using `slurmtools::get_slurm_jobs()` to monitor the job, we can rely on the new notifications we just set up. ![NONMEM job starting ntfy alert](data/images/ntfy_starting.png)

and when the run finished we get another notification: ![NONMEM Job finished ntfy alert](data/images/ntfy_finished.png)

Note that the run number will match the run specified in `submission$stdout`. We can see the new shell script this updated template file generated:
``` 1001.sh
#!/bin/bash
#SBATCH --job-name="1001-nonmem-run"
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --partition=cpu2mem4gb
#SBATCH --account=slurmtools

JOBID=$SLURM_JOBID

curl -d "Starting model run: 1001-nonmem-run $JOBID" ntfy.sh/ntfy_demo

/usr/local/bin/bbi nonmem run local /cluster-data/user-homes/matthews/Projects/slurmtools_vignette/model/nonmem/1001.mod --config /cluster-data/user-homes/matthews/Projects/slurmtools_vignette/model/nonmem/bbi.yaml

curl -d "Finished model run: 1001-nonmem-run $JOBID" ntfy.sh/ntfy_demo
```

To reiterate, this template file is run as a bash shell script so anything you can do in bash you can put into the template and pass the needed arguments and customize the behavior to your liking.
-#> ── Needed slurmtools options ───────────────────────────────────────────────────
-#> ✖ option('slurmtools.slurm_job_template_path') is not set.
-#> ✖ option('slurmtools.submission_root') is not set.
-#> ✖ option('slurmtools.bbi_config_path') is not set.
-#> ℹ Please set all options for job submission defaults to work.
-library(bbr)
-library(here)
-#> here() starts at /cluster-data/user-homes/matthews/Packages/slurmtools
-
-nonmem = file.path(here::here(), "vignettes", "model", "nonmem")
-options('slurmtools.submission_root' = file.path(nonmem, "submission-log"))
Instead of using bbi we can use nmm
(NONMEM Monitor) which
-currently has some additional functionality of sending notifications
-about zero gradients, missing -1E9 lines in ext file, and some very
-basic control stream errors. Currently, only slack or ntfy.sh is supported for receiving notifications. To
-use nmm
you can install the latest release from the github
-repository linked above.
We can update the template file accordingly:

``` slurm-job-nmm.tmpl
#!/bin/bash
#SBATCH --job-name="{{job_name}}"
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task={{ncpu}}
#SBATCH --partition={{partition}}

{{nmm_exe_path}} -c {{config_toml_path}} run
```
By default, `submit_nonmem_model` will provide `nmm_exe_path` and `config_toml_path` to the template. Just like with `bbi_exe_path`, `nmm_exe_path` is determined with `Sys.which("nmm")`, which may or may not find the nmm binary depending on whether it is on your path. We can inject the `nmm_exe_path` like we did with `bbi_exe_path` and assume it's not on our path.
The `config.toml` file controls what `nmm` will monitor, where to look for files, and how to alert you. We'll use `generate_nmm_config()` to create this file. First we can look at the documentation to see what type of information we should pass to this function.
``` r
mod_number <- "1001"

if (file.exists(file.path(nonmem, paste0(mod_number, ".yaml")))) {
  mod <- bbr::read_model(file.path(nonmem, mod_number))
} else {
  mod <- bbr::new_model(file.path(nonmem, mod_number))
}
```

This generates the following toml file. Notice that alert is set to 'None', and both email and topic are empty. Since we're in vignettes we'll need to update the `watched_dir` and `output_dir`.

``` 1001.toml
model_number = '1001'
files_to_track = [ 'lst', 'ext', 'grd' ]
tmp_dir = '/tmp'
watched_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/model/nonmem'
output_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/model/nonmem/in_progress'
poll_duration = 1
alert = 'None'
level = 'Debug'
email = ''
threads = 1
topic = ''
```

``` r
slurmtools::generate_nmm_config(
  mod,
  watched_dir = "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem",
  output_dir = "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/in_progress")
```

This updates the `1001.toml` config file to:

``` 1001.toml
model_number = '1001'
files_to_track = [ 'lst', 'ext', 'grd' ]
tmp_dir = '/tmp'
watched_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem'
output_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/in_progress'
poll_duration = 1
alert = 'None'
level = 'Debug'
email = ''
threads = 1
topic = ''
```
We can now run `submit_nonmem_model` and get essentially the same behavior as running with `bbi`. On linux, `~/.local/bin/` will be on your path, so saving binaries there is a good approach.
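A quick sanity check that the binary is actually visible before submitting (a sketch; adjust the path to wherever you saved nmm):

``` r
# Is nmm on the PATH, and does the expected install location exist?
Sys.which("nmm")
file.exists(normalizePath("~/.local/bin/nmm", mustWork = FALSE))
```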
``` r
submission_nmm <- slurmtools::submit_nonmem_model(
  mod,
  overwrite = TRUE,
  slurm_job_template_path = file.path(nonmem, "slurm-job-nmm.tmpl"),
  slurm_template_opts = list(
    nmm_exe_path = normalizePath("~/.local/bin/nmm"))
)

submission_nmm
#> $status
#> [1] 0
#>
#> $stdout
#> [1] "Submitted batch job 876\n"
#>
#> $stderr
#> [1] ""
#>
#> $timeout
#> [1] FALSE
```

``` r
slurmtools::get_slurm_jobs()
#> # A tibble: 9 × 10
#>   job_id job_state  cpus partition  standard_input standard_output
#>    <int> <chr>     <int> <chr>      <chr>          <chr>
#> 1    868 CANCELLED     1 cpu2mem4gb /dev/null      /cluster-data/user-homes/mat…
#> 2    869 FAILED        1 cpu2mem4gb /dev/null      /cluster-data/user-homes/mat…
#> 3    870 CANCELLED     1 cpu2mem4gb /dev/null      /cluster-data/user-homes/mat…
#> 4    871 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d0…
#> 5    872 COMPLETED     1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d0…
#> 6    873 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d0…
#> 7    874 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d0…
#> 8    875 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d0…
#> 9    876 PENDING       1 cpu2mem4gb /dev/null      /cluster-data/user-homes/mat…
#> # ℹ 4 more variables: submit_time <dttm>, start_time <dttm>, user_name <chr>,
#> #   current_working_directory <chr>
```
The one difference between using `nmm` and `bbi` is that a new directory is created that contains a log file that caught some issues with our run. This file is updated as NONMEM is running and monitors gradient values, parameters that hit zero, as well as other errors from bbi. Looking at the first few lines we can see that `bbi` was successfully able to call NONMEM. We also see an info level log that OMEGA(2,1) has 0 value – in our mod file we don't specify any omega values off the diagonal so these are fixed at 0. Finally, we see that GRD(6) hit 0 relatively early in the run.
```
20:12:36 [INFO] bbi log: time="2024-08-27T20:12:36Z" level=info msg="Successfully loaded default configuration from /cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/bbi.yaml"
20:12:36 [INFO] bbi log: time="2024-08-27T20:12:36Z" level=info msg="Beginning Local Path"
20:12:36 [INFO] bbi log: time="2024-08-27T20:12:36Z" level=info msg="A total of 1 models have completed the initial preparation phase"
20:12:36 [INFO] bbi log: time="2024-08-27T20:12:36Z" level=info msg="[1001] Beginning local work phase"
20:12:58 [INFO] OMEGA(2,1) has 0 value
20:12:58 [INFO] SIGMA(2,1) has 0 value
20:13:00 [INFO] SIGMA(2,1) has 0 value
20:13:00 [INFO] OMEGA(2,1) has 0 value
20:13:04 [INFO] SIGMA(2,1) has 0 value
20:13:04 [INFO] OMEGA(2,1) has 0 value
20:13:04 [WARN] "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/1001/1001.grd" has 0 gradient for parameter: GRD(6)
```

After a run has finished, several messages are sent to the log after a final check of the files listed in the `files_to_track` field of the `1001.toml` file.

```
20:13:16 [INFO] Received Exit code: exit status: 0
20:13:16 [WARN] 1001.ext: Missing ext final output lines. Observed lines were: [-1000000000.0, -1000000004.0, -1000000006.0, -1000000007.0]
20:13:16 [WARN] "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/1001/1001.grd": The following parameters hit zero gradient through the run: ["GRD(6)"]
```

We see that GRD(6) hit zero during the run and that only a subset of the -1E9 lines were present in the .ext file.
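If you want to peek at that log from R rather than the shell, a minimal sketch (assuming the log lands under the `output_dir`/`in_progress` directory configured in `1001.toml`; the exact file name may differ):

``` r
log_dir <- file.path(nonmem, "in_progress")
log_files <- list.files(log_dir, full.names = TRUE, recursive = TRUE)

if (length(log_files) > 0) {
  # The most recently modified file should be the live log for the current run
  latest <- log_files[which.max(file.mtime(log_files))]
  cat(tail(readLines(latest), 10), sep = "\n")
}
```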
Like we did with `bbi`, where we altered the slurm template file to get notifications from ntfy.sh, `nmm` has this feature built in! The messages in the log file that relate to zero gradients, missing -1E9 lines, and 0 parameter values can also be sent to ntfy by altering the `1001.toml` file. We can get these alerts in real time without having to dig through a noisy log file.

Let's update our call to `generate_nmm_config` to have `nmm` send notifications to the `NONMEMmonitor` topic on ntfy.sh.
``` r
slurmtools::generate_nmm_config(
  mod,
  alert = "Ntfy",
  topic = "NONMEMmonitor",
  watched_dir = "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem",
  output_dir = "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/in_progress")
```

This updates the `1001.toml` file to this:

``` 1001.toml
model_number = '1001'
files_to_track = [ 'lst', 'ext', 'grd' ]
tmp_dir = '/tmp'
watched_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem'
output_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/in_progress'
poll_duration = 1
alert = 'Ntfy'
level = 'Debug'
email = ''
threads = 1
topic = 'NONMEMmonitor'
```

When we re-run the `submit_nonmem_model` call we will now get ntfy notifications. One thing to note is that `nmm` will print full paths in the log, but will only send notifications with the `model_number` (or `model_number.file_extension`).
``` r
submission_nmm <- slurmtools::submit_nonmem_model(
  mod,
  overwrite = TRUE,
  slurm_job_template_path = file.path(nonmem, "slurm-job-nmm.tmpl"),
  slurm_template_opts = list(
    nmm_exe_path = normalizePath("~/.local/bin/nmm-x86_64-unknown-linux-gnu/nmm"))
)
#> Warning in normalizePath("~/.local/bin/nmm-x86_64-unknown-linux-gnu/nmm"):
#> path[1]="/cluster-data/user-homes/matthews/.local/bin/nmm-x86_64-unknown-linux-gnu/nmm":
#> No such file or directory

submission_nmm
#> $status
#> [1] 0
#>
#> $stdout
#> [1] "Submitted batch job 877\n"
#>
#> $stderr
#> [1] ""
#>
#> $timeout
#> [1] FALSE
```

``` r
slurmtools::get_slurm_jobs(user = "matthews")
#> # A tibble: 10 × 10
#>    job_id job_state  cpus partition  standard_input standard_output
#>     <int> <chr>     <int> <chr>      <chr>          <chr>
#>  1    868 CANCELLED     1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
#>  2    869 FAILED        1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
#>  3    870 CANCELLED     1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
#>  4    871 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
#>  5    872 COMPLETED     1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
#>  6    873 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
#>  7    874 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
#>  8    875 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
#>  9    876 PENDING       1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
#> 10    877 PENDING       1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
#> # ℹ 4 more variables: submit_time <dttm>, start_time <dttm>, user_name <chr>,
#> #   current_working_directory <chr>
```
This gives us the notifications in a much more digestible format.

``` r
library(slurmtools)
library(bbr)
library(here)

nonmem = file.path(here::here(), "vignettes", "model", "nonmem")
options('slurmtools.submission_root' = file.path(nonmem, "submission-log"))
```

``` r
mod_number <- "1001"

if (file.exists(file.path(nonmem, paste0(mod_number, ".yaml")))) {
  mod <- bbr::read_model(file.path(nonmem, mod_number))
} else {
  mod <- bbr::new_model(file.path(nonmem, mod_number))
}
```
There is also functionality to pair `nmm` with slack_notifier and get messages sent directly to you via a slack bot. This requires you to download the slack_notifier binaries and add them to your path so `nmm` can find it. You can download the latest release, extract the binary, and again save it to `~/.local/bin`.

``` r
Sys.which("slack_notifier")
#>                                               slack_notifier
#> "/cluster-data/user-homes/matthews/.local/bin/slack_notifier"
```

slack_notifier requires an additional `slack_notifier/config.yaml` file that contains the slack bot OAuth token, which is found at `https://api.slack.com/apps/<YOUR APP ID>/oauth`.
Again, we need to update the `1001.toml` file to get slack notifications. We need to set `alert = "slack"` and provide the `email` associated with the slack account in `generate_nmm_config`.

``` r
slurmtools::generate_nmm_config(
  mod,
  alert = "slack",
  email = "matthews@a2-ai.com",
  watched_dir = "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem",
  output_dir = "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/in_progress")
```

This generates the following toml file:

``` 1001.toml
model_number = '1001'
files_to_track = [ 'lst', 'ext', 'grd' ]
tmp_dir = '/tmp'
watched_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem'
output_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/in_progress'
poll_duration = 1
alert = 'Slack'
level = 'Debug'
email = 'matthews@a2-ai.com'
threads = 1
topic = ''
```
With `alert = 'Slack'` and `email` set in the `1001.toml` file, `nmm` will send slack notifications directly to you when a NONMEM run starts. It will reply to that message with notifications if any gradients hit 0, and when the run finishes it checks whether all -1E9 lines are present in the .ext file and gives another message about any parameters that hit 0 gradient.
``` r
submission_nmm <- slurmtools::submit_nonmem_model(
  mod,
  overwrite = TRUE,
  slurm_job_template_path = file.path(nonmem, "slurm-job-nmm.tmpl"),
  slurm_template_opts = list(
    nmm_exe_path = normalizePath("~/.local/bin/nmm")
  )
)

submission_nmm
#> $status
#> [1] 0
#>
#> $stdout
#> [1] "Submitted batch job 880\n"
#>
#> $stderr
#> [1] ""
#>
#> $timeout
#> [1] FALSE
```

``` r
slurmtools::get_slurm_jobs()
#> # A tibble: 13 × 10
#>    job_id job_state  cpus partition  standard_input standard_output
#>     <int> <chr>     <int> <chr>      <chr>          <chr>
#>  1    868 CANCELLED     1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
#>  2    869 FAILED        1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
#>  3    870 CANCELLED     1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
#>  4    871 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
#>  5    872 COMPLETED     1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
#>  6    873 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
#>  7    874 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
#>  8    875 FAILED        1 cpu2mem4gb /dev/null      /tmp/RtmpIBC04D/Rbuild1af3d…
#>  9    876 CANCELLED     1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
#> 10    877 FAILED        1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
#> 11    878 RUNNING       1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
#> 12    879 COMPLETED     1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
#> 13    880 PENDING       1 cpu2mem4gb /dev/null      /cluster-data/user-homes/ma…
#> # ℹ 4 more variables: submit_time <dttm>, start_time <dttm>, user_name <chr>,
#> #   current_working_directory <chr>
```