diff --git a/.gitignore b/.gitignore index 3c3a34f..7e6348b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ .Rproj.user docs -inst/doc /doc/ /Meta/ diff --git a/inst/doc/Running-nonmem.R b/inst/doc/Running-nonmem.R new file mode 100644 index 0000000..0f22cd3 --- /dev/null +++ b/inst/doc/Running-nonmem.R @@ -0,0 +1,81 @@ +## ----include = FALSE---------------------------------------------------------- +#removing generated files from running this vignette +nonmem <- file.path("model", "nonmem") + +unlink(file.path(nonmem, "1001"), recursive = TRUE) +unlink(file.path(nonmem, "1001.yaml")) +unlink(file.path(nonmem, "1001.toml")) +unlink(file.path(nonmem, "submission-log"), recursive = TRUE) +unlink(file.path(nonmem, "in_progress"), recursive = TRUE) + +## ----include = FALSE---------------------------------------------------------- +knitr::opts_chunk$set( + collapse = TRUE, + comment = "" +) + +## ----setup-------------------------------------------------------------------- +library(slurmtools) + +## ----------------------------------------------------------------------------- +Sys.which("bbi") + +## ----------------------------------------------------------------------------- +library(bbr) +library(here) + +nonmem = file.path(here::here(), "vignettes", "model", "nonmem") + +options('slurmtools.submission_root' = file.path(nonmem, "submission-log")) +options('slurmtools.bbi_config_path' = file.path(nonmem, "bbi.yaml")) + +## ----------------------------------------------------------------------------- +mod_number <- "1001" + +if (file.exists(file.path(nonmem, paste0(mod_number, ".yaml")))) { + mod <- bbr::read_model(file.path(nonmem, mod_number)) +} else { + mod <- bbr::new_model(file.path(nonmem, mod_number)) +} + +## ----------------------------------------------------------------------------- +submission <- slurmtools::submit_nonmem_model( + mod, + slurm_job_template_path = file.path(nonmem, "slurm-job-bbi.tmpl"), +) + +submission + +## ----------------------------------------------------------------------------- +slurmtools::get_slurm_jobs(user = 'matthews') + +## ----------------------------------------------------------------------------- +submission_ntfy <- slurmtools::submit_nonmem_model( + mod, + slurm_job_template_path = file.path(nonmem, "slurm-job-bbi-ntfy.tmpl"), + overwrite = TRUE, + slurm_template_opts = list( + ntfy = "ntfy_demo") +) + +submission_ntfy + +## ----include = FALSE---------------------------------------------------------- +#cancelling any running nonmem jobs +state <- slurmtools::get_slurm_jobs(user = "matthews") + +if (any(state$job_state %in% c("RUNNING", "CONFIGURING"))) { + for (job_id in state %>% dplyr::filter(job_state == "RUNNING") %>% dplyr::pull("job_id")) { + processx::run("scancel", args = paste0(job_id)) + } +} + +#removing generated files from running this vignette +nonmem <- file.path("model", "nonmem") + +unlink(file.path(nonmem, "1001"), recursive = TRUE) +unlink(file.path(nonmem, "1001.yaml")) +unlink(file.path(nonmem, "1001.toml")) +unlink(file.path(nonmem, "submission-log"), recursive = TRUE) +unlink(file.path(nonmem, "in_progress"), recursive = TRUE) + diff --git a/inst/doc/Running-nonmem.Rmd b/inst/doc/Running-nonmem.Rmd new file mode 100644 index 0000000..ce7a020 --- /dev/null +++ b/inst/doc/Running-nonmem.Rmd @@ -0,0 +1,269 @@ +--- +title: "Running Nonmem with slurmtools" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Running Nonmem with slurmtools} + %\VignetteEngine{knitr::rmarkdown} + 
%\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# removing generated files from running this vignette
nonmem <- file.path("model", "nonmem")

unlink(file.path(nonmem, "1001"), recursive = TRUE)
unlink(file.path(nonmem, "1001.yaml"))
unlink(file.path(nonmem, "1001.toml"))
unlink(file.path(nonmem, "submission-log"), recursive = TRUE)
unlink(file.path(nonmem, "in_progress"), recursive = TRUE)
```

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = ""
)
```

# Slurmtools for submitting NONMEM runs

`slurmtools` is an R package for interacting with slurm (formerly known as the **S**imple **L**inux **U**tility for **R**esource **M**anagement) and submitting NONMEM jobs. You can submit a NONMEM job with `submit_nonmem_model`, view current jobs with `get_slurm_jobs`, and see the available partitions with `get_slurm_partitions`.

## Installing `slurmtools`

To install `slurmtools`, use the following commands:

``` r
options(repos = c(
  "slurmtools" = "https://a2-ai.github.io/gh-pkg-mirror/slurmtools",
  getOption("repos")))
install.packages("slurmtools")
```

```{r setup}
library(slurmtools)
```

When loading slurmtools we are given a message that some options are not set and that default job submission will not work without them. These options are used for default arguments in the `submit_nonmem_model` function. Running `?submit_nonmem_model` we can see the documentation.

![Help view for `submit_nonmem_model` function](data/images/submit_nonmem_model_help.png)

This function uses the inputs to populate a template Bash shell script that submits the NONMEM job to slurm. A default template file is supplied with the Project Starter, and it can be modified to do additional tasks as long as they are possible within Bash.

By default these values are provided to the slurm template file:

``` r
default_template_list = list(
  partition = partition,
  parallel = parallel,
  ncpu = ncpu,
  job_name = sprintf("%s-nonmem-run", basename(.mod$absolute_model_path)),
  project_path = project_path,
  project_name = project_name,
  bbi_exe_path = Sys.which("bbi"),
  bbi_config_path = bbi_config_path,
  model_path = .mod$absolute_model_path,
  config_toml_path = config_toml_path,
  nmm_exe_path = Sys.which("nmm")
)
```

- `partition` is an argument to `submit_nonmem_model`

- `parallel` is `TRUE` if `ncpu > 1`, else `FALSE`

- `ncpu` is an argument to `submit_nonmem_model`

- `job_name` is created from the `.mod` argument supplied to `submit_nonmem_model`

- `bbi_exe_path` is determined via `Sys.which("bbi")`

- `bbi_config_path` is determined via `getOption("slurmtools.bbi_config_path")`

- `model_path` is determined from the `.mod` argument supplied to `submit_nonmem_model`

- `config_toml_path` is determined from the `.mod` argument supplied to `submit_nonmem_model` and is required to use `nmm` (NONMEM monitor)

- `nmm_exe_path` is determined via `Sys.which("nmm")`

If you need to feed more arguments to the template, you simply supply them in the `slurm_template_opts` argument as a list. More on that later.

## Submitting a NONMEM job with `bbi`

To submit a NONMEM job, we need to supply either the path to a mod file or a model object created with `bbr`, and supply a `slurm-template.tmpl` file. To use `bbi` we also need a `bbi.yaml` file, which I've supplied in `/model/nonmem/bbi.yaml` (and is also supplied with the R project starter).
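If a project does not yet have a `bbi.yaml`, one is typically generated with `bbi init`, pointed at the directory containing your NONMEM installations. This is only a sketch: the `--dir` flag and the `/opt/NONMEM` path are assumptions to verify against `bbi init --help` and your own cluster layout.

``` r
# create model/nonmem/bbi.yaml next to the models (paths are illustrative)
processx::run(
  "bbi", c("init", "--dir", "/opt/NONMEM"),
  wd = file.path("model", "nonmem")
)
```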
Here is an example of a template file that will call `bbi`:

``` slurm-job-bbi.tmpl
#!/bin/bash
#SBATCH --job-name="{{job_name}}"
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task={{ncpu}}
#SBATCH --partition={{partition}}
#SBATCH --account={{project_name}}

# submit_nonmem_model uses the whisker package to populate template files
# https://github.com/edwindj/whisker

{{#parallel}}
{{bbi_exe_path}} nonmem run local {{model_path}}.mod --parallel --threads={{ncpu}} --config {{bbi_config_path}}
{{/parallel}}

{{^parallel}}
{{bbi_exe_path}} nonmem run local {{model_path}}.mod --config {{bbi_config_path}}
{{/parallel}}
```

This file will call `bbi` to run our supplied model (`{{model_path}}.mod`). If `ncpu > 1`, parallel will be true and the code between `{{#parallel}}` and `{{/parallel}}` will be populated. If `ncpu = 1`, parallel will be false and the code between `{{^parallel}}` and `{{/parallel}}` will be populated. By default, `submit_nonmem_model` will inject `Sys.which("bbi")` into the template, so if `bbi` is not on your path we'll have to supply the `bbi_exe_path` for it to start the NONMEM run.

```{r}
Sys.which("bbi")
```

We will use a few different template files with different functionality, so we'll inject those template file paths into `submit_nonmem_model`. However, we'll use the `submission-log` directory for the output, so we'll set that option as well as `bbi_config_path` so the `submit_nonmem_model` defaults can be used. The slurm template files are saved in `~/model/nonmem/`. Additionally, there is a simple NONMEM control stream in `1001.mod` in the same directory that we can use for testing.

```{r}
library(bbr)
library(here)

nonmem = file.path(here::here(), "vignettes", "model", "nonmem")

options('slurmtools.submission_root' = file.path(nonmem, "submission-log"))
options('slurmtools.bbi_config_path' = file.path(nonmem, "bbi.yaml"))
```

To create the `bbr` model object, we need to have both `1001.mod` and `1001.yaml`, which contains metadata about the model, in the supplied directory (`./model/nonmem/`). We'll check for `mod_number.yaml` and, if it exists, read in the model; otherwise we create it.

```{r}
mod_number <- "1001"

if (file.exists(file.path(nonmem, paste0(mod_number, ".yaml")))) {
  mod <- bbr::read_model(file.path(nonmem, mod_number))
} else {
  mod <- bbr::new_model(file.path(nonmem, mod_number))
}
```

We can now submit the job and point to the template file in `model/nonmem/slurm-job-bbi.tmpl`.

```{r}
submission <- slurmtools::submit_nonmem_model(
  mod,
  slurm_job_template_path = file.path(nonmem, "slurm-job-bbi.tmpl"),
)

submission
```

We see a `status` with an exit code of 0 suggesting a successful command, and the `stdout` gives us the batch job number. We can use `slurmtools::get_slurm_jobs()` to monitor the status of the job. Here, we can supply the `user = "matthews"` argument to filter to just the jobs I've submitted.

```{r}
slurmtools::get_slurm_jobs(user = 'matthews')
```

If we look in the `slurmtools.submission_root` directory we can see the shell script that was generated with `submit_nonmem_model`.
Here is the whisker-replaced call to `bbi`:

``` 1001.sh
/usr/local/bin/bbi nonmem run local /cluster-data/user-homes/matthews/Projects/slurmtools_vignette/model/nonmem/1001.mod --config /cluster-data/user-homes/matthews/Projects/slurmtools_vignette/model/nonmem/bbi.yaml
```

## Extending templates

Because the templates create a Bash shell script, there is an almost infinite number of things we can do with our template. **Anything you can do in Bash you can do by appropriately updating the template file and injecting the needed information!**

Let's add a notification feature that will send a notification when the job has started and finished. We can use [ntfy.sh](https://ntfy.sh) and add the necessary info to our template to achieve this.

Here is a modified template file that adds `JOBID=$SLURM_JOBID` and some ntfy calls. To get a notification, we can supply `submit_nonmem_model` with an `ntfy` variable. I'll use `ntfy = "ntfy_demo"` for this.

``` slurm-job-bbi-ntfy.tmpl
#!/bin/bash
#SBATCH --job-name="{{job_name}}"
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task={{ncpu}}
#SBATCH --partition={{partition}}
#SBATCH --account={{project_name}}

JOBID=$SLURM_JOBID

# submit_nonmem_model uses the whisker package to populate template files
# https://github.com/edwindj/whisker

{{#ntfy}}
curl -d "Starting model run: {{job_name}} $JOBID" ntfy.sh/{{ntfy}}
{{/ntfy}}

{{#parallel}}
{{bbi_exe_path}} nonmem run local {{model_path}}.mod --parallel --threads={{ncpu}} --config {{bbi_config_path}}
{{/parallel}}

{{^parallel}}
{{bbi_exe_path}} nonmem run local {{model_path}}.mod --config {{bbi_config_path}}
{{/parallel}}

{{#ntfy}}
curl -d "Finished model run: {{job_name}} $JOBID" ntfy.sh/{{ntfy}}
{{/ntfy}}
```

Since we've already run this model, we will provide the `overwrite = TRUE` argument to force a new NONMEM run.

```{r}
submission_ntfy <- slurmtools::submit_nonmem_model(
  mod,
  slurm_job_template_path = file.path(nonmem, "slurm-job-bbi-ntfy.tmpl"),
  overwrite = TRUE,
  slurm_template_opts = list(
    ntfy = "ntfy_demo")
)

submission_ntfy
```

We again get a 0 exit code status, and now instead of using `slurmtools::get_slurm_jobs()` to monitor the job, we can rely on the new notifications we just set up. ![NONMEM job starting ntfy alert](data/images/ntfy_starting.png)

And when the run finishes, we get another notification: ![NONMEM Job finished ntfy alert](data/images/ntfy_finished.png)

Note that the run number will match the run specified in `submission$stdout`. We can see the new shell script this updated template file generated:

``` 1001.sh
#!/bin/bash
#SBATCH --job-name="1001-nonmem-run"
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --partition=cpu2mem4gb
#SBATCH --account=slurmtools

JOBID=$SLURM_JOBID

curl -d "Starting model run: 1001-nonmem-run $JOBID" ntfy.sh/ntfy_demo

/usr/local/bin/bbi nonmem run local /cluster-data/user-homes/matthews/Projects/slurmtools_vignette/model/nonmem/1001.mod --config /cluster-data/user-homes/matthews/Projects/slurmtools_vignette/model/nonmem/bbi.yaml

curl -d "Finished model run: 1001-nonmem-run $JOBID" ntfy.sh/ntfy_demo
```

To reiterate, this template file is run as a Bash shell script, so anything you can do in Bash you can put into the template; pass the needed arguments and customize the behavior to your liking.
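To make the templating concrete, here is a minimal, standalone sketch of the substitution that `submit_nonmem_model` performs via the whisker package. The values below are made up for illustration; the real call uses the defaults listed earlier.

``` r
library(whisker)

template <- '{{#parallel}}{{bbi_exe_path}} nonmem run local {{model_path}}.mod --parallel --threads={{ncpu}}{{/parallel}}
{{^parallel}}{{bbi_exe_path}} nonmem run local {{model_path}}.mod{{/parallel}}'

values <- list(
  parallel = TRUE,
  ncpu = 4,
  bbi_exe_path = "/usr/local/bin/bbi",
  model_path = "model/nonmem/1001"
)

# prints the populated bbi call, much as it would land in the generated shell script
cat(whisker.render(template, values))
```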
+ +```{r, include = FALSE} +#cancelling any running nonmem jobs +state <- slurmtools::get_slurm_jobs(user = "matthews") + +if (any(state$job_state %in% c("RUNNING", "CONFIGURING"))) { + for (job_id in state %>% dplyr::filter(job_state == "RUNNING") %>% dplyr::pull("job_id")) { + processx::run("scancel", args = paste0(job_id)) + } +} + +#removing generated files from running this vignette +nonmem <- file.path("model", "nonmem") + +unlink(file.path(nonmem, "1001"), recursive = TRUE) +unlink(file.path(nonmem, "1001.yaml")) +unlink(file.path(nonmem, "1001.toml")) +unlink(file.path(nonmem, "submission-log"), recursive = TRUE) +unlink(file.path(nonmem, "in_progress"), recursive = TRUE) +``` diff --git a/inst/doc/Running-nonmem.html b/inst/doc/Running-nonmem.html new file mode 100644 index 0000000..94827a2 --- /dev/null +++ b/inst/doc/Running-nonmem.html @@ -0,0 +1,638 @@ + + + + +
Running Nonmem with slurmtools

Slurmtools for submitting NONMEM runs

slurmtools
is an R package for interacting with slurm
+(formerly known as Simple Linux
+Utility for Resource
+Management) and submitting NONMEM jobs. You can submit
+a NONMEM job with submit_nonmem_model
, you can view current
+jobs with get_slurm_jobs
, and you can see the available
+partitions with get_slurm_partitions
.
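Of those three, `get_slurm_partitions()` is the simplest; as used in this vignette it is called with no arguments and returns the partition names that can later be passed as `partition`:

``` r
library(slurmtools)

# list the partitions visible to you on this cluster
get_slurm_partitions()
```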
Installing slurmtools
To install slurmtools
use the following commands:
options(repos = c(
+ "slurmtools" = "https://a2-ai.github.io/gh-pkg-mirror/slurmtools",
+ getOption("repos")))
+install.packages("slurmtools")
We are given a message when loading slurmtools that some options are
+not set and that default job submission will not work without them.
+These options are used for default arguments in the
+submit_nonmem_model
function. Running
+?submit_nonmem_model
we can see the documentation
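The three options named in that startup message can be set once per session (or in an `.Rprofile`); the paths below are illustrative:

``` r
options(
  "slurmtools.slurm_job_template_path" = file.path("model", "nonmem", "slurm-job-bbi.tmpl"),
  "slurmtools.submission_root"         = file.path("model", "nonmem", "submission-log"),
  "slurmtools.bbi_config_path"         = file.path("model", "nonmem", "bbi.yaml")
)
```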
This function uses the inputs to populate a template Bash shell +script that submits the NONMEM job to slurm. A default template file is +supplied with the Project Starter and it can be modified to do +additional tasks as long as they are possible within Bash.
+By default these values are provided to the slurm template file:
+default_template_list = list(
+ partition = partition,
+ parallel = parallel,
+ ncpu = ncpu,
+ job_name = sprintf("%s-nonmem-run", basename(.mod$absolute_model_path)),
+ project_path = project_path,
+ project_name = project_name,
+ bbi_exe_path = Sys.which("bbi"),
+ bbi_config_path = bbi_config_path,
+ model_path = .mod$absolute_model_path,
+ config_toml_path = config_toml_path,
+ nmm_exe_path = Sys.which("nmm")
+)
partition
is an argument to
+submit_nonmem_model
parallel
is TRUE
if
+ncpu > 1
, else FALSE
ncpu
is an argument to
+submit_nonmem_model
job_name
is created from the .mod
+argument supplied to submit_nonmem_model
bbi_exe_path
is determined via
+Sys.which("bbi")
bbi_config_path
is determined via
+getOption(“slurmtools.bbi_config_path”)
model_path
is determined from the .mod
+argument supplied to submit_nonmem_model
config_toml_path
is determined from the
+.mod
argument supplied to submit_nonmem_model
+and is required to use nmm
(NONMEM monitor)
nmm_exe_path
is determined via
+Sys.which("nmm")
If you need to feed more arguments to the template you simply supply
+them in the slurm_template_opts
argument as a list. More on
+that later.
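As a preview of that, any extra name in `slurm_template_opts` becomes available to the template, so a template line such as `#SBATCH --time={{walltime}}` (a hypothetical tag, not part of the default template) could be filled like this:

``` r
# mod and nonmem are created later in this vignette
submission <- slurmtools::submit_nonmem_model(
  mod,
  slurm_job_template_path = file.path(nonmem, "slurm-job-bbi.tmpl"),
  slurm_template_opts = list(walltime = "02:00:00")
)
```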
Submitting a NONMEM job with bbi
To submit a NONMEM job, we need to supply either the path to a mod
+file or a model object created with bbr
, and supply a
+slurm-template.tmpl
file. To use bbi
we also
+need a bbi.yaml
file, which I’ve also supplied in
+/model/nonmem/bbi.yaml
(and is also supplied with the R
+project starter).
Here is an example of a template file that will call
+bbi
:
#!/bin/bash
+#SBATCH --job-name="{{job_name}}"
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task={{ncpu}}
+#SBATCH --partition={{partition}}
+#SBATCH --account={{project_name}}
+
+# submit_nonmem_model uses the whisker package to populate template files
+# https://github.com/edwindj/whisker
+
+{{#parallel}}
+{{bbi_exe_path}} nonmem run local {{model_path}}.mod --parallel --threads={{ncpu}} --config {{bbi_config_path}}
+{{/parallel}}
+
+
+{{^parallel}}
+{{bbi_exe_path}} nonmem run local {{model_path}}.mod --config {{bbi_config_path}}
+{{/parallel}}
+This file will call bbi
to run our supplied model
+({{model_path}}.mod
). If ncpu > 1
then
+parallel will be true and the code between {{#parallel}}
+and {{/parallel}}
will be populated. If
+ncpu = 1
then parallel will be false and the code between
+{{^parallel}}
and {{/parallel}}
will be
+populated. By default, submit_nonmem_model
will inject
+Sys.which("bbi")
into the template, so if bbi
+is not on your path we’ll have to supply the bbi_exe_path
+for it to start the NONMEM run.
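If `Sys.which("bbi")` comes back empty, one way to supply the path is through `slurm_template_opts`, the same mechanism used for `nmm_exe_path` later in this vignette. This is a sketch and assumes values given here take precedence over the defaults:

``` r
submission <- slurmtools::submit_nonmem_model(
  mod,
  slurm_job_template_path = file.path(nonmem, "slurm-job-bbi.tmpl"),
  slurm_template_opts = list(bbi_exe_path = "/usr/local/bin/bbi")
)
```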
We will use a few different template files with different
+functionality so we’ll inject those template file paths to
+submit_nonmem_model
. However, we’ll use the
+submission-log
directory for the output, so we’ll set that
+option as well as bbi_config_path
so
+submit_nonmem_model
defaults can be used. The slurm
+template files are saved in ~/model/nonmem/.
Additionally,
+there is a simple NONMEM control stream in 1001.mod
in the
+same directory that we can use for testing.
library(bbr)
+library(here)
+
+nonmem = file.path(here::here(), "vignettes", "model", "nonmem")
+
+options('slurmtools.submission_root' = file.path(nonmem, "submission-log"))
+options('slurmtools.bbi_config_path' = file.path(nonmem, "bbi.yaml"))
To create the bbr
model object, we need to have both
+1001.mod
and 1001.yaml
which contains metadata
+about the model in the supplied directory
+(./model/nonmem/
). We’ll check for mod_number.yaml and, if
+it exists, read in the model; otherwise we create it.
mod_number <- "1001"
+
+if (file.exists(file.path(nonmem, paste0(mod_number, ".yaml")))) {
+ mod <- bbr::read_model(file.path(nonmem, mod_number))
+} else {
+ mod <- bbr::new_model(file.path(nonmem, mod_number))
+}
We can now submit the job and point to the template file in
+model/nonmem/slurm-job-bbi.tmpl
.
submission <- slurmtools::submit_nonmem_model(
+ mod,
+ slurm_job_template_path = file.path(nonmem, "slurm-job-bbi.tmpl"),
+)
+
+submission
+$status
+[1] 0
+
+$stdout
+[1] "Submitted batch job 878\n"
+
+$stderr
+[1] ""
+
+$timeout
+[1] FALSE
We see a status
with an exit code of 0 suggesting a
+successful command, and the stdout
gives us the batch job
+number. We can use slurmtools::get_slurm_jobs()
to monitor
+the status of the job. Here, we can supply the user = “matthews”
+argument to filter to just the jobs I’ve submitted.
slurmtools::get_slurm_jobs(user = 'matthews')
+# A tibble: 11 × 10
+ job_id job_state cpus partition standard_input standard_output
+ <int> <chr> <int> <chr> <chr> <chr>
+ 1 868 CANCELLED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+ 2 869 FAILED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+ 3 870 CANCELLED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+ 4 871 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+ 5 872 COMPLETED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+ 6 873 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+ 7 874 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+ 8 875 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+ 9 876 CANCELLED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+10 877 FAILED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+11 878 PENDING 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+# ℹ 4 more variables: submit_time <dttm>, start_time <dttm>, user_name <chr>,
+# current_working_directory <chr>
If we look in the slurmtools.submission_root directory
directory
+we can see the shell script that was generated with
+submit_nonmem_model
. Here is the whisker replaced call to
+bbi:
/usr/local/bin/bbi nonmem run local /cluster-data/user-homes/matthews/Projects/slurmtools_vignette/model/nonmem/1001.mod --config /cluster-data/user-homes/matthews/Projects/slurmtools_vignette/model/nonmem/bbi.yaml
Extending templates

Because the templates create a Bash shell script, there is an almost infinite number of things we can do with our template. Anything you can do in Bash you can do by appropriately updating the template file and injecting the needed information!
+Let’s add a notification feature that will send a notification when +the job has started and finished. We can use ntfy.sh and add the necessary info to our template to +achieve this.
+Here is a modified template file that adds a
+JOBID=$SLURM_JOBID
and some ntfy calls. To get a
+notification we can supply submit_nonmem_model
with
+ntfy
variable to send notifications. I’ll use
+ntfy = ntfy_demo
for this.
#!/bin/bash
+#SBATCH --job-name="{{job_name}}"
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task={{ncpu}}
+#SBATCH --partition={{partition}}
+#SBATCH --account={{project_name}}
+
+JOBID=$SLURM_JOBID
+
+# submit_nonmem_model uses the whisker package to populate template files
+# https://github.com/edwindj/whisker
+
+{{#ntfy}}
+curl -d "Starting model run: {{job_name}} $JOBID" ntfy.sh/{{ntfy}}
+{{/ntfy}}
+
+{{#parallel}}
+{{bbi_exe_path}} nonmem run local {{model_path}}.mod --parallel --threads={{ncpu}} --config {{bbi_config_path}}
+{{/parallel}}
+
+{{^parallel}}
+{{bbi_exe_path}} nonmem run local {{model_path}}.mod --config {{bbi_config_path}}
+{{/parallel}}
+
+{{#ntfy}}
+curl -d "Finished model run: {{job_name}} $JOBID" ntfy.sh/{{ntfy}}
+{{/ntfy}}
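Before wiring the topic into a template, it can be handy to confirm notifications arrive at all; the same curl call the template uses can be sent once by hand (topic name taken from this vignette):

``` r
# one-off test message to the ntfy topic used above
processx::run("curl", c("-d", "slurmtools ntfy test", "ntfy.sh/ntfy_demo"))
```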
+Since we’ve already run this model we will provide the
+overwrite = TRUE
argument to force a new NONMEM run.
submission_ntfy <- slurmtools::submit_nonmem_model(
+ mod,
+ slurm_job_template_path = file.path(nonmem, "slurm-job-bbi-ntfy.tmpl"),
+ overwrite = TRUE,
+ slurm_template_opts = list(
+ ntfy = "ntfy_demo")
+)
+
+submission_ntfy
+$status
+[1] 0
+
+$stdout
+[1] "Submitted batch job 879\n"
+
+$stderr
+[1] ""
+
+$timeout
+[1] FALSE
We again get a 0 exit code status and now instead of using
+slurmtools::get_slurm_jobs()
to monitor the job, we can
+rely on the new notifications we just set up.
And when the run finishes, we get another notification:
+Note that the run number will match the run specified in
+submission$stdout
. We can see the new shell script this
+updated template file generated:
#!/bin/bash
+#SBATCH --job-name="1001-nonmem-run"
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=1
+#SBATCH --partition=cpu2mem4gb
+#SBATCH --account=slurmtools
+
+JOBID=$SLURM_JOBID
+
+curl -d "Starting model run: 1001-nonmem-run $JOBID" ntfy.sh/ntfy_demo
+
+/usr/local/bin/bbi nonmem run local /cluster-data/user-homes/matthews/Projects/slurmtools_vignette/model/nonmem/1001.mod --config /cluster-data/user-homes/matthews/Projects/slurmtools_vignette/model/nonmem/bbi.yaml
+
+curl -d "Finished model run: 1001-nonmem-run $JOBID" ntfy.sh/ntfy_demo
To reiterate, this template file is run as a Bash shell script, so anything you can do in Bash you can put into the template; pass the needed arguments and customize the behavior to your liking.
+library(slurmtools)
+#> ── Needed slurmtools options ───────────────────────────────────────────────────
+#> ✖ option('slurmtools.slurm_job_template_path') is not set.
+#> ✖ option('slurmtools.submission_root') is not set.
+#> ✖ option('slurmtools.bbi_config_path') is not set.
+#> ℹ Please set all options for job submission defaults to work.
+library(bbr)
+library(here)
+#> here() starts at /cluster-data/user-homes/matthews/Packages/slurmtools
+
+nonmem = file.path(here::here(), "vignettes", "model", "nonmem")
+options('slurmtools.submission_root' = file.path(nonmem, "submission-log"))
Instead of using bbi we can use nmm
(NONMEM Monitor) which
+currently has some additional functionality of sending notifications
+about zero gradients, missing -1E9 lines in the .ext file, and some very
+basic control stream errors. Currently, only Slack or ntfy.sh is supported for receiving notifications. To
+use nmm
you can install the latest release from the GitHub
+repository linked above.
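As with bbi, a quick sanity check that the monitor is actually visible on your path before submitting:

``` r
Sys.which("nmm")
```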
We can update the template file accordingly:
+#!/bin/bash
+#SBATCH --job-name="{{job_name}}"
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task={{ncpu}}
+#SBATCH --partition={{partition}}
+
+{{nmm_exe_path}} -c {{config_toml_path}} run
+By default, submit_nonmem_model will provide
will provide
+nmm_exe_path
and config_toml_path
to the
+template. Just like with bbi_exe_path
,
+nmm_exe_path
is determined with
+Sys.which("nmm")
which may or may not give you the path to
+the nmm binary, depending on whether it is on your path. We can inject the
+nmm_exe_path
like we did with bbi_exe_path
and
+assume it’s not on our path.
The config.toml
file controls what nmm
will
+monitor, where to look for files, and how to alert you. We’ll use
+generate_nmm_config()
to create this file. First we can
+look at the documentation to see what type of information we should pass
+to this function.
mod_number <- "1001"
+
+if (file.exists(file.path(nonmem, paste0(mod_number, ".yaml")))) {
+ mod <- bbr::read_model(file.path(nonmem, mod_number))
+} else {
+ mod <- bbr::new_model(file.path(nonmem, mod_number))
+}
This generates the following toml file. Notice that alert is set to
+‘None’, and both email and topic are empty. Since we’re in vignettes
+we’ll need to update the watched_dir
and
+output_dir
.
model_number = '1001'
+files_to_track = [ 'lst', 'ext', 'grd' ]
+tmp_dir = '/tmp'
+watched_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/model/nonmem'
+output_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/model/nonmem/in_progress'
+poll_duration = 1
+alert = 'None'
+level = 'Debug'
+email = ''
+threads = 1
+topic = ''
slurmtools::generate_nmm_config(
+ mod,
+ watched_dir = "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem",
+ output_dir = "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/in_progress")
This updates the 1001.toml
config file to:
model_number = '1001'
+files_to_track = [ 'lst', 'ext', 'grd' ]
+tmp_dir = '/tmp'
+watched_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem'
+output_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/in_progress'
+poll_duration = 1
+alert = 'None'
+level = 'Debug'
+email = ''
+threads = 1
+topic = ''
We can now run submit_nonmem_model
and get essentially
+the same behavior as running with bbi
. On Linux,
+~/.local/bin/
is typically on your path, so saving binaries there
+is a good approach.
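A quick way to confirm that assumption on your own machine before relying on it (pure base R, nothing slurmtools-specific):

``` r
# is ~/.local/bin actually on the PATH, and is an nmm binary sitting there?
local_bin <- normalizePath("~/.local/bin", mustWork = FALSE)
grepl(local_bin, Sys.getenv("PATH"), fixed = TRUE)
file.exists(file.path(local_bin, "nmm"))
```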
submission_nmm <- slurmtools::submit_nonmem_model(
+ mod,
+ overwrite = TRUE,
+ slurm_job_template_path = file.path(nonmem, "slurm-job-nmm.tmpl"),
+ slurm_template_opts = list(
+ nmm_exe_path = normalizePath("~/.local/bin/nmm"))
+)
+
+submission_nmm
+#> $status
+#> [1] 0
+#>
+#> $stdout
+#> [1] "Submitted batch job 876\n"
+#>
+#> $stderr
+#> [1] ""
+#>
+#> $timeout
+#> [1] FALSE
slurmtools::get_slurm_jobs()
+#> # A tibble: 9 × 10
+#> job_id job_state cpus partition standard_input standard_output
+#> <int> <chr> <int> <chr> <chr> <chr>
+#> 1 868 CANCELLED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/mat…
+#> 2 869 FAILED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/mat…
+#> 3 870 CANCELLED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/mat…
+#> 4 871 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d0…
+#> 5 872 COMPLETED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d0…
+#> 6 873 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d0…
+#> 7 874 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d0…
+#> 8 875 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d0…
+#> 9 876 PENDING 1 cpu2mem4gb /dev/null /cluster-data/user-homes/mat…
+#> # ℹ 4 more variables: submit_time <dttm>, start_time <dttm>, user_name <chr>,
+#> # current_working_directory <chr>
One difference between using nmm
and
+bbi
is that a new directory is created that contains a log
+file that caught some issues with our run. This file is updated as
+NONMEM is running and monitors gradient values, parameters that hit
+zero, as well as other errors from bbi. Looking at the first few lines
+we can see that bbi
was successfully able to call NONMEM.
+We also see an info level log that OMEGA(2,1) has 0 value – in our mod
+file we don’t specify any omega values off the diagonal so these are
+fixed at 0. Finally we see that GRD(6) hit 0 relatively early in the
+run.
20:12:36 [INFO] bbi log: time="2024-08-27T20:12:36Z" level=info msg="Successfully loaded default configuration from /cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/bbi.yaml"
+20:12:36 [INFO] bbi log: time="2024-08-27T20:12:36Z" level=info msg="Beginning Local Path"
+20:12:36 [INFO] bbi log: time="2024-08-27T20:12:36Z" level=info msg="A total of 1 models have completed the initial preparation phase"
+20:12:36 [INFO] bbi log: time="2024-08-27T20:12:36Z" level=info msg="[1001] Beginning local work phase"
+20:12:58 [INFO] OMEGA(2,1) has 0 value
+20:12:58 [INFO] SIGMA(2,1) has 0 value
+20:13:00 [INFO] SIGMA(2,1) has 0 value
+20:13:00 [INFO] OMEGA(2,1) has 0 value
+20:13:04 [INFO] SIGMA(2,1) has 0 value
+20:13:04 [INFO] OMEGA(2,1) has 0 value
+20:13:04 [WARN] "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/1001/1001.grd" has 0 gradient for parameter: GRD(6)
After a run has finished, several messages are sent to the log after a
+final check of the files listed in the files_to_track
field
+of the 1001.toml
file.
20:13:16 [INFO] Received Exit code: exit status: 0
+20:13:16 [WARN] 1001.ext: Missing ext final output lines. Observed lines were: [-1000000000.0, -1000000004.0, -1000000006.0, -1000000007.0]
+20:13:16 [WARN] "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/1001/1001.grd": The following parameters hit zero gradient through the run: ["GRD(6)"]
+We see that GRD(6) hit zero during the run and that only a subset of +the -1E9 lines were present in the .ext file.
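The same check can be done by hand: the .ext file nmm was watching lives under the model output directory created by the run (path pattern taken from the log lines above), and its final lines are the -1E9 records nmm reports on.

``` r
# peek at the tail of the ext file for run 1001
ext_path <- file.path(nonmem, "1001", "1001.ext")
tail(readLines(ext_path), 10)
```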
Just as we altered the slurm template file with bbi to get notifications from ntfy.sh, nmm has this feature built in! The messages in the log file
+that relate to zero gradients, missing -1E9 lines, and 0 parameter
+values can also be sent to ntfy by altering the 1001.toml
+file. We can get these alerts in real time without having to dig through
+a noisy log file.
Let’s update our call to generate_nmm_config
to have
+nmm
send notifications to the NONMEMmonitor
+topic on ntfy.sh.
slurmtools::generate_nmm_config(
+ mod,
+ alert = "Ntfy",
+ topic = "NONMEMmonitor",
+ watched_dir = "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem",
+ output_dir = "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/in_progress")
This updates the 1001.toml
file to this:
model_number = '1001'
+files_to_track = [ 'lst', 'ext', 'grd' ]
+tmp_dir = '/tmp'
+watched_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem'
+output_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/in_progress'
+poll_duration = 1
+alert = 'Ntfy'
+level = 'Debug'
+email = ''
+threads = 1
+topic = 'NONMEMmonitor'
When we re-run the submit_nonmem_model
call we will now
+get ntfy notifications. One thing to note is that nmm
will
+print full paths in the log, but will only send notifications with the
+model_number
(or
+model_number.file_extension
).
submission_nmm <- slurmtools::submit_nonmem_model(
+ mod,
+ overwrite = TRUE,
+ slurm_job_template_path = file.path(nonmem, "slurm-job-nmm.tmpl"),
+ slurm_template_opts = list(
+ nmm_exe_path = normalizePath("~/.local/bin/nmm-x86_64-unknown-linux-gnu/nmm"))
+)
+#> Warning in normalizePath("~/.local/bin/nmm-x86_64-unknown-linux-gnu/nmm"):
+#> path[1]="/cluster-data/user-homes/matthews/.local/bin/nmm-x86_64-unknown-linux-gnu/nmm":
+#> No such file or directory
+
+submission_nmm
+#> $status
+#> [1] 0
+#>
+#> $stdout
+#> [1] "Submitted batch job 877\n"
+#>
+#> $stderr
+#> [1] ""
+#>
+#> $timeout
+#> [1] FALSE
slurmtools::get_slurm_jobs(user = "matthews")
+#> # A tibble: 10 × 10
+#> job_id job_state cpus partition standard_input standard_output
+#> <int> <chr> <int> <chr> <chr> <chr>
+#> 1 868 CANCELLED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+#> 2 869 FAILED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+#> 3 870 CANCELLED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+#> 4 871 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+#> 5 872 COMPLETED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+#> 6 873 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+#> 7 874 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+#> 8 875 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+#> 9 876 PENDING 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+#> 10 877 PENDING 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+#> # ℹ 4 more variables: submit_time <dttm>, start_time <dttm>, user_name <chr>,
+#> # current_working_directory <chr>
This gives us the notifications in a much more digestible format.
+library(slurmtools)
+library(bbr)
+library(here)
+
+nonmem = file.path(here::here(), "vignettes", "model", "nonmem")
+options('slurmtools.submission_root' = file.path(nonmem, "submission-log"))
mod_number <- "1001"
+
+if (file.exists(file.path(nonmem, paste0(mod_number, ".yaml")))) {
+ mod <- bbr::read_model(file.path(nonmem, mod_number))
+} else {
+ mod <- bbr::new_model(file.path(nonmem, mod_number))
+}
There is also functionality to pair nmm
with slack_notifier
+and get messages sent directly to you via a slack bot. This requires you
+to download the slack_notifier binaries and add them to your path so
+nmm
can find them. You can download the latest release and
+extract the binary and again save it to ~/.local/bin
.
Sys.which("slack_notifier")
+#> slack_notifier
+#> "/cluster-data/user-homes/matthews/.local/bin/slack_notifier"
slack_notifier requires an additional
+slack_notifier/config.yaml
file that contains the slack bot
+OAuth token, which can be found at https://api.slack.com/apps/<YOUR APP ID>/oauth.
Again, we need to update the 1001.toml
file to get slack
+notifications. We need to set alert = "slack"
and provide
+the email
associated with the slack account in
+generate_nmm_config
.
slurmtools::generate_nmm_config(
+ mod,
+ alert = "slack",
+ email = "matthews@a2-ai.com",
+ watched_dir = "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem",
+ output_dir = "/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/in_progress")
This generates the following toml file:
+model_number = '1001'
+files_to_track = [ 'lst', 'ext', 'grd' ]
+tmp_dir = '/tmp'
+watched_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem'
+output_dir = '/cluster-data/user-homes/matthews/Packages/slurmtools/vignettes/model/nonmem/in_progress'
+poll_duration = 1
+alert = 'Slack'
+level = 'Debug'
+email = 'matthews@a2-ai.com'
+threads = 1
+topic = ''
With alert = 'Slack'
and email
set in the
+1001.toml
file, nmm will send Slack notifications directly to you when a NONMEM run starts. It will reply
to that message with notifications if any gradients hit 0, and when the run finishes it checks
whether all -1E9 lines are present in the .ext file and sends another message about any
parameters that hit a 0 gradient.
submission_nmm <- slurmtools::submit_nonmem_model(
+ mod,
+ overwrite = TRUE,
+ slurm_job_template_path = file.path(nonmem, "slurm-job-nmm.tmpl"),
+ slurm_template_opts = list(
+ nmm_exe_path = normalizePath("~/.local/bin/nmm")
+ )
+)
+
+submission_nmm
+#> $status
+#> [1] 0
+#>
+#> $stdout
+#> [1] "Submitted batch job 880\n"
+#>
+#> $stderr
+#> [1] ""
+#>
+#> $timeout
+#> [1] FALSE
slurmtools::get_slurm_jobs()
+#> # A tibble: 13 × 10
+#> job_id job_state cpus partition standard_input standard_output
+#> <int> <chr> <int> <chr> <chr> <chr>
+#> 1 868 CANCELLED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+#> 2 869 FAILED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+#> 3 870 CANCELLED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+#> 4 871 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+#> 5 872 COMPLETED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+#> 6 873 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+#> 7 874 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+#> 8 875 FAILED 1 cpu2mem4gb /dev/null /tmp/RtmpIBC04D/Rbuild1af3d…
+#> 9 876 CANCELLED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+#> 10 877 FAILED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+#> 11 878 RUNNING 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+#> 12 879 COMPLETED 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+#> 13 880 PENDING 1 cpu2mem4gb /dev/null /cluster-data/user-homes/ma…
+#> # ℹ 4 more variables: submit_time <dttm>, start_time <dttm>, user_name <chr>,
+#> # current_working_directory <chr>
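For completeness, here is a small polling sketch built only from what `get_slurm_jobs()` already returns above (`job_id`, `job_state`); it is useful when a script should block until a submitted job leaves the queue. The job id is illustrative.

``` r
job_id_to_watch <- 880

repeat {
  jobs <- slurmtools::get_slurm_jobs(user = "matthews")
  state <- jobs$job_state[jobs$job_id == job_id_to_watch]
  # stop polling once the job is no longer queued or running
  if (length(state) == 0 || !state %in% c("PENDING", "CONFIGURING", "RUNNING")) break
  Sys.sleep(30)
}
```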