diff --git a/.Rbuildignore b/.Rbuildignore index e5afaad..c80b62d 100755 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,4 +1,5 @@ ^.*\.Rproj$ ^\.Rproj\.user$ ^\.travis\.yml$ -cran-comments.md +^cran-comments\.md +^release\.R diff --git a/.gitignore b/.gitignore index 09a72cb..807ea25 100755 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ .Rproj.user .Rhistory .RData -inst/doc diff --git a/.travis.yml b/.travis.yml index b79e15a..fc438d5 100755 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,8 @@ language: r warnings_are_errors: true sudo: required +r_build_args: "--no-build-vignettes" +r_check_args: "--no-vignettes --as-cran" env: global: diff --git a/DESCRIPTION b/DESCRIPTION old mode 100755 new mode 100644 index 7def5d9..2758526 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,10 @@ Package: rslurm Type: Package Title: Submit R Calculations to a 'SLURM' Cluster -Description: Functions that simplify the R interface the 'SLURM' cluster workload - manager, and automate the process of dividing a parallel calculation across - cluster nodes. -Version: 0.3.2 +Description: Functions that simplify submitting R scripts to a 'SLURM' cluster + workload manager, in part by automating the division of embarrassingly parallel + calculations across cluster nodes. +Version: 0.3.3 License: GPL-3 URL: https://github.com/SESYNC-ci/rslurm BugReports: https://github.com/SESYNC-ci/rslurm/issues @@ -18,7 +18,8 @@ Imports: parallel, whisker (>= 0.3) RoxygenNote: 6.0.1 -Suggests: testthat, +Suggests: + testthat, knitr, rmarkdown VignetteBuilder: knitr diff --git a/NEWS.md b/NEWS.md deleted file mode 100755 index fb90110..0000000 --- a/NEWS.md +++ /dev/null @@ -1,109 +0,0 @@ -rslurm 0.3.2 -============ - -*2017-03-07* - -Minor update to include new feature and bug fixes: - -* 'Wait' feature: adds option to slurm_apply and slurm_call to block the -calling script until the submitted job completes. This option can be used to -allow immediate processing of a submitted job's output. - - -rslurm 0.3.1 -============ - -*2016-06-18* - -* Minor bug fix: specify full path of 'Rscript' when running batch scripts. - - -rslurm 0.3.0 -============ - -*2016-05-27* - -*First version on CRAN* - -Major update to the package interface and implementation: - -* Added a `submit` argument to `slurm_apply` and `slurm_call`. If `submit = FALSE`, -the submission scripts are created but not run. This is useful if the files need -to be transferred from a local machine to the cluster and run at a later time. - -* Added new optional arguments to `slurm_apply` and `slurm_call`, allowing users to give -informative names to SLURM jobs (`jobname`) and set any options understood by -`sbatch` (`slurm_options`). - -* The `data_file` arugment to `slurm_apply` and `slurm_call` is replaced with -`add_objects`, which accepts a vector of R object names from the active workspace -and automatically saves them in a .RData file to be loaded on each node. - -* `slurm_apply` and `slurm_call` now generate R and Bash scripts through -[whisker](https://github.com/edwindj/whisker) templates. Advanced users may want -to edit those templates in the `templates` folder of the installed R package -(e.g. to set default *SBATCH* options in `submit.sh`). - -* Files generated by the package (scripts, data files and output) are now saved -in a subfolder named `_rslurm_[jobname]` in the current working directory. - -* Minor updates, including reformatting the output of `print_job_status` and -removing this package's dependency on `stringr`. 
- - -rslurm 0.2.0 -============ - -*2015-11-23* - -* Changed the `slurm_apply` function to use `parallel::mcMap` instead of `mcmapply`, -which fixes a bug where list outputs (i.e. each function call returns a list) -would be collapsed in a single list (rather than returned as a list of lists). - -* Changed the interface so that the output type (table or raw) is now an argument -of `get_slurm_out` rather than of `slurm_apply`, and defaults to `raw`. - -* Added `cpus_per_node` argument to `slurm_apply`, indicating the number of -parallel processes to be run on each node. - - -rslurm 0.1.3 -============ - -*2015-07-13* - -* Added the `slurm_call` function, which submits a single function evaluation -on the cluster, with syntax similar to the base function `do.call`. - -* `get_slurm_out` can now process the output even if some filese are missing, -in which case it issues a warning. - - -rslurm 0.1.2 -============ - -*2015-06-29* - -* Added the optional argument `pkgs` to `slurm_apply`, indicating which packages -should be loaded on each node (by default, all packages currently attached to -the user's R session). - - -rslurm 0.1.1 -============ - -*2015-06-24* - -* Added the optional argument `output` to `slurm_apply`, which can take the -value `table` (each function evaluation returns a row, output is a data frame) or -`raw` (each function evaluation returns an arbitrary R object, output is a list). - -* Fixed a bug in the chunk size calculation for `slurm_apply`. - - -rslurm 0.1.0 -============ - -*2015-06-16* - -* First version of the package released on Github. diff --git a/R/get_slurm_out.R b/R/get_slurm_out.R index fcba98c..95912b7 100755 --- a/R/get_slurm_out.R +++ b/R/get_slurm_out.R @@ -27,7 +27,9 @@ get_slurm_out <- function(slr_job, outtype = "raw", wait = TRUE) { # Check arguments - if (!(class(slr_job) == "slurm_job")) stop("slr_job must be a slurm_job") + if (!(class(slr_job) == "slurm_job")) { + stop("slr_job must be a slurm_job") + } outtypes <- c("table", "raw") if (!(outtype %in% outtypes)) { stop(paste("outtype should be one of:", paste(outtypes, collapse = ', '))) @@ -37,9 +39,11 @@ get_slurm_out <- function(slr_job, outtype = "raw", wait = TRUE) { } # Wait for slr_job using SLURM dependency - if (wait) wait_for_job(slr_job) + if (wait) { + wait_for_job(slr_job) + } - res_files <- paste0("results_", 0:(slr_job$nodes - 1), ".RData") + res_files <- paste0("results_", 0:(slr_job$nodes - 1), ".RDS") tmpdir <- paste0("_rslurm_", slr_job$jobname) missing_files <- setdiff(res_files, dir(path = tmpdir)) if (length(missing_files) > 0) { diff --git a/R/rslurm.R b/R/rslurm.R index a477ffc..b4d3889 100755 --- a/R/rslurm.R +++ b/R/rslurm.R @@ -1,8 +1,3 @@ -#' rslurm: Submit R calculations to a SLURM cluster -#' -#' This package automates the process of sending simple function calls or -#' parallel calculations to a cluster using the SLURM workload manager. -#' #' @section Overview: #' This package includes two core functions used to send computations to a #' SLURM cluster. While \code{\link{slurm_call}} executes a function using a @@ -19,7 +14,7 @@ #' \code{\link{get_slurm_out}} and \code{\link{cleanup_files}}. #' #' For bug reports or questions about this package, contact -#' Philippe Marchand (pmarchand@@sesync.org). +#' Ian Carroll(icarroll@sesync.org). 
#' #' @section Function Specification: #' To be compatible with \code{\link{slurm_apply}}, a function may accept @@ -65,9 +60,10 @@ #' cleanup_files(sjob1) #' } #' -#' @docType package -#' @name rslurm-package -#' @aliases rslurm +#' @section Acknowledgement: +#' Development of this R package was supported by the National +#' Socio-Environmental Synthesis Center (SESYNC) under funding received +#' from the National Science Foundation DBI-1052875. #' #' @importFrom utils capture.output -NULL +"_PACKAGE" \ No newline at end of file diff --git a/R/slurm_apply.R b/R/slurm_apply.R index 7c4aeb4..1833f25 100755 --- a/R/slurm_apply.R +++ b/R/slurm_apply.R @@ -4,8 +4,8 @@ #' parameters in parallel, spread across multiple nodes of a SLURM cluster. #' #' This function creates a temporary folder ("_rslurm_[jobname]") in the current -#' directory, holding the .RData files, the R script and the Bash submission -#' script generated for the SLURM job. +#' directory, holding .RData and .RDS data files, the R script to run and the Bash +#' submission script generated for the SLURM job. #' #' The set of input parameters is divided in equal chunks sent to each node, and #' \code{f} is evaluated in parallel within each node using functions from the @@ -23,7 +23,7 @@ #' #' When processing the computation job, the SLURM cluster will output two types #' of files in the temporary folder: those containing the return values of the -#' function for each subset of parameters ("results_[node_id].RData") and those +#' function for each subset of parameters ("results_[node_id].RDS") and those #' containing any console or error output produced by R on each node #' ("slurm_[node_id].out"). #' @@ -100,9 +100,9 @@ slurm_apply <- function(f, params, jobname = NA, nodes = 2, cpus_per_node = 2, # Create temp folder tmpdir <- paste0("_rslurm_", jobname) - dir.create(tmpdir) + dir.create(tmpdir, showWarnings = FALSE) - saveRDS(params, file = file.path(tmpdir, "params.RData")) + saveRDS(params, file = file.path(tmpdir, "params.RDS")) if (!is.null(add_objects)) { save(list = add_objects, file = file.path(tmpdir, "add_objects.RData")) } @@ -142,6 +142,10 @@ slurm_apply <- function(f, params, jobname = NA, nodes = 2, cpus_per_node = 2, writeLines(script_sh, file.path(tmpdir, "submit.sh")) # Submit job to SLURM if applicable + if (submit && system('squeue', ignore.stdout = TRUE)) { + submit <- FALSE + cat("Cannot submit; no SLURM workload manager on path\n") + } if (submit) { jobid <- submit_slurm_job(tmpdir) } else { diff --git a/R/slurm_call.R b/R/slurm_call.R index d486554..81c1467 100755 --- a/R/slurm_call.R +++ b/R/slurm_call.R @@ -4,8 +4,8 @@ #' cluster. #' #' This function creates a temporary folder ("_rslurm_[jobname]") in the current -#' directory, holding the .RData files, the R script and the Bash submission -#' script generated for the SLURM job. +#' directory, holding .RData and .RDS data files, the R script to run and the Bash +#' submission script generated for the SLURM job. #' #' The names of any other R objects (besides \code{params}) that \code{f} needs #' to access should be listed in the \code{add_objects} argument. @@ -20,7 +20,7 @@ #' #' When processing the computation job, the SLURM cluster will output two files #' in the temporary folder: one with the return value of the function -#' ("results_0.RData") and one containing any console or error output produced +#' ("results_0.RDS") and one containing any console or error output produced #' by R ("slurm_[node_id].out"). 
 #'
 #' If \code{submit = TRUE}, the job is sent to the cluster and a confirmation
@@ -74,9 +74,9 @@ slurm_call <- function(f, params, jobname = NA, add_objects = NULL,
     
     # Create temp folder
     tmpdir <- paste0("_rslurm_", jobname)
-    dir.create(tmpdir)
+    dir.create(tmpdir, showWarnings = FALSE)
     
-    saveRDS(params, file = file.path(tmpdir, "params.RData"))
+    saveRDS(params, file = file.path(tmpdir, "params.RDS"))
     if (!is.null(add_objects)) {
         save(list = add_objects, file = file.path(tmpdir, "add_objects.RData"))
     }
@@ -103,6 +103,10 @@ slurm_call <- function(f, params, jobname = NA, add_objects = NULL,
     writeLines(script_sh, file.path(tmpdir, "submit.sh"))
     
     # Submit job to SLURM if applicable
+    if (submit && system('squeue', ignore.stdout = TRUE)) {
+        submit <- FALSE
+        cat("Cannot submit; no SLURM workload manager on path\n")
+    }
     if (submit) {
         jobid <- submit_slurm_job(tmpdir)
     } else {
diff --git a/R/slurm_job.R b/R/slurm_job.R
index 83ab812..d5a481f 100755
--- a/R/slurm_job.R
+++ b/R/slurm_job.R
@@ -16,10 +16,7 @@
 #' @return A \code{slurm_job} object.
 #' @export
 slurm_job <- function(jobname, nodes, jobid) {
-    slr_job <- list(jobname = jobname, nodes = nodes)
-    if (!missing(jobid)) {
-        slr_job$jobid <- as.integer(jobid)
-    }
+    slr_job <- list(jobname = jobname, nodes = nodes, jobid = jobid)
     class(slr_job) <- "slurm_job"
     slr_job
 }
diff --git a/R/slurm_utils.R b/R/slurm_utils.R
index e541a45..06c3b22 100755
--- a/R/slurm_utils.R
+++ b/R/slurm_utils.R
@@ -7,7 +7,8 @@ func_to_str <- function(f) {
 }
 
-# Make jobname by cleaning user-provided name or (if NA) generate one from clock
+# Make jobname by cleaning user-provided name or (if NA) generate one
+# from base::tempfile
 make_jobname <- function(name) {
     if (is.na(name)) {
         tmpfile <- tempfile("_rslurm_", tmpdir=".")
@@ -47,7 +48,9 @@ local_slurm_array <- function(slr_job) {
                      "Sys.setenv(SLURM_ARRAY_TASK_ID = i)",
                      "source('slurm_run.R')", "}"), "local_run.R")
         system(paste(rscript_path, "--vanilla local_run.R"))
+        slr_job$jobid <- 0L
     }, finally = setwd(olddir))
+    return(slr_job)
 }
 
 # Submit job capturing jobid
diff --git a/README.md b/README.md
old mode 100755
new mode 100644
index 97044d5..460e166
--- a/README.md
+++ b/README.md
@@ -1,236 +1,74 @@
-rslurm
-======
-
-[![Travis-CI Build Status](https://travis-ci.org/SESYNC-ci/rslurm.svg?branch=master)](https://travis-ci.org/SESYNC-ci/rslurm)
-
-Many computing-intensive processes in R involve the repeated evaluation of
-a function over many items or parameter sets. These so-called
-[embarrassingly parallel](https://en.wikipedia.org/wiki/Embarrassingly_parallel)
-calculations can be run serially with the `lapply` or `Map` function, or in parallel
-on a single machine with `mclapply` or `mcMap` (from the **parallel** package).
-
-The rslurm package simplifies the process of distributing this type of calculation
-across a computing cluster that uses the [SLURM](http://slurm.schedmd.com/)
-workload manager. Its main function, `slurm_apply`, automatically divides the
-computation over multiple nodes and writes the necessary submission scripts.
-It also includes functions to retrieve and combine the output from different nodes,
-as well as wrappers for common SLURM commands.
- -*Development of this R package was supported by the National Socio-Environmental Synthesis Center (SESYNC) under funding received from the National Science Foundation DBI-1052875.* - - -### Table of contents - -- [Basic example](#basic-example) -- [Single function evaluation](#single-function-evaluation) -- [Adding auxiliary data and functions](#adding-auxiliary-data-and-functions) -- [Configuring SLURM options](#configuring-slurm-options) -- [Generating scripts for later submission](#generating-scripts-for-later-submission) -- [How it works / advanced customization](#how-it-works-advanced-customization) - - -## Basic example - -To illustrate a typical rslurm workflow, we use a simple function that takes -a mean and standard deviation as parameters, generates a million normal deviates -and returns the sample mean and standard deviation. - -```r -test_func <- function(par_mu, par_sd) { - samp <- rnorm(10^6, par_mu, par_sd) - c(s_mu = mean(samp), s_sd = sd(samp)) -} -``` - -We then create a parameter data frame where each row is a parameter set and each -column matches an argument of the function. - -```r -pars <- data.frame(par_mu = 1:10, - par_sd = seq(0.1, 1, length.out = 10)) -head(pars, 3) -``` - -``` - par_mu par_sd -1 1 0.1 -2 2 0.2 -3 3 0.3 -``` - -We can now pass that function and the parameters data frame to `slurm_apply`, -specifiying the number of cluster nodes to use and the number of CPUs per node. -The latter (`cpus_per_node`) determines how many processes will be forked on -each node, as the `mc.cores` argument of `parallel::mcMap`. -```r -library(rslurm) -sjob <- slurm_apply(test_func, pars, jobname = "test_job", - nodes = 2, cpus_per_node = 2) -``` -The output of `slurm_apply` is a *slurm_job* object that stores a few pieces of -information (job name and number of nodes) needed to retrieve the job's output. - -Assuming the function is run on a machine with access to the cluster, it also -prints a message confirming the job has been submitted to SLURM. -``` -Submitted batch job 352375 -``` - -Particular clusters may require the specification of additional SLURM options, -such as time and memory limits for the job. Also, when running R on a local -machine without direct cluster access, you may want to generate scripts to be -copied to the cluster and run at a later time. These topics are covered in -additional sections below this basic example. - -After the job has been submitted, you can call `print_job_status` to display its -status (in queue, running or completed) or call `cancel_slurm` to cancel its -execution. These functions are R wrappers for the SLURM command line functions -`squeue` and `scancel`, respectively. - -Once the job completes, `get_slurm_out` reads and combines the output from all -nodes. -```r -res <- get_slurm_out(sjob, outtype = "table") -head(res, 3) -``` - -``` - s_mu s_sd -1 1.000005 0.09987899 -2 2.000185 0.20001108 -3 3.000238 0.29988789 -``` - -When `outtype = "table"`, the outputs from each function evaluation are -row-bound into a single data frame; this is an appropriate format when the -function returns a simple vector. The default `outtype = "raw"` combines the -outputs into a list and can thus handle arbitrarily complex return objects. 
- -```r -res_raw <- get_slurm_out(sjob, outtype = "raw") -res_raw[1:3] -``` - -``` -[[1]] - s_mu s_sd -1.00000506 0.09987899 - -[[2]] - s_mu s_sd -2.0001852 0.2000111 - -[[3]] - s_mu s_sd -3.0002377 0.2998879 -``` - -The files generated by `slurm_apply` are saved in a folder named -*\_rslurm_[jobname]* under the current working directory. - -```r -dir("_rslurm_test_job") -``` - -``` -[1] "params.RData" "results_0.RData" "results_1.RData" "slurm_0.out" -[5] "slurm_1.out" "slurm_run.R" "submit.sh" -``` - -The utility function `cleanup_files` deletes the temporary folder for the -specified *slurm_job*. - - -## Single function evaluation - -In addition to `slurm_apply`, rslurm also defines a `slurm_call` function, which -sends a single function call to the cluster. It is analogous in syntax to the -base R function `do.call`, accepting a function and a named list of parameters -as arguments. - -```r -sjob <- slurm_call(test_func, list(par_mu = 5, par_sd = 1)) -``` - -Because `slurm_call` involves a single process on a single node, it does not -recognize the `nodes` and `cpus_per_node` arguments; otherwise, it accepts the -same additional arguments (detailed in the sections below) as `slurm_apply`. - - -## Adding auxiliary data and functions - -The function passed to `slurm_apply` can only receive atomic parameters stored -within a data frame. Suppose we want instead to apply a function `func` to a list -of complex R objects, `obj_list`. To use `slurm_apply` in this case, we can wrap -`func` in an inline function that takes an integer parameter. - -```r -sjob <- slurm_apply(function(i) func(obj_list[[i]]), - data.frame(i = seq_along(obj_list)), - add_objects = c("func", "obj_list"), - nodes = 2, cpus_per_node = 2) -``` - -The `add_objects` argument specifies the names of any R objects (besides the -parameters data frame) that must be accessed by the function passed to -`slurm_apply`. These objects are saved to a `.RData` file that is loaded -on each cluster node prior to evaluating the function in parallel. +rslurm: Submit R Calculations to a 'SLURM' Cluster +-------------------------------------------------- -By default, all R packages attached to the current R session will also be -attached (with `library`) on each cluster node, though this can be modified with -the optional `pkgs` argument. +![](https://travis-ci.org/SESYNC-ci/rslurm.svg?branch=master) +### Description -## Configuring SLURM options +Functions that simplify submitting R scripts to a 'SLURM' cluster workload manager, in part by automating the division of embarrassingly parallel calculations across cluster nodes. -The `slurm_options` argument allows you to set any of the command line -options ([view list](http://slurm.schedmd.com/sbatch.html)) recognized by the -SLURM `sbatch` command. It should be formatted as a named list, using the long -names of each option (e.g. "time" rather than "t"). Flags, i.e. command line -options that are toggled rather than set to a particular value, should be set to -`TRUE` in `slurm_options`. For example, the following code: -```r -sjob <- slurm_apply(test_func, pars, - slurm_options = list(time = "1:00:00", share = TRUE)) -``` -sets the command line options `--time=1:00:00 --share`. +### Overview +This package includes two core functions used to send computations to a SLURM cluster. While `slurm_call` executes a function using a single set of parameters (passed as a list), `slurm_apply` evaluates the function in parallel for multiple sets of parameters grouped in a data frame. 
+`slurm_apply` automatically splits the parameter sets into equal-size chunks, each chunk to be processed by a separate cluster node. It uses functions from the `parallel` package to parallelize computations within each node.
-## Generating scripts for later submission
+The output of `slurm_apply` or `slurm_call` is a `slurm_job` object that serves as an input to the other functions in the package: `print_job_status`, `cancel_slurm`, `get_slurm_out` and `cleanup_files`.
-When working from a R session without direct access to the cluster, you can set
-`submit = FALSE` within `slurm_apply`. The function will create the
-*\_rslurm\_[jobname]* folder and generate the scripts and .RData files, without
-submitting the job. You may then copy those files to the cluster and submit the
-job manually by calling `sbatch submit.sh` from the command line.
+For bug reports or questions about this package, contact Ian Carroll (icarroll@sesync.org).
+### Function Specification
-## How it works / advanced customization
+To be compatible with `slurm_apply`, a function may accept any number of single value parameters. The names of these parameters must match the column names of the `params` data frame supplied. There are no restrictions on the types of parameters passed as a list to `slurm_call`.
-As mentioned above, the `slurm_apply` function creates a job-specific folder.
-This folder contains the parameters data frame and (if applicable) the objects
-specified as `add_objects`, both saved in *.RData* files. The function also
-generates a R script (`slurm_run.R`) to be run on each cluster node, as well
-as a Bash script (`submit.sh`) to submit the job to SLURM.
+If the function passed to `slurm_call` or `slurm_apply` requires knowledge of any R objects (data, custom helper functions) besides `params`, a character vector corresponding to their names should be passed to the optional `add_objects` argument.
-More specifically, the Bash script creates a SLURM job array, with each cluster
-node receiving a different value of the *SLURM\_ARRAY\_TASK\_ID* environment
-variable. This variable is read by `slurm_run.R`, which allows each instance of
-the script to operate on a different parameter subset and write its output to
-a different results file. The R script calls `parallel::mcMap` to parallelize
-calculations on each node.
+When parallelizing a function, since any error will interrupt all calculations for the current node, it may be useful to wrap expressions which may generate errors into a `try` or `tryCatch` function. This will ensure the computation continues with the next parameter set after reporting the error.
-Both `slurm_run.R` and `submit.sh` are generated from templates, using the
-**whisker** package; these templates can be found in the `rslurm/templates`
-subfolder in your R package library. There are two templates for each script,
-one for `slurm_apply` and the other (with the word *single* in its title) for
-`slurm_call`.
+### Output Format
-While you should avoid changing any existing lines in the template scripts, you
-may want to add `#SBATCH` lines to the `submit.sh` templates in order to
-permanently set certain SLURM command line options and thus customize the package
-to your particular cluster setup.
+The default output format for `get_slurm_out` (`outtype = "raw"`) is a list where each element is the return value of one function call.
+If the function passed to `slurm_apply` produces a vector output, you may use `outtype = "table"` to collect the output in a single data frame, with one row per function call.
+### Acknowledgement
+Development of this R package was supported by the National Socio-Environmental Synthesis Center (SESYNC) under funding received from the National Science Foundation DBI-1052875.
+### Author(s)
+
+**Maintainer**: Ian Carroll (icarroll@sesync.org) \[contributor\]
+
+Authors:
+
+- Philippe Marchand
+
+Other contributors:
+
+- Mike Smorul \[contributor\]
+
+### See Also
+
+Useful links:
+
+- https://github.com/SESYNC-ci/rslurm
+
+- Report bugs at https://github.com/SESYNC-ci/rslurm/issues
+
+### Examples
+
+    ## Not run: 
+    # Create a data frame of mean/sd values for normal distributions
+    pars <- data.frame(par_m = seq(-10, 10, length.out = 1000),
+                       par_sd = seq(0.1, 10, length.out = 1000))
+    
+    # Create a function to parallelize
+    ftest <- function(par_m, par_sd) {
+        samp <- rnorm(10^7, par_m, par_sd)
+        c(s_m = mean(samp), s_sd = sd(samp))
+    }
+    
+    sjob1 <- slurm_apply(ftest, pars)
+    print_job_status(sjob1)
+    res <- get_slurm_out(sjob1, "table")
+    all.equal(pars, res) # Confirm correct output
+    cleanup_files(sjob1)
+    
+    ## End(Not run)
diff --git a/cran-comments.md b/cran-comments.md
index aa41dab..d46c2b7 100755
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,17 +1,20 @@
-This is a minor update to the package to fix a previous error found by CRAN
-when checking package on r-devel on Mac OS X.
+This fixes the version number and provides a pre-built vignette, its source, and a vignette index.
+
+The 'rslurm.Rmd' vignette (formerly 'rslurm-vignette.Rmd') now only builds on a SLURM head node. The built vignette in inst/doc can be used as is. To prevent build errors, the vignette is matched in .Rbuildignore. The source for the vignette is copied into 'inst/doc' for FOSS compliance. This explains the NOTE.
 
 ## Tested on
 
-win-builder (devel and release)
-Ubuntu 12.04 with R 3.3 (on travis-ci)
-OS X with R 3.3 (local machine)
+Ubuntu 14.04 with R 3.3 (SLURM head node)
+Ubuntu 12.04 with R 3.3 (travis-ci)
+macOS 10.12 with R 3.3 (local machine)
+win-builder (release and devel)
+
+## R CMD check
-## R CMD check results
+0 errors | 0 warnings | 1 note
-1 NOTE (for some versions)
- Namespace in Imports field not imported from: ‘parallel’
- All declared Imports should be used.
-
-This note occurs because the `parallel::mcMap` function is called in the batch
-scripts generated by this package's functions (from the template located at inst/templates/slurm\_run\_R.txt), rather than in the package's R code itself.
+* checking for old-style vignette sources ... NOTE
+Vignette sources only in ‘inst/doc’:
+  ‘rslurm.Rmd’
+A ‘vignettes’ directory is required as from R 3.1.0
+and these will not be indexed nor checked
\ No newline at end of file
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
new file mode 100644
index 0000000..dad563a
--- /dev/null
+++ b/inst/NEWS.Rd
@@ -0,0 +1,68 @@
+\name{NEWS}
+\title{News for Package \pkg{rslurm}}
+\newcommand{\ghpr}{\href{https://github.com/sesync-ci/rslurm/pull/#1}{##1}}
+\newcommand{\ghit}{\href{https://github.com/sesync-ci/rslurm/issues/#1}{##1}}
+
+\section{Changes in version 0.3.3 from 2017-04-07}{
+Minor update to repair README.
+  \itemize{
+    \item Create README from R/rslurm.R.
+  }
+}
+
+\section{Changes in version 0.3.2 from 2017-04-06}{
+Minor update to include new feature and bug fixes.
+  \itemize{
+    \item "wait" argument adds option to slurm_apply and slurm_call to block the
+    calling script until the submitted job completes.
+    This option can be used to allow immediate processing of a submitted job's output (\ghpr{2}).
+    \item Use ".RDS" file extension, rather than ".RData", for serialized objects (\ghpr{4}).
+    \item Minor bug fixes (\ghpr{4}).
+  }
+}
+\section{Changes in version 0.3.1 from 2016-06-18}{
+Minor bug fix.
+  \itemize{
+    \item Specify full path of `Rscript` when running batch scripts.
+  }
+}
+\section{Changes in version 0.3.0 from 2016-05-27}{
+Major update to the package interface and implementation.
+  \itemize{
+    \item First version on CRAN
+    \item Added a `submit` argument to `slurm_apply` and `slurm_call`. If `submit = FALSE`, the submission scripts are created but not run. This is useful if the files need to be transferred from a local machine to the cluster and run at a later time.
+    \item Added new optional arguments to `slurm_apply` and `slurm_call`, allowing users to give informative names to SLURM jobs (`jobname`) and set any options understood by `sbatch` (`slurm_options`).
+    \item The `data_file` argument to `slurm_apply` and `slurm_call` is replaced with `add_objects`, which accepts a vector of R object names from the active workspace and automatically saves them in a .RData file to be loaded on each node.
+    \item `slurm_apply` and `slurm_call` now generate R and Bash scripts through [whisker](https://github.com/edwindj/whisker) templates. Advanced users may want to edit those templates in the `templates` folder of the installed R package (e.g. to set default *SBATCH* options in `submit.sh`).
+    \item Files generated by the package (scripts, data files and output) are now saved in a subfolder named `_rslurm_[jobname]` in the current working directory.
+    \item Minor updates, including reformatting the output of `print_job_status` and removing this package's dependency on `stringr`.
+  }
+}
+\section{Changes in version 0.2.0 from 2015-11-23}{
+  \itemize{
+    \item Changed the `slurm_apply` function to use `parallel::mcMap` instead of `mcmapply`, which fixes a bug where list outputs (i.e. each function call returns a list) would be collapsed into a single list (rather than returned as a list of lists).
+    \item Changed the interface so that the output type (table or raw) is now an argument of `get_slurm_out` rather than of `slurm_apply`, and defaults to `raw`.
+    \item Added `cpus_per_node` argument to `slurm_apply`, indicating the number of parallel processes to be run on each node.
+  }
+}
+\section{Changes in version 0.1.3 from 2015-07-13}{
+  \itemize{
+    \item Added the `slurm_call` function, which submits a single function evaluation on the cluster, with syntax similar to the base function `do.call`.
+    \item `get_slurm_out` can now process the output even if some files are missing, in which case it issues a warning.
+  }
+}
+\section{Changes in version 0.1.2 from 2015-06-29}{
+  \itemize{
+    \item Added the optional argument `pkgs` to `slurm_apply`, indicating which packages should be loaded on each node (by default, all packages currently attached to the user's R session).
+  }
+}
+\section{Changes in version 0.1.1 from 2015-06-24}{
+  \itemize{
+    \item Added the optional argument `output` to `slurm_apply`, which can take the value `table` (each function evaluation returns a row, output is a data frame) or `raw` (each function evaluation returns an arbitrary R object, output is a list).
+    \item Fixed a bug in the chunk size calculation for `slurm_apply`.
+  }
+}
+\section{Changes in version 0.1.0 from 2015-06-16}{
+  \itemize{
+    \item First version of the package released on Github.
+ } +} diff --git a/inst/doc/rslurm.R b/inst/doc/rslurm.R new file mode 100644 index 0000000..f2f1994 --- /dev/null +++ b/inst/doc/rslurm.R @@ -0,0 +1,56 @@ +## ------------------------------------------------------------------------ +test_func <- function(par_mu, par_sd) { + samp <- rnorm(10^6, par_mu, par_sd) + c(s_mu = mean(samp), s_sd = sd(samp)) +} + +## ------------------------------------------------------------------------ +pars <- data.frame(par_mu = 1:10, + par_sd = seq(0.1, 1, length.out = 10)) +head(pars, 3) + +## ------------------------------------------------------------------------ +library(rslurm) +sjob <- slurm_apply(test_func, pars, jobname = "test_job", + nodes = 2, cpus_per_node = 2) + +## ------------------------------------------------------------------------ +res <- get_slurm_out(sjob, outtype = "table") +head(res, 3) + +## ------------------------------------------------------------------------ +res_raw <- get_slurm_out(sjob, outtype = "raw", wait = FALSE) +res_raw[1:3] + +## ------------------------------------------------------------------------ +dir("_rslurm_test_job") + +## ----echo=FALSE---------------------------------------------------------- +cleanup_files(sjob) + +## ------------------------------------------------------------------------ +sjob <- slurm_call(test_func, list(par_mu = 5, par_sd = 1)) + +## ----echo=FALSE---------------------------------------------------------- +cleanup_files(sjob) + +## ----echo=FALSE---------------------------------------------------------- +obj_list <- list(NULL) +func <- function(obj) {} + +## ------------------------------------------------------------------------ +sjob <- slurm_apply(function(i) func(obj_list[[i]]), + data.frame(i = seq_along(obj_list)), + add_objects = c("func", "obj_list"), + nodes = 2, cpus_per_node = 2) + +## ----echo=FALSE---------------------------------------------------------- +cleanup_files(sjob) + +## ------------------------------------------------------------------------ +sjob <- slurm_apply(test_func, pars, + slurm_options = list(time = "1:00:00", share = TRUE)) + +## ----echo=FALSE---------------------------------------------------------- +cleanup_files(sjob) + diff --git a/vignettes/rslurm-vignette.Rmd b/inst/doc/rslurm.Rmd old mode 100755 new mode 100644 similarity index 91% rename from vignettes/rslurm-vignette.Rmd rename to inst/doc/rslurm.Rmd index d38bc77..416ee47 --- a/vignettes/rslurm-vignette.Rmd +++ b/inst/doc/rslurm.Rmd @@ -2,7 +2,7 @@ title: "Parallelize R code on a SLURM cluster" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{rslurm-vignette} + %\VignetteIndexEntry{Parallelize R code on a SLURM cluster} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -36,7 +36,7 @@ To illustrate a typical rslurm workflow, we use a simple function that takes a mean and standard deviation as parameters, generates a million normal deviates and returns the sample mean and standard deviation. -```r +```{r} test_func <- function(par_mu, par_sd) { samp <- rnorm(10^6, par_mu, par_sd) c(s_mu = mean(samp), s_sd = sd(samp)) @@ -46,24 +46,17 @@ test_func <- function(par_mu, par_sd) { We then create a parameter data frame where each row is a parameter set and each column matches an argument of the function. 
-```r +```{r} pars <- data.frame(par_mu = 1:10, par_sd = seq(0.1, 1, length.out = 10)) head(pars, 3) ``` -``` - par_mu par_sd -1 1 0.1 -2 2 0.2 -3 3 0.3 -``` - We can now pass that function and the parameters data frame to `slurm_apply`, specifiying the number of cluster nodes to use and the number of CPUs per node. The latter (`cpus_per_node`) determines how many processes will be forked on each node, as the `mc.cores` argument of `parallel::mcMap`. -```r +```{r} library(rslurm) sjob <- slurm_apply(test_func, pars, jobname = "test_job", nodes = 2, cpus_per_node = 2) @@ -73,9 +66,6 @@ information (job name and number of nodes) needed to retrieve the job's output. Assuming the function is run on a machine with access to the cluster, it also prints a message confirming the job has been submitted to SLURM. -``` -Submitted batch job 352375 -``` Particular clusters may require the specification of additional SLURM options, such as time and memory limits for the job. Also, when running R on a local @@ -90,57 +80,35 @@ execution. These functions are R wrappers for the SLURM command line functions Once the job completes, `get_slurm_out` reads and combines the output from all nodes. -```r +```{r} res <- get_slurm_out(sjob, outtype = "table") head(res, 3) ``` -``` - s_mu s_sd -1 1.000005 0.09987899 -2 2.000185 0.20001108 -3 3.000238 0.29988789 -``` - When `outtype = "table"`, the outputs from each function evaluation are row-bound into a single data frame; this is an appropriate format when the function returns a simple vector. The default `outtype = "raw"` combines the outputs into a list and can thus handle arbitrarily complex return objects. -```r -res_raw <- get_slurm_out(sjob, outtype = "raw") +```{r} +res_raw <- get_slurm_out(sjob, outtype = "raw", wait = FALSE) res_raw[1:3] ``` -``` -[[1]] - s_mu s_sd -1.00000506 0.09987899 - -[[2]] - s_mu s_sd -2.0001852 0.2000111 - -[[3]] - s_mu s_sd -3.0002377 0.2998879 -``` - The files generated by `slurm_apply` are saved in a folder named *\_rslurm_[jobname]* under the current working directory. -```r +```{r} dir("_rslurm_test_job") ``` -``` -[1] "params.RData" "results_0.RData" "results_1.RData" "slurm_0.out" -[5] "slurm_1.out" "slurm_run.R" "submit.sh" -``` - The utility function `cleanup_files` deletes the temporary folder for the specified *slurm_job*. +```{r echo=FALSE} +cleanup_files(sjob) +``` + ## Single function evaluation @@ -149,9 +117,12 @@ sends a single function call to the cluster. It is analogous in syntax to the base R function `do.call`, accepting a function and a named list of parameters as arguments. -```r +```{r} sjob <- slurm_call(test_func, list(par_mu = 5, par_sd = 1)) ``` +```{r echo=FALSE} +cleanup_files(sjob) +``` Because `slurm_call` involves a single process on a single node, it does not recognize the `nodes` and `cpus_per_node` arguments; otherwise, it accepts the @@ -165,12 +136,19 @@ within a data frame. Suppose we want instead to apply a function `func` to a lis of complex R objects, `obj_list`. To use `slurm_apply` in this case, we can wrap `func` in an inline function that takes an integer parameter. 
-```r
+```{r echo=FALSE}
+obj_list <- list(NULL)
+func <- function(obj) {}
+```
+```{r}
 sjob <- slurm_apply(function(i) func(obj_list[[i]]), 
                     data.frame(i = seq_along(obj_list)),
                     add_objects = c("func", "obj_list"),
                     nodes = 2, cpus_per_node = 2)
 ```
+```{r echo=FALSE}
+cleanup_files(sjob)
+```
 
 The `add_objects` argument specifies the names of any R objects (besides the
 parameters data frame) that must be accessed by the function passed to
@@ -190,10 +168,13 @@ SLURM `sbatch` command. It should be formatted as a named list, using the long
 names of each option (e.g. "time" rather than "t"). Flags, i.e. command line
 options that are toggled rather than set to a particular value, should be set to
 `TRUE` in `slurm_options`. For example, the following code:
-```r
+```{r}
 sjob <- slurm_apply(test_func, pars, 
                     slurm_options = list(time = "1:00:00", share = TRUE))
 ```
+```{r echo=FALSE}
+cleanup_files(sjob)
+```
 sets the command line options `--time=1:00:00 --share`.
@@ -209,8 +190,8 @@ job manually by calling `sbatch submit.sh` from the command line.
 ## How it works / advanced customization
 
 As mentioned above, the `slurm_apply` function creates a job-specific folder.
-This folder contains the parameters data frame and (if applicable) the objects
-specified as `add_objects`, both saved in *.RData* files. The function also
+This folder contains the parameters as a *.RDS* file and (if applicable) the objects
+specified as `add_objects` saved together in a *.RData* file. The function also
 generates a R script (`slurm_run.R`) to be run on each cluster node, as well
 as a Bash script (`submit.sh`) to submit the job to SLURM.
diff --git a/inst/doc/rslurm.html b/inst/doc/rslurm.html
new file mode 100644
index 0000000..ea42450
--- /dev/null
+++ b/inst/doc/rslurm.html
@@ -0,0 +1,188 @@
[188 added lines of pandoc-generated HTML omitted: the pre-built vignette "Parallelize R code on a SLURM cluster"; its rendered text duplicates vignettes/rslurm.Rmd at the end of this patch.]
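The `slurm_run.R` template updated in the next diff is where each node selects its slice of the parameter table. Here is a standalone sketch of that indexing, with the whisker placeholders (`{{{nchunk}}}`, `{{{cpus_per_node}}}`) replaced by example values chosen purely for illustration:

```r
# How slurm_run.R maps a node to parameter rows: each SLURM array task reads
# its 0-based SLURM_ARRAY_TASK_ID and takes a contiguous chunk of params.
params <- data.frame(par_mu = 1:10, par_sd = seq(0.1, 1, length.out = 10))
nchunk <- 5                        # example rows-per-node value
for (id in 0:1) {                  # id stands in for SLURM_ARRAY_TASK_ID
    istart <- id * nchunk + 1
    iend <- min((id + 1) * nchunk, nrow(params))
    print(params[istart:iend, , drop = FALSE])
}
```

Each chunk is then evaluated with `parallel::mcMap(..., mc.cores = cpus_per_node)` and serialized to its own `results_[id].RDS` file, as the template diff below shows.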
diff --git a/inst/templates/slurm_run_R.txt b/inst/templates/slurm_run_R.txt
index 232ed02..41a55a6 100755
--- a/inst/templates/slurm_run_R.txt
+++ b/inst/templates/slurm_run_R.txt
@@ -6,7 +6,7 @@ load('add_objects.RData')
 
 .rslurm_func <- {{{func}}}
-.rslurm_params <- readRDS('params.RData')
+.rslurm_params <- readRDS('params.RDS')
 .rslurm_id <- as.numeric(Sys.getenv('SLURM_ARRAY_TASK_ID'))
 .rslurm_istart <- .rslurm_id * {{{nchunk}}} + 1
 .rslurm_iend <- min((.rslurm_id + 1) * {{{nchunk}}}, nrow(.rslurm_params))
@@ -14,4 +14,4 @@ load('add_objects.RData')
     .rslurm_params[.rslurm_istart:.rslurm_iend, , drop = FALSE],
     mc.cores = {{{cpus_per_node}}}))
 
-saveRDS(.rslurm_result, file = paste0('results_', .rslurm_id, '.RData'))
+saveRDS(.rslurm_result, file = paste0('results_', .rslurm_id, '.RDS'))
diff --git a/inst/templates/slurm_run_single_R.txt b/inst/templates/slurm_run_single_R.txt
index 284c0dc..51c3b53 100755
--- a/inst/templates/slurm_run_single_R.txt
+++ b/inst/templates/slurm_run_single_R.txt
@@ -6,7 +6,7 @@ load('add_objects.RData')
 
 .rslurm_func <- {{{func}}}
-.rslurm_params <- readRDS('params.RData')
+.rslurm_params <- readRDS('params.RDS')
 .rslurm_result <- do.call(.rslurm_func, .rslurm_params)
 
-saveRDS(.rslurm_result, file = 'results_0.RData')
+saveRDS(.rslurm_result, file = 'results_0.RDS')
diff --git a/man/rslurm-package.Rd b/man/rslurm-package.Rd
old mode 100755
new mode 100644
index 0012186..8c76085
--- a/man/rslurm-package.Rd
+++ b/man/rslurm-package.Rd
@@ -2,12 +2,12 @@
 % Please edit documentation in R/rslurm.R
 \docType{package}
 \name{rslurm-package}
-\alias{rslurm-package}
 \alias{rslurm}
-\title{rslurm: Submit R calculations to a SLURM cluster}
+\title{rslurm: Submit R Calculations to a 'SLURM' Cluster}
 \description{
-This package automates the process of sending simple function calls or
-parallel calculations to a cluster using the SLURM workload manager.
+Functions that simplify submitting R scripts to a 'SLURM' cluster
+workload manager, in part by automating the division of embarrassingly parallel
+calculations across cluster nodes.
 }
 \section{Overview}{
@@ -26,7 +26,7 @@ in the package: \code{\link{print_job_status}}, \code{\link{cancel_slurm}},
 \code{\link{get_slurm_out}} and \code{\link{cleanup_files}}.
 
 For bug reports or questions about this package, contact
-Philippe Marchand (pmarchand@sesync.org).
+Ian Carroll (icarroll@sesync.org).
 }
 \section{Function Specification}{
@@ -58,6 +58,13 @@ function passed to \code{slurm_apply} produces a vector output, you may use
 one row by function call.
 }
 
+\section{Acknowledgement}{
+
+Development of this R package was supported by the National
+Socio-Environmental Synthesis Center (SESYNC) under funding received
+from the National Science Foundation DBI-1052875.
+}
+
 \examples{
 \dontrun{
 # Create a data frame of mean/sd values for normal distributions
@@ -78,3 +85,25 @@ cleanup_files(sjob1)
 }
 
 }
+\seealso{
+Useful links:
+\itemize{
+  \item \url{https://github.com/SESYNC-ci/rslurm}
+  \item Report bugs at \url{https://github.com/SESYNC-ci/rslurm/issues}
+}
+
+}
+\author{
+\strong{Maintainer}: Ian Carroll \email{icarroll@sesync.org} [contributor]
+
+Authors:
+\itemize{
+  \item Philippe Marchand
+}
+
+Other contributors:
+\itemize{
+  \item Mike Smorul [contributor]
+}
+
+}
diff --git a/man/slurm_apply.Rd b/man/slurm_apply.Rd
index b730848..c942697 100755
--- a/man/slurm_apply.Rd
+++ b/man/slurm_apply.Rd
@@ -52,8 +52,8 @@ parameters in parallel, spread across multiple nodes of a SLURM cluster.
} \details{ This function creates a temporary folder ("_rslurm_[jobname]") in the current -directory, holding the .RData files, the R script and the Bash submission -script generated for the SLURM job. +directory, holding .RData and .RDS data files, the R script to run and the Bash +submission script generated for the SLURM job. The set of input parameters is divided in equal chunks sent to each node, and \code{f} is evaluated in parallel within each node using functions from the @@ -71,7 +71,7 @@ not be manually set. When processing the computation job, the SLURM cluster will output two types of files in the temporary folder: those containing the return values of the -function for each subset of parameters ("results_[node_id].RData") and those +function for each subset of parameters ("results_[node_id].RDS") and those containing any console or error output produced by R on each node ("slurm_[node_id].out"). diff --git a/man/slurm_call.Rd b/man/slurm_call.Rd index 0cd265d..9859ab4 100755 --- a/man/slurm_call.Rd +++ b/man/slurm_call.Rd @@ -40,8 +40,8 @@ cluster. } \details{ This function creates a temporary folder ("_rslurm_[jobname]") in the current -directory, holding the .RData files, the R script and the Bash submission -script generated for the SLURM job. +directory, holding .RData and .RDS data files, the R script to run and the Bash +submission script generated for the SLURM job. The names of any other R objects (besides \code{params}) that \code{f} needs to access should be listed in the \code{add_objects} argument. @@ -56,7 +56,7 @@ not be manually set. When processing the computation job, the SLURM cluster will output two files in the temporary folder: one with the return value of the function -("results_0.RData") and one containing any console or error output produced +("results_0.RDS") and one containing any console or error output produced by R ("slurm_[node_id].out"). 
 If \code{submit = TRUE}, the job is sent to the cluster and a confirmation
diff --git a/release.R b/release.R
new file mode 100644
index 0000000..d85cf96
--- /dev/null
+++ b/release.R
@@ -0,0 +1,30 @@
+library(tools)
+library(rmarkdown)
+library(devtools)
+library(xml2)
+
+# Document from R/*.R
+document()
+
+# Create a README.md for GitHub and CRAN from 'R/rslurm.R'
+# by way of the 'man/rslurm-package.Rd' produced by roxygen2
+Rd2HTML(parse_Rd('man/rslurm-package.Rd'), out = 'README.html')
+html <- read_html('README.html')
+table <- xml_find_first(html, '//table')
+xml_remove(table)
+h2 <- xml_find_first(html, '//h2')
+img <- read_xml('<img src="https://travis-ci.org/SESYNC-ci/rslurm.svg?branch=master"/>')
+xml_add_sibling(h2, img, where='after')
+write_html(html, 'README.html')
+pandoc_convert(input='README.html', to='markdown_github', output='README.md')
+unlink('README.html')
+
+# Remove duplicate documentation for rslurm and rslurm-package from index and search
+system('sed -i "/alias{rslurm-package}/d" man/rslurm-package.Rd')
+
+# Build vignettes
+build_vignettes()
+
+# Build
+pkg <- build(path='~/tmp/')
+check_built(pkg)
diff --git a/tests/testthat/test_slurm_apply.R b/tests/testthat/test_slurm_apply.R
index da5dd77..7e917a1 100755
--- a/tests/testthat/test_slurm_apply.R
+++ b/tests/testthat/test_slurm_apply.R
@@ -22,7 +22,7 @@ msg <- capture.output(
     sjob1 <- slurm_apply(ftest, pars, jobname = "test1",
                          nodes = 2, cpus_per_node = 1, submit = FALSE)
 )
-local_slurm_array(sjob1)
+sjob1 <- local_slurm_array(sjob1)
 res <- get_slurm_out(sjob1, "table", wait = FALSE)
 res_raw <- get_slurm_out(sjob1, "raw", wait = FALSE)
 test_that("slurm_apply gives correct output", {
@@ -38,7 +38,7 @@ msg <- capture.output(
     sjob2 <- slurm_apply(ftest, pars[, 1, drop = FALSE], jobname = "test2",
                          nodes = 2, cpus_per_node = 1, submit = FALSE)
 )
-local_slurm_array(sjob2)
+sjob2 <- local_slurm_array(sjob2)
 res <- get_slurm_out(sjob2, "table", wait = FALSE)
 test_that("slurm_apply works with single parameter", {
     expect_equal(pars$par_m, res$s_m, tolerance = 0.01)
@@ -48,7 +48,7 @@ msg <- capture.output(
     sjob3 <- slurm_apply(ftest, pars[1, ], nodes = 2, jobname = "test3",
                         cpus_per_node = 1, submit = FALSE)
 )
-local_slurm_array(sjob3)
+sjob3 <- local_slurm_array(sjob3)
 res <- get_slurm_out(sjob3, "table", wait = FALSE)
 test_that("slurm_apply works with single row", {
     expect_equal(sjob3$nodes, 1)
@@ -59,7 +59,7 @@ msg <- capture.output(
     sjob4 <- slurm_apply(ftest, pars[1, 1, drop = FALSE], jobname = "test4",
                          nodes = 2, cpus_per_node = 1, submit = FALSE)
 )
-local_slurm_array(sjob4)
+sjob4 <- local_slurm_array(sjob4)
 res <- get_slurm_out(sjob4, "table", wait = FALSE)
 test_that("slurm_apply works with single parameter and single row", {
     expect_equal(pars$par_m[1], res$s_m, tolerance = 0.01)
diff --git a/tests/testthat/test_slurm_call.R b/tests/testthat/test_slurm_call.R
index 5c0cd53..0362b6d 100755
--- a/tests/testthat/test_slurm_call.R
+++ b/tests/testthat/test_slurm_call.R
@@ -14,11 +14,12 @@ test_that("slurm_job name is correctly edited", {
     expect_equal(sjob$jobname, "test_call")
 })
-olddir <- getwd()
-rscript_path <- file.path(R.home("bin"), "Rscript")
-setwd(paste0("_rslurm_", sjob$jobname))
-tryCatch(system(paste(rscript_path, "--vanilla slurm_run.R")),
-        finally = setwd(olddir))
+sjob <- local_slurm_array(sjob)
+# olddir <- getwd()
+# rscript_path <- file.path(R.home("bin"), "Rscript")
+# setwd(paste0("_rslurm_", sjob$jobname))
+# tryCatch(system(paste(rscript_path, "--vanilla slurm_run.R")),
+#         finally = setwd(olddir))
 res <- get_slurm_out(sjob, wait = FALSE)
test_that("slurm_call returns correct output", { diff --git a/vignettes/rslurm.Rmd b/vignettes/rslurm.Rmd new file mode 100644 index 0000000..416ee47 --- /dev/null +++ b/vignettes/rslurm.Rmd @@ -0,0 +1,214 @@ +--- +title: "Parallelize R code on a SLURM cluster" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Parallelize R code on a SLURM cluster} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +Many computing-intensive processes in R involve the repeated evaluation of +a function over many items or parameter sets. These so-called +[embarrassingly parallel](https://en.wikipedia.org/wiki/Embarrassingly_parallel) +calculations can be run serially with the `lapply` or `Map` function, or in parallel +on a single machine with `mclapply` or `mcMap` (from the **parallel** package). + +The rslurm package simplifies the process of distributing this type of calculation +across a computing cluster that uses the [SLURM](http://slurm.schedmd.com/) +workload manager. Its main function, `slurm_apply`, automatically divides the +computation over multiple nodes and writes the necessary submission scripts. +It also includes functions to retrieve and combine the output from different nodes, +as well as wrappers for common SLURM commands. + +### Table of contents + +- [Basic example](#basic-example) +- [Single function evaluation](#single-function-evaluation) +- [Adding auxiliary data and functions](#adding-auxiliary-data-and-functions) +- [Configuring SLURM options](#configuring-slurm-options) +- [Generating scripts for later submission](#generating-scripts-for-later-submission) +- [How it works / advanced customization](#how-it-works-advanced-customization) + + +## Basic example + +To illustrate a typical rslurm workflow, we use a simple function that takes +a mean and standard deviation as parameters, generates a million normal deviates +and returns the sample mean and standard deviation. + +```{r} +test_func <- function(par_mu, par_sd) { + samp <- rnorm(10^6, par_mu, par_sd) + c(s_mu = mean(samp), s_sd = sd(samp)) +} +``` + +We then create a parameter data frame where each row is a parameter set and each +column matches an argument of the function. + +```{r} +pars <- data.frame(par_mu = 1:10, + par_sd = seq(0.1, 1, length.out = 10)) +head(pars, 3) +``` + +We can now pass that function and the parameters data frame to `slurm_apply`, +specifiying the number of cluster nodes to use and the number of CPUs per node. +The latter (`cpus_per_node`) determines how many processes will be forked on +each node, as the `mc.cores` argument of `parallel::mcMap`. +```{r} +library(rslurm) +sjob <- slurm_apply(test_func, pars, jobname = "test_job", + nodes = 2, cpus_per_node = 2) +``` +The output of `slurm_apply` is a *slurm_job* object that stores a few pieces of +information (job name and number of nodes) needed to retrieve the job's output. + +Assuming the function is run on a machine with access to the cluster, it also +prints a message confirming the job has been submitted to SLURM. + +Particular clusters may require the specification of additional SLURM options, +such as time and memory limits for the job. Also, when running R on a local +machine without direct cluster access, you may want to generate scripts to be +copied to the cluster and run at a later time. These topics are covered in +additional sections below this basic example. 
+
+After the job has been submitted, you can call `print_job_status` to display
+its status (in queue, running or completed) or call `cancel_slurm` to cancel
+its execution. These functions are R wrappers for the SLURM command line
+utilities `squeue` and `scancel`, respectively.
+
+Once the job completes, `get_slurm_out` reads and combines the output from
+all nodes.
+```{r}
+res <- get_slurm_out(sjob, outtype = "table")
+head(res, 3)
+```
+
+When `outtype = "table"`, the outputs from each function evaluation are
+row-bound into a single data frame; this is an appropriate format when the
+function returns a simple vector. The default `outtype = "raw"` combines the
+outputs into a list and can thus handle arbitrarily complex return objects.
+
+```{r}
+res_raw <- get_slurm_out(sjob, outtype = "raw", wait = FALSE)
+res_raw[1:3]
+```
+
+The files generated by `slurm_apply` are saved in a folder named
+*\_rslurm\_[jobname]* under the current working directory.
+
+```{r}
+dir("_rslurm_test_job")
+```
+
+The utility function `cleanup_files` deletes the temporary folder for the
+specified *slurm_job*.
+
+```{r echo=FALSE}
+cleanup_files(sjob)
+```
+
+
+## Single function evaluation
+
+In addition to `slurm_apply`, rslurm also defines a `slurm_call` function,
+which sends a single function call to the cluster. It is analogous in syntax
+to the base R function `do.call`, accepting a function and a named list of
+parameters as arguments.
+
+```{r}
+sjob <- slurm_call(test_func, list(par_mu = 5, par_sd = 1))
+```
+```{r echo=FALSE}
+cleanup_files(sjob)
+```
+
+Because `slurm_call` involves a single process on a single node, it does not
+recognize the `nodes` and `cpus_per_node` arguments; otherwise, it accepts
+the same additional arguments (detailed in the sections below) as
+`slurm_apply`.
+
+
+## Adding auxiliary data and functions
+
+The function passed to `slurm_apply` can only receive atomic parameters
+stored within a data frame. Suppose we want instead to apply a function
+`func` to a list of complex R objects, `obj_list`. To use `slurm_apply` in
+this case, we can wrap `func` in an inline function that takes an integer
+parameter.
+
+```{r echo=FALSE}
+obj_list <- list(NULL)
+func <- function(obj) {}
+```
+```{r}
+sjob <- slurm_apply(function(i) func(obj_list[[i]]),
+                    data.frame(i = seq_along(obj_list)),
+                    add_objects = c("func", "obj_list"),
+                    nodes = 2, cpus_per_node = 2)
+```
+```{r echo=FALSE}
+cleanup_files(sjob)
+```
+
+The `add_objects` argument specifies the names of any R objects (besides the
+parameters data frame) that must be accessed by the function passed to
+`slurm_apply`. These objects are saved to a `.RData` file that is loaded on
+each cluster node prior to evaluating the function in parallel.
+
+By default, all R packages attached to the current R session will also be
+attached (with `library`) on each cluster node, though this can be modified
+with the optional `pkgs` argument.
+
+
+## Configuring SLURM options
+
+The `slurm_options` argument allows you to set any of the command line
+options ([view the list](http://slurm.schedmd.com/sbatch.html)) recognized
+by the SLURM `sbatch` command. It should be formatted as a named list, using
+the long names of each option (e.g. "time" rather than "t"). Flags, i.e.
+command line options that are toggled rather than set to a particular value,
+should be set to `TRUE` in `slurm_options`. For example, the following code:
+```{r}
+sjob <- slurm_apply(test_func, pars,
+                    slurm_options = list(time = "1:00:00", share = TRUE))
+```
+```{r echo=FALSE}
+cleanup_files(sjob)
+```
+sets the command line options `--time=1:00:00 --share`.
+
+
+## Generating scripts for later submission
+
+When working from an R session without direct access to the cluster, you can
+set `submit = FALSE` within `slurm_apply`. The function will create the
+*\_rslurm\_[jobname]* folder and generate the scripts and data files, without
+submitting the job. You may then copy those files to the cluster and submit
+the job manually by calling `sbatch submit.sh` from the command line.
+
+
+## How it works / advanced customization
+
+As mentioned above, the `slurm_apply` function creates a job-specific folder.
+This folder contains the parameters as a *.RDS* file and (if applicable) the
+objects specified as `add_objects` saved together in a *.RData* file. The
+function also generates an R script (`slurm_run.R`) to be run on each cluster
+node, as well as a Bash script (`submit.sh`) to submit the job to SLURM.
+
+More specifically, the Bash script creates a SLURM job array, with each
+cluster node receiving a different value of the *SLURM\_ARRAY\_TASK\_ID*
+environment variable. This variable is read by `slurm_run.R`, which allows
+each instance of the script to operate on a different parameter subset and
+write its output to a different results file. The R script calls
+`parallel::mcMap` to parallelize calculations on each node.
+
+Both `slurm_run.R` and `submit.sh` are generated from templates, using the
+**whisker** package; these templates can be found in the `rslurm/templates`
+subfolder of your R package library. There are two templates for each script,
+one for `slurm_apply` and the other (with the word *single* in its file name)
+for `slurm_call`.
+
+While you should avoid changing any existing lines in the template scripts,
+you may want to add `#SBATCH` lines to the `submit.sh` templates in order to
+permanently set certain SLURM command line options and thus customize the
+package to your particular cluster setup.
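+
+For instance, to find the installed templates from the R console, here is a
+short sketch using base R's `find.package` (the resulting path depends on
+where your package library lives):
+
+```{r eval=FALSE}
+# List the whisker templates shipped with the installed package
+dir(file.path(find.package("rslurm"), "templates"))
+```
\ No newline at end of file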