Skip to content

Commit

Permalink
Merge pull request #15 from seroanalytics/i9
Browse files Browse the repository at this point in the history
support working with devtools
  • Loading branch information
hillalex authored Oct 7, 2024
2 parents 24120fb + e0b5298 commit badf860
Show file tree
Hide file tree
Showing 12 changed files with 266 additions and 15 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/test-snapshots.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Runs the Docker-based snapshot tests for every pull request targeting main.
name: Snapshot testing

on:
  pull_request:
    branches:
      - main

env:
  # Source branch of the pull request, falling back to the ref name.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}

# Cancel an in-progress run for the same workflow and ref when a new one starts.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  snapshot:
    runs-on: ubuntu-latest
    steps:
      - name: ⬇️ Checkout repo
        uses: actions/checkout@v4

      # The script builds the test image and runs the snapshot tests inside it.
      - name: Run tests
        run: ./tests/snapshots/test-snapshots
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ src/stan/**
src/stan/**/*.exe
src/stan/**/*.EXE
inst/doc
bin
.idea
*.png
3 changes: 3 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,14 @@ Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
Imports:
cmdstanr,
data.table,
forcats,
fs,
instantiate,
logger,
mosaic,
pkgload,
R6,
stats,
stringr,
Expand Down
2 changes: 2 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ summarise_draws <- function(dt_in, column_name, by = by) {

build_covariate_lookup_table <- function(data, design_matrix, all_formula_vars) {

p_name <- NULL

if (length(all_formula_vars) == 0) {
return(NULL)
}
Expand Down
43 changes: 43 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Namespace load hook.
#
# Does nothing unless the package is being loaded through pkgload (e.g.
# devtools::load_all or devtools::test). In that case it makes sure cmdstan is
# available, copies the stan sources into a local bin/stan directory and
# compiles them there.
#
# libname: directory containing the package, as passed by the loader.
# pkgname: name of the package, as passed by the loader.
.onLoad <- function(libname, pkgname) {

  # When installing the package, src/install.libs.R compiles and installs the
  # stan model files. Then when loading an installed version of the package,
  # instantiate::stan_package_model will look in the installation directory to
  # find the executable. But pkgload::load_all does not simulate behaviour of
  # src/install.libs.R so here we compile the stan models and move
  # them into a place where the pkgload system.file shim can find them
  if (pkgload::is_loading()) {

    # User may not have cmdstan installed yet
    if (is.null(cmdstanr::cmdstan_version(error_on_NA = FALSE))) {
      packageStartupMessage("Installing cmdstan")
      cmdstanr::install_cmdstan()
    } else {
      packageStartupMessage(paste("Found cmdstan at path",
                                  cmdstanr::cmdstan_path()))
    }

    # When epikinetics installation is simulated using load_all, the system.file
    # shim looks for files in the local source directory so here we create a
    # temporary 'bin/stan' directory in the source directory so that calls to
    # system.file("bin/stan/model.stan", "epikinetics") will resolve correctly
    #
    # NOTE(review): presumably this handles libname == "/" so that file.path
    # does not produce a path starting with "//" - confirm.
    if (nchar(libname) == 1) {
      libname <- ""
    }
    pkg_root <- file.path(libname, pkgname)
    bin <- file.path(pkg_root, "bin")
    if (!dir.exists(bin)) {
      packageStartupMessage("Creating local bin directory")
      dir.create(bin, recursive = TRUE, showWarnings = TRUE)
    }

    packageStartupMessage("Copying stan files")
    bin_stan <- file.path(bin, "stan")
    source_path <- file.path(pkg_root, "src", "stan")
    fs::dir_copy(path = source_path, new_path = bin_stan, overwrite = TRUE)

    # Report what was copied. Collapse the paths explicitly (a bare character
    # vector is concatenated without separators) and use packageStartupMessage
    # so that suppressPackageStartupMessages() can silence it.
    packageStartupMessage(paste("Contents of bin directory:",
                                paste(fs::dir_ls(bin), collapse = ", ")))

    instantiate::stan_package_compile(
      models = instantiate::stan_package_model_files(path = bin_stan),
      cpp_options = list(stan_threads = TRUE),
      stanc_options = list("O1")
    )
    packageStartupMessage("Finished compiling models")
  }
}
50 changes: 46 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ an explanation of the input format.

# Installing

This package uses `cmdstanr`, which isn't available on cran, so you will first have to install it as follows:
To interface with [cmdstan](https://mc-stan.org/users/interfaces/cmdstan), this package uses `cmdstanr`, which isn't available on CRAN, so you will first have to install it as follows:

```
install.packages('cmdstanr', repos = c('https://stan-dev.r-universe.dev', getOption('repos')))
Expand All @@ -32,6 +32,23 @@ You can then install `epikinetics` from GitHub:
remotes::install_github("seroanalytics/epikinetics")
```

## Troubleshooting installation

If you don't already have [cmdstan](https://mc-stan.org/users/interfaces/cmdstan) installed, the `epikinetics` installer will attempt
to install it, which can take a few minutes. If you see errors as part of installation, it is
probably a good idea to try and install `cmdstan` first, for easier debugging. You can
do this using the `cmdstanr` package as follows:

```{r}
cmdstanr::install_cmdstan()
```

Verify the installation is working with

```{r}
cmdstanr::cmdstan_version()
```

# Running in Docker

Alternatively, you can run `epikinetics` via a Docker image, mounting a working directory which contains your input data files:
Expand All @@ -44,10 +61,35 @@ docker run -v /path/to/local/workdir:/workdir -it seroanalytics/epikinetics:main
# Developing

This package relies on the [instantiate](https://wlandau.github.io/instantiate/) package
to ship pre-compiled stan models. See the `src/install.libs.R` file for the logic.
to ship pre-compiled stan models. See the `src/install.libs.R` file for the logic for compiling
and installing the stan models during package installation.

When running via `devtools` (e.g. `test` or `load_all`) the `install.libs.R` logic is not run,
so for this we use an `.onLoad` hook which checks whether the package is being loaded via `devtools`
and if so, copies the stan model files into a local `bin` directory and compiles them there,
so that the `system.file` shim can access them.

Note that if you are running `devtools::load_all` or `devtools::test` and you don't yet
have `cmdstan` installed, this will trigger an installation of `cmdstan` which can take
a few minutes.

## Testing

Most tests are run with

```{r}
devtools::test()
```

For snapshot testing of stan model outputs, we need the outputs to be exactly
reproducible. As well as setting a seed, this requires the machine environment
to be exactly the same, so we run these inside a Docker container, via a bash script:

```{shell}
./tests/snapshots/test-snapshots
```

Annoyingly, `devtools::load_all()` won't work due to [this issue](https://github.com/r-lib/devtools/issues/1444). For testing
local changes you will have to actually run `devtools::install()`.
This involves recompiling the model, so takes a while to run.

## Docker
To build a Docker image, run `docker/build`.
Expand Down
16 changes: 16 additions & 0 deletions snapshot-tests.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Image in which the snapshot tests are run, so that model outputs are
# produced in a fixed environment.
FROM rocker/tidyverse:4

# Update and install in one layer so a cached, stale package index is never
# paired with a fresh install step.
RUN apt-get update && \
    apt-get install libtbb-dev -y && \
    rm -rf /var/lib/apt/lists/*
RUN Rscript -e "install.packages('cmdstanr', repos = c('https://stan-dev.r-universe.dev', getOption('repos')))"
RUN Rscript -e "cmdstanr::install_cmdstan()"

WORKDIR /epikinetics
# Copy DESCRIPTION on its own first so the dependency layers below stay cached
# when only source files change.
COPY DESCRIPTION /epikinetics

RUN Rscript -e "devtools::install_deps()"
RUN Rscript -e "install.packages('decor')"

COPY . /epikinetics
# Place the snapshot test and its stored snapshots where testthat looks for
# them: tests/testthat/ and tests/testthat/_snaps/.
COPY tests/snapshots/test-snapshots.R /epikinetics/tests/testthat/
COPY tests/snapshots/_snaps /epikinetics/tests/testthat/_snaps
18 changes: 7 additions & 11 deletions src/install.libs.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,11 @@ if (!file.exists(bin)) {
}
bin_stan <- file.path(bin, "stan")
fs::dir_copy(path = "stan", new_path = bin_stan)
callr::r(
func = function(bin_stan) {
models <- instantiate::stan_package_model_files(path = bin_stan)
message(paste("Compiling models:", paste0(models, collapse = ",")))
instantiate::stan_package_compile(
models = instantiate::stan_package_model_files(path = bin_stan),
cpp_options = list(stan_threads = TRUE),
stanc_options = list("O1")
)
},
args = list(bin_stan = bin_stan)
models <- instantiate::stan_package_model_files(path = bin_stan)
message(paste("Compiling models:", paste0(models, collapse = ",")))
instantiate::stan_package_compile(
models = instantiate::stan_package_model_files(path = bin_stan),
cpp_options = list(stan_threads = TRUE),
stanc_options = list("O1")
)
message("Finished compiling models")
81 changes: 81 additions & 0 deletions tests/snapshots/_snaps/snapshots.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Model fits are the same

Code
delta
Output
variable mean median sd mad q5 q95 rhat ess_bulk
lp__ -1184.11 -1178.54 58.33 56.66 -1277.41 -1101.09 1.09 35
t0_pop[1] 4.11 4.11 0.27 0.30 3.69 4.56 1.03 155
t0_pop[2] 4.77 4.77 0.26 0.26 4.33 5.17 1.04 72
t0_pop[3] 3.50 3.49 0.29 0.30 3.04 3.97 1.01 324
tp_pop[1] 9.51 9.53 0.70 0.73 8.27 10.57 1.07 40
tp_pop[2] 10.74 10.74 0.64 0.64 9.71 11.78 1.08 37
tp_pop[3] 8.84 8.84 0.84 0.88 7.47 10.14 1.08 40
ts_pop_delta[1] 52.74 52.69 2.75 2.71 48.15 57.29 1.00 494
ts_pop_delta[2] 61.67 61.58 2.82 2.95 56.79 66.53 1.01 142
ts_pop_delta[3] 49.86 49.73 2.65 2.68 45.61 54.34 1.01 355
ess_tail
103
289
220
412
340
307
227
416
370
348
# showing 10 of 10103 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

# Population trajectories are the same

Code
trajectories
Output
t me lo hi titre_type
<int> <num> <num> <num> <char>
1: 0 76.25839 56.51254 102.5247 Ancestral
2: 1 94.45253 72.00175 121.8491 Ancestral
3: 2 116.75048 90.41975 144.7452 Ancestral
4: 3 144.43023 113.33132 180.0799 Ancestral
5: 4 178.01885 141.05339 224.6319 Ancestral
---
902: 146 163.39995 128.89520 201.2052 Delta
903: 147 162.78005 128.23676 200.4452 Delta
904: 148 162.15565 127.58168 199.7415 Delta
905: 149 161.54259 126.92995 199.2712 Delta
906: 150 160.95822 126.28154 198.8112 Delta
infection_history
<char>
1: Infection naive
2: Infection naive
3: Infection naive
4: Infection naive
5: Infection naive
---
902: Previously infected (Pre-Omicron)
903: Previously infected (Pre-Omicron)
904: Previously infected (Pre-Omicron)
905: Previously infected (Pre-Omicron)
906: Previously infected (Pre-Omicron)

# Individual trajectories are the same

Code
trajectories
Output
calendar_date titre_type me lo hi time_shift
<IDat> <char> <num> <num> <num> <num>
1: 2021-03-08 Ancestral 543.79451 433.09823 670.4975 0
2: 2021-03-09 Ancestral 528.88463 429.80874 653.8752 0
3: 2021-03-10 Ancestral 545.26438 444.31407 665.8502 0
4: 2021-03-11 Ancestral 522.29561 418.67869 631.7992 0
5: 2021-03-12 Ancestral 531.74586 423.99750 648.3266 0
---
1775: 2022-08-07 Delta 91.77050 31.07070 429.5746 0
1776: 2022-08-08 Delta 91.18062 31.07408 424.9366 0
1777: 2022-08-09 Delta 94.16216 31.29551 426.2925 0
1778: 2022-08-10 Delta 90.74115 30.39902 426.8355 0
1779: 2022-08-11 Delta 92.97980 28.91787 438.0270 0

5 changes: 5 additions & 0 deletions tests/snapshots/test-snapshots
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Builds the snapshot-test Docker image and runs the snapshot tests inside it.
set -ex

# Run from the repository root (two levels above this script) so the build
# context and Dockerfile resolve regardless of the caller's working directory.
HERE=$(dirname "${BASH_SOURCE[0]}")
cd "$HERE/../.."

docker build . -f snapshot-tests.Dockerfile -t epikinetics-test
# stop_on_failure makes Rscript exit non-zero when a test fails, so the script
# (and CI) fails too; --rm removes the container once the run finishes.
docker run --rm epikinetics-test Rscript -e "devtools::test(filter='snapshot', stop_on_failure=TRUE)"
30 changes: 30 additions & 0 deletions tests/snapshots/test-snapshots.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Snapshot tests for model outputs.
#
# The variable names passed to expect_snapshot() below (`delta`,
# `trajectories`) appear verbatim in the stored snapshot file, so renaming
# them changes the snapshot.

# Force a recompile of the stan models under /epikinetics/bin/stan, using the
# same cpp and stanc options as the package build.
instantiate::stan_package_compile(
models = instantiate::stan_package_model_files(path = "/epikinetics/bin/stan"),
cpp_options = list(stan_threads = TRUE),
stanc_options = list("O1"),
force_recompile = TRUE
)
# Load the example dataset shipped with the package and fit the model once;
# the fit is shared by all tests below.
dat <- data.table::fread(system.file("delta_full.rds", package = "epikinetics"))
mod <- biokinetics$new(data = dat, covariate_formula = ~0 + infection_history)
# Fixed seed so that the sampler output is repeatable between runs.
delta <- mod$fit(parallel_chains = 4,
iter_warmup = 50,
iter_sampling = 100,
seed = 100)

# expect_snapshot() requires the 3rd edition of testthat.
local_edition(3)

test_that("Model fits are the same", {
expect_snapshot(delta)
})

test_that("Population trajectories are the same", {
# Seed R's RNG before simulating so the trajectories are repeatable.
set.seed(1)
trajectories <- mod$simulate_population_trajectories()
expect_snapshot(trajectories)
})

test_that("Individual trajectories are the same", {
# Seed R's RNG before simulating so the trajectories are repeatable.
set.seed(1)
trajectories <- mod$simulate_individual_trajectories()
expect_snapshot(trajectories)
})
9 changes: 9 additions & 0 deletions tests/testthat/test-run-model.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@ mock_model_no_covariates <- function(name, package) {
list(sample = function(x, ...) readRDS(test_path("testdata", "testdraws_nocovariates.rds")))
}

# Smoke test: fits the model without any mocked bindings and checks the class
# of the returned fit object.
test_that("Can run model", {
  dat <- data.table::fread(system.file("delta_full.rds", package = "epikinetics"))
  mod <- biokinetics$new(data = dat, covariate_formula = ~0 + infection_history)
  delta <- mod$fit(parallel_chains = 4,
                   iter_warmup = 10,
                   iter_sampling = 100)
  # exact = TRUE requires the full class vector to match, as the previous
  # expect_equal(class(delta), ...) did, with a clearer failure message.
  expect_s3_class(delta, c("CmdStanMCMC", "CmdStanFit", "R6"), exact = TRUE)
})

test_that("Can fit model with arguments", {
local_mocked_bindings(
stan_package_model = mock_model_return_args, .package = "instantiate"
Expand Down

0 comments on commit badf860

Please sign in to comment.