diff --git a/NAMESPACE b/NAMESPACE
index 8c08ea78..0959ab48 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -220,11 +220,13 @@ importFrom(stats,predict)
importFrom(stats,printCoefmat)
importFrom(stats,qbinom)
importFrom(stats,qcauchy)
+importFrom(stats,qlogis)
importFrom(stats,qnorm)
importFrom(stats,qqline)
importFrom(stats,qqnorm)
importFrom(stats,quantile)
importFrom(stats,rbeta)
+importFrom(stats,rbinom)
importFrom(stats,reformulate)
importFrom(stats,rgamma)
importFrom(stats,rlnorm)
diff --git a/R/families.R b/R/families.R
index 7711267d..84f7424c 100644
--- a/R/families.R
+++ b/R/families.R
@@ -26,7 +26,7 @@
#' \item \code{nmix} for count data with imperfect detection modeled via a
#' State-Space N-Mixture model. The latent states are Poisson (with log link), capturing the 'true' latent
#' abundance, while the observation process is Binomial to account for imperfect detection. The
-#' observation formula in these models is used to set up a linear predictor for the detection
+#' observation \code{formula} in these models is used to set up a linear predictor for the detection
#' probability (with logit link). See the example below for a more detailed worked explanation
#' of the `nmix()` family
#' }
@@ -174,7 +174,7 @@ student_t = function(link = 'identity'){
#' det_plot <- plot(conditional_effects(mod,
#' type = 'detection',
#' effects = 'rainfall'),
-#' plot = FALSE
+#' plot = FALSE)
#' det_plot[[1]] +
#' ylab('Pr(detection)')
#'
diff --git a/R/mvgam.R b/R/mvgam.R
index b6c9f0b3..baaafd2a 100644
--- a/R/mvgam.R
+++ b/R/mvgam.R
@@ -35,7 +35,7 @@
#'@param trend_knots As for `knots` above, this is an optional \code{list} of knot values for smooth
#'functions within the `trend_formula`
#'@param data A \code{dataframe} or \code{list} containing the model response variable and covariates
-#'required by the GAM \code{formula}. Should include columns:
+#'required by the GAM \code{formula} and optional \code{trend_formula}. Should include columns:
#'`series` (a \code{factor} index of the series IDs;the number of levels should be identical
#'to the number of unique series labels (i.e. `n_series = length(levels(data$series))`))
#'`time` (\code{numeric} or \code{integer} index of the time point for each observation).
diff --git a/README.Rmd b/README.Rmd
index df593d54..0ad75ffc 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -33,7 +33,7 @@ A series of [vignettes cover data formatting, forecasting and several extended c
## Installation
Install from `GitHub` using:
-`devtools::install_github("nicholasjclark/mvgam")`. Note that to condition models with MCMC sampling, either `JAGS` (along with packages `rjags` and `runjags`) or `Stan` must be installed (along with either `rstan` and/or `cmdstanr`). Please refer to installation links for `JAGS` [here](https://sourceforge.net/projects/mcmc-jags/files/){target="_blank"}, for `Stan` with `rstan` [here](https://mc-stan.org/users/interfaces/rstan){target="_blank"}, or for `Stan` with `cmdstandr` [here](https://mc-stan.org/cmdstanr/){target="_blank"}. You will need a fairly recent version of `Stan` to ensure all syntax is recognized. If you see warnings such as `variable "array" does not exist`, this is usually a sign that you need to update `Stan`. We highly recommend you use `Cmdstan` through the `cmdstanr` interface. This is because `Cmdstan` is easier to install, is more up to date with new features, and uses less memory than `Rstan`. See [this documentation from the `Cmdstan` team for more information](http://mc-stan.org/cmdstanr/articles/cmdstanr.html#comparison-with-rstan){target="_blank"}.
+`devtools::install_github("nicholasjclark/mvgam")`. Note that to condition models on observed data, either `JAGS` (along with packages `rjags` and `runjags`) or `Stan` must be installed (along with either `rstan` and/or `cmdstanr`). Please refer to installation links for `JAGS` [here](https://sourceforge.net/projects/mcmc-jags/files/){target="_blank"}, for `Stan` with `rstan` [here](https://mc-stan.org/users/interfaces/rstan){target="_blank"}, or for `Stan` with `cmdstandr` [here](https://mc-stan.org/cmdstanr/){target="_blank"}. You will need a fairly recent version of `Stan` to ensure all syntax is recognized. If you see warnings such as `variable "array" does not exist`, this is usually a sign that you need to update `Stan`. We highly recommend you use `Cmdstan` through the `cmdstanr` interface. This is because `Cmdstan` is easier to install, is more up to date with new features, and uses less memory than `Rstan`. See [this documentation from the `Cmdstan` team for more information](http://mc-stan.org/cmdstanr/articles/cmdstanr.html#comparison-with-rstan){target="_blank"}.
## Citing mvgam and related software
When using any software please make sure to appropriately acknowledge the hard work that developers and maintainers put into making these packages available. Citations are currently the best way to formally acknowledge this work, so we highly encourage you to cite any packages that you rely on for your research.
diff --git a/README.md b/README.md
index 19c7be45..b035b155 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ been compiled:
Install from `GitHub` using:
`devtools::install_github("nicholasjclark/mvgam")`. Note that to
-condition models with MCMC sampling, either `JAGS` (along with packages
+condition models on observed data, either `JAGS` (along with packages
`rjags` and `runjags`) or `Stan` must be installed (along with either
`rstan` and/or `cmdstanr`). Please refer to installation links for
`JAGS`
#>
#> GAM coefficient (beta) estimates:
-#> 2.5% 50% 97.5% Rhat n_eff
-#> (Intercept) 6.10 6.600 7.000 1.00 355
-#> s(season).1 -0.59 0.046 0.710 1.00 1022
-#> s(season).2 -0.26 0.770 1.800 1.00 443
-#> s(season).3 -0.12 1.100 2.400 1.00 399
-#> s(season).4 -0.51 0.410 1.300 1.00 890
-#> s(season).5 -1.20 -0.130 0.950 1.01 503
-#> s(season).6 -1.00 -0.011 1.100 1.01 699
-#> s(season).7 -0.71 0.340 1.400 1.00 711
-#> s(season).8 -0.92 0.180 1.800 1.00 371
-#> s(season).9 -1.10 -0.290 0.710 1.00 476
-#> s(season).10 -1.30 -0.660 0.027 1.00 595
+#> 2.5% 50% 97.5% Rhat n_eff
+#> (Intercept) 6.200 6.6000 7.000 1 1020
+#> s(season).1 -0.590 0.0500 0.700 1 1094
+#> s(season).2 -0.240 0.8100 1.800 1 417
+#> s(season).3 -0.057 1.2000 2.500 1 365
+#> s(season).4 -0.510 0.4200 1.400 1 951
+#> s(season).5 -1.200 -0.1400 1.000 1 538
+#> s(season).6 -1.100 0.0074 1.100 1 632
+#> s(season).7 -0.790 0.3800 1.400 1 847
+#> s(season).8 -1.000 0.2900 1.800 1 413
+#> s(season).9 -1.100 -0.2700 0.670 1 574
+#> s(season).10 -1.400 -0.6900 -0.025 1 641
#>
#> Approximate significance of GAM observation smooths:
-#> edf Chi.sq p-value
-#> s(season) 5.08 17751 0.25
+#> edf Chi.sq p-value
+#> s(season) 5 17851 0.28
#>
#> Latent trend AR parameter estimates:
#> 2.5% 50% 97.5% Rhat n_eff
-#> ar1[1] 0.74 1.10 1.400 1 635
-#> ar2[1] -0.84 -0.40 0.062 1 1514
-#> ar3[1] -0.47 -0.13 0.290 1 540
-#> sigma[1] 0.40 0.50 0.640 1 1154
+#> ar1[1] 0.74 1.10 1.400 1 695
+#> ar2[1] -0.82 -0.41 0.045 1 1630
+#> ar3[1] -0.47 -0.12 0.280 1 453
+#> sigma[1] 0.40 0.50 0.630 1 1101
#>
#> Stan MCMC diagnostics:
#> n_eff / iter looks reasonable for all parameters
@@ -348,7 +348,7 @@ summary(lynx_mvgam)
#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
#> E-FMI indicated no pathological behavior
#>
-#> Samples were drawn using NUTS(diag_e) at Mon Jan 15 8:57:57 AM 2024.
+#> Samples were drawn using NUTS(diag_e) at Tue Jan 16 8:42:08 PM 2024.
#> For each parameter, n_eff is a crude measure of effective sample size,
#> and Rhat is the potential scale reduction factor on split MCMC chains
#> (at convergence, Rhat = 1)
@@ -470,7 +470,7 @@ plot(lynx_mvgam, type = 'forecast', newdata = lynx_test)
#> Out of sample CRPS:
- #> [1] 2892.767
+ #> [1] 2856.97
And the estimated latent trend component, again using the more flexible
`plot_mvgam_...()` option to show first derivatives of the estimated
@@ -626,7 +626,7 @@ summary(mod, include_betas = FALSE)
#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
#> E-FMI indicated no pathological behavior
#>
-#> Samples were drawn using NUTS(diag_e) at Mon Jan 15 8:59:01 AM 2024.
+#> Samples were drawn using NUTS(diag_e) at Tue Jan 16 8:44:20 PM 2024.
#> For each parameter, n_eff is a crude measure of effective sample size,
#> and Rhat is the potential scale reduction factor on split MCMC chains
#> (at convergence, Rhat = 1)
diff --git a/docs/index.html b/docs/index.html
index f1389398..093662da 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -95,6 +95,16 @@ Installation. Note that to actually condition models with MCMC sampling, either the
JAGS
software must be installed (along with the R
packages rjags
and runjags
) or the Stan
software must be installed (along with either rstan
and/or cmdstanr
). Only rstan
is listed as a dependency of mvgam
to ensure that installation is less difficult. If users wish to fit the models using mvgam
, please refer to installation links for JAGS
here, for Stan
with rstan
here, or for Stan
with cmdstandr
here. You will need a fairly recent version of Stan
to ensure all the model syntax is recognized. If you see warnings such as variable "array" does not exist
, this is usually a sign that you need to update your version of Stan
. We highly recommend you use Cmdstan
through the cmdstanr
interface as the backend. This is because Cmdstan
is easier to install, is more up to date with new features, and uses less memory than Rstan
. See this documentation from the Cmdstan
team for more information.
nmix
for count data with imperfect detection modeled via a
State-Space N-Mixture model. The latent states are Poisson (with log link), capturing the 'true' latent
abundance, while the observation process is Binomial to account for imperfect detection. The
-observation formula in these models is used to set up a linear predictor for the detection
+observation formula
in these models is used to set up a linear predictor for the detection
probability (with logit link). See the example below for a more detailed worked explanation
of the nmix()
family
Only poisson()
, nb()
, and tweedie()
are available if
@@ -102,7 +102,135 @@
if (FALSE) {
+# An N-mixture example using family nmix()
+# Simulate data from a Poisson-Binomial N-Mixture model
+# True abundance is predicted by a single nonlinear function of temperature
+# as well as a nonlinear long-term trend (as a Gaussian Process function)
+set.seed(123)
+gamdat <- gamSim(n = 80); N <- NROW(gamdat)
+abund_linpred <- gamdat$y; temperature <- gamdat$x2
+trend <- mvgam:::sim_gp(rnorm(3, 0, 0.1), alpha_gp = 3,
+ rho_gp = 16, h = N)
+true_abund <- floor(10 + abund_linpred + trend)
+
+# Detection probability increases linearly with decreasing rainfall
+rainfall <- rnorm(N)
+detect_linpred <- 0.4 + -0.55 * rainfall
+detect_prob <- plogis(detect_linpred)
+
+# Simulate observed counts
+obs_abund <- rbinom(N, size = true_abund, prob = detect_prob)
+
+# Plot true and observed time series
+plot(true_abund,
+ type = 'l',
+ ylab = 'Abundance',
+ xlab = 'Time',
+ ylim = c(0, max(true_abund)),
+ bty = 'l',
+ lwd = 2)
+lines(obs_abund, col = 'darkred', lwd = 2)
+title(main = 'True = black; observed = red')
+
+# Gather data into a dataframe suitable for mvgam modelling;
+# This will require a 'cap' variable specifying the maximum K to marginalise
+# over when estimating latent abundance (it does NOT have to be a fixed value)
+model_dat <- data.frame(obs_abund,
+ temperature,
+ rainfall,
+ cap = max(obs_abund) + 20,
+ time = 1:N,
+ series = as.factor('series1'))
+
+# Training and testing folds
+data_train <- model_dat %>% dplyr::filter(time <= 74)
+data_test <- model_dat %>% dplyr::filter(time > 74)
+
+# Fit a model with informative priors on the two intercept parameters
+# and on the length scale of the GP temporal trend parameter
+# Note that the 'trend_formula' applies to the latent count process
+# (a Poisson process with log-link), while the 'formula' applies to the
+# detection probability (logit link)
+mod <- mvgam(formula = obs_abund ~ rainfall,
+ trend_formula = ~ s(temperature, k = 5) +
+ gp(time, k = 10, c = 5/4, scale = FALSE),
+ family = nmix(),
+ data = data_train,
+ newdata = data_test,
+ priors = c(prior(std_normal(), class = '(Intercept)'),
+ prior(normal(2, 2), class = '(Intercept)_trend'),
+ prior(normal(15, 5), class = 'rho_gp_trend(time)')))
+
+# Model summary and diagnostics
+summary(mod)
+plot(mod, type = 'residuals')
+
+# Intercept parameters
+mcmc_plot(mod,
+ variable = "Intercept",
+ regex = TRUE,
+ type = 'hist')
+
+# Hindcasts and forecasts of latent abundance (with truth overlain)
+fc <- forecast(mod, type = 'latent_N')
+plot(fc); points(true_abund, pch = 16, cex = 0.8)
+
+# Latent abundance predictions are restricted based on 'cap'
+max(model_dat$cap); range(fc$forecasts[[1]])
+
+# Hindcasts and forecasts of detection probability (with truth overlain)
+fc <- forecast(mod, type = 'detection')
+plot(fc); points(detect_prob, pch = 16, cex = 0.8)
+
+# Hindcasts and forecasts of observations
+# (after accounting for detection error)
+fc <- forecast(mod, type = 'response')
+plot(fc)
+
+# Hindcasts and forecasts of response expectations
+# (with truth overlain)
+fc <- forecast(object = mod, type = 'expected')
+plot(fc); points(detect_prob * true_abund, pch = 16, cex = 0.8)
+
+# Plot conditional effects
+library(ggplot2)
+
+# Effects on true abundance can be visualised using type = 'link'
+abund_plots <- plot(conditional_effects(mod,
+ type = 'link',
+ effects = c('temperature', 'time')),
+ plot = FALSE)
+
+# Effect of temperature on abundance
+abund_plots[[1]] +
+ ylab('Latent abundance')
+
+# Long-term trend in abundance
+abund_plots[[2]] +
+ ylab('Latent abundance')
+
+# Effect of rainfall on detection probability
+det_plot <- plot(conditional_effects(mod,
+ type = 'detection',
+ effects = 'rainfall'),
+ plot = FALSE)
+det_plot[[1]] +
+ ylab('Pr(detection)')
+
+# More targeted plots can use marginaleffects capabilities;
+# Here visualise how response predictions might change
+# if we considered different possible 'cap' limits on latent
+# abundance and different rainfall measurements
+plot_predictions(mod, condition = list('temperature',
+ cap = c(15, 20, 40),
+ rainfall = c(-1, 1)),
+ type = 'response',
+ conf_level = 0.5) +
+ ylab('Observed abundance') +
+ theme_classic()
+}
+