diff --git a/DESCRIPTION b/DESCRIPTION index 8ad62f2..16fe5fa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: bigDM Type: Package Title: Scalable Bayesian Disease Mapping Models for High-Dimensional Data -Version: 0.5.3 -Date: 2023-10-17 +Version: 0.5.4 +Date: 2024-05-30 Authors@R: c(person(given = "Aritz", family = "Adin", @@ -31,5 +31,5 @@ License: GPL-3 Encoding: UTF-8 LazyData: true LazyDataCompression: xz -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Config/testthat/edition: 3 diff --git a/NEWS b/NEWS index 3472872..dd454a1 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,7 @@ +version 0.5.4 +- small bugs fixed and performance improvements +- package built for R-4.4 + version 0.5.3 - bugs fixed - faster implementation of divide_carto() function diff --git a/R/CAR_INLA.R b/R/CAR_INLA.R index b761d98..87e37c3 100644 --- a/R/CAR_INLA.R +++ b/R/CAR_INLA.R @@ -173,26 +173,25 @@ CAR_INLA <- function(carto=NULL, ID.area=NULL, ID.group=NULL, O=NULL, E=NULL, X= ## Transform 'SpatialPolygonsDataFrame' object to 'sf' class carto <- sf::st_as_sf(carto) - data <- sf::st_set_geometry(carto, NULL) ## Add the covariates defined in the X argument ## if(!is.null(X)){ if(is.matrix(X)){ - if(!isTRUE(all.equal(rownames(X),as.character(data[,ID.area])))){ + if(!isTRUE(all.equal(rownames(X),as.character(sf::st_set_geometry(carto, NULL)[,ID.area])))){ stop(sprintf("row names of 'X' must match with the IDs of the spatial units defined by the '%s' variable",ID.area)) }else{ if(is.null(colnames(X))) colnames(X) <- paste("X",seq(ncol(X)),sep="") carto <- cbind(carto,X) - data <- sf::st_set_geometry(carto, NULL) X <- colnames(X) } } - if(!all(X %in% colnames(data))){ - stop(sprintf("'%s' variable not found in carto object",X[!X %in% colnames(data)])) + if(!all(X %in% names(carto))){ + stop(sprintf("'%s' variable not found in carto object",X[!X %in% names(carto)])) }else{ - carto[,X] <- scale(data[,X]) + carto[,X] <- scale(sf::st_set_geometry(carto, NULL)[,X]) } } + data <- sf::st_set_geometry(carto, NULL) ## Order the data ## if(!ID.area %in% colnames(data)) @@ -210,6 +209,7 @@ CAR_INLA <- function(carto=NULL, ID.area=NULL, ID.group=NULL, O=NULL, E=NULL, X= }else{ order.data <- FALSE } + rownames(data) <- NULL ## Merge disjoint connected subgraphs ## if(is.null(W)){ @@ -299,6 +299,7 @@ CAR_INLA <- function(carto=NULL, ID.area=NULL, ID.group=NULL, O=NULL, E=NULL, X= control.predictor=list(compute=TRUE, link=1, cdf=c(log(1))), control.compute=list(dic=TRUE, cpo=TRUE, waic=TRUE, config=TRUE, return.marginals.predictor=TRUE), control.inla=list(strategy=strategy), ...) + return(models) } @@ -385,8 +386,8 @@ CAR_INLA <- function(carto=NULL, ID.area=NULL, ID.group=NULL, O=NULL, E=NULL, X= t.eigen <- 0 } - Model$cpu.used <- c(time+Model$cpu.used[4],t.eigen+time+Model$cpu.used[4]) - names(Model$cpu.used) <- c("INLA.time","Total.time") + Model$cpu.used <- c(time+Model$cpu.used[4],t.eigen,t.eigen+time+Model$cpu.used[4]) + names(Model$cpu.used) <- c("INLA.time","eigen.time","Total") } } diff --git a/R/MCAR_INLA.R b/R/MCAR_INLA.R index 099e1f8..a2cc99c 100644 --- a/R/MCAR_INLA.R +++ b/R/MCAR_INLA.R @@ -171,6 +171,7 @@ MCAR_INLA <- function(carto=NULL, data=NULL, ID.area=NULL, ID.disease=NULL, ID.g data$geometry <- NULL data[,ID.disease] <- paste(sprintf("%02d", as.numeric(as.character(data[,ID.disease])))) data <- data[order(data[,ID.disease],data[,ID.area]),] + rownames(data) <- NULL if(!all(order(data[,ID.disease],data[,ID.area])==order(data.old[,ID.disease],data.old[,ID.area]))){ order.data <- TRUE @@ -303,8 +304,7 @@ MCAR_INLA <- function(carto=NULL, data=NULL, ID.area=NULL, ID.disease=NULL, ID.g ## Partition model ## if(model=="partition"){ - if(is.null(ID.group)) - stop("the ID.group argument is missing") + if(is.null(ID.group)) stop("the ID.group argument is missing") cat("STEP 2:",sprintf("Fitting partition (k=%d) model with INLA",k),"\n") @@ -395,7 +395,7 @@ MCAR_INLA <- function(carto=NULL, data=NULL, ID.area=NULL, ID.disease=NULL, ID.g #' Compute correlation coefficients between diseases #' -#' @description This function takes a \code{inla} object fitted using the \code{\link{MCAR_INLA}} function and computes the correlation coefficients between diseases. See Details for more information. +#' @description This function takes a \code{inla} object fitted using the \code{\link{MCAR_INLA}} function and computes the correlation coefficients between diseases. #' #' @param model object of class \code{inla} fitted using the \code{\link{MCAR_INLA}} function. #' @param n.sample numeric; number of samples to generate from the approximated joint posterior for the hyperparameters (see \code{help(inla.hyperpar.sample)}). Default to 1000. diff --git a/R/STCAR_INLA.R b/R/STCAR_INLA.R index 16d97f9..1fb31d2 100644 --- a/R/STCAR_INLA.R +++ b/R/STCAR_INLA.R @@ -171,6 +171,8 @@ STCAR_INLA <- function(carto=NULL, data=NULL, ID.area=NULL, ID.year=NULL, ID.gro } if(!all(X %in% colnames(data))){ stop(sprintf("'%s' variable not found in data object",X[!X %in% colnames(data)])) + }else{ + data[,X] <- scale(data[,X]) } } @@ -192,6 +194,7 @@ STCAR_INLA <- function(carto=NULL, data=NULL, ID.area=NULL, ID.year=NULL, ID.gro data$geometry <- NULL data[,ID.year] <- paste(sprintf("%02d", as.numeric(as.character(data[,ID.year])))) data <- data[order(data[,ID.year],data[,ID.area]),] + rownames(data) <- NULL if(!all(order(data[,ID.year],data[,ID.area])==order(data.old[,ID.year],data.old[,ID.area]))){ order.data <- TRUE @@ -427,6 +430,7 @@ STCAR_INLA <- function(carto=NULL, data=NULL, ID.area=NULL, ID.year=NULL, ID.gro control.predictor=list(compute=TRUE, link=1, cdf=c(log(1))), control.compute=list(dic=TRUE, cpo=TRUE, waic=TRUE, config=TRUE, return.marginals.predictor=TRUE), control.inla=list(strategy=strategy), ...) + return(models) } @@ -510,7 +514,7 @@ STCAR_INLA <- function(carto=NULL, data=NULL, ID.area=NULL, ID.year=NULL, ID.gro on.exit(future::plan(oplan)) cpu.time <- system.time({ - inla.models <- future.apply::future_mapply(FitModels, Rs=Rs, Rs.Leroux=Rs.Leroux, R=R, r.def=r.def, A.constr=A.constr, data.INLA=data.INLA, d=seq(1,D), D=D, num.threads=num.threads, inla.mode=inla.mode, future.seed=TRUE, SIMPLIFY=FALSE) + inla.models <- future.apply::future_mapply(FitModels, Rs=Rs, Rs.Leroux=Rs.Leroux, R=R, r.def=r.def, A.constr=A.constr, data.INLA=data.INLA, d=seq(1,D), D=D, num.threads=num.threads, inla.mode=inla.mode, future.seed=TRUE, SIMPLIFY=FALSE) }) stopCluster(cl) diff --git a/R/bigDM-package.R b/R/bigDM-package.R index e11e79c..cacddc5 100644 --- a/R/bigDM-package.R +++ b/R/bigDM-package.R @@ -32,7 +32,7 @@ #' \insertRef{orozco2020}{bigDM} #' #' \insertRef{orozco2022}{bigDM} -#' +#' #' \insertRef{vicente2022}{bigDM} #' #' @seealso See the following vignettes for further details and examples using this package: @@ -44,9 +44,8 @@ #' } #' #' @examples -#' ## See the examples for CAR_INLA, MCAR_INLA and STCAR_INLA functions +#' ## See the examples for CAR_INLA, MCAR_INLA and STCAR_INLA functions ## #' -#' @docType package #' @name bigDM-package #' @aliases bigDM -NULL +"_PACKAGE" diff --git a/R/mergeINLA.R b/R/mergeINLA.R index 41ea4cb..1e4f0c3 100644 --- a/R/mergeINLA.R +++ b/R/mergeINLA.R @@ -25,13 +25,13 @@ #' @param compute.fitted.values logical value (default \code{FALSE}); if \code{TRUE} transforms the posterior marginal distribution of the linear predictor to the exponential scale (risks or rates). CAUTION: This method might be time consuming. #' #' @return This function returns an object of class \code{inla} containing the following elements: -#' \item{\code{summary.fixed}}{A data.frame containing the mean, standard deviation, quantiles and mode of the model's fixed effects.} -#' \item{\code{marginals.fixed}}{A list containing the posterior marginal density of the model's fixed effects.} -#' \item{\code{summary.fixed.partition}}{A data.frame containing the mean, standard deviation, quantiles and mode of the model's fixed effects in each partition.} +#' \item{\code{summary.fixed}}{A data.frame containing the mean, standard deviation and quantiles of the model's fixed effects. This feature is EXPERIMENTAL for the moment.} +#' \item{\code{marginals.fixed}}{A list containing the posterior marginal density of the model's fixed effects. This feature is EXPERIMENTAL for the moment.} +#' \item{\code{summary.fixed.partition}}{A data.frame containing the mean, standard deviation and quantiles of the model's fixed effects in each partition.} #' \item{\code{marginals.fixed.partition}}{A list containing the posterior marginal density of the model's fixed effects in each partition.} -#' \item{\code{summary.random}}{If \code{k=0} a list with a data.frame containing the mean, standard deviation, quantiles and mode of the model's random effects.} +#' \item{\code{summary.random}}{If \code{k=0} a list with a data.frame containing the mean, standard deviation and quantiles of the model's random effects.} #' \item{\code{marginals.random}}{If \code{k=0} a list containing the posterior marginal densities of the model's random effects.} -#' \item{\code{summary.linear.predictor}}{If \code{k=0} a data.frame containing the mean, standard deviation, quantiles and mode of the log-risks (or log-rates) in the model.} +#' \item{\code{summary.linear.predictor}}{If \code{k=0} a data.frame containing the mean, standard deviation and quantiles of the log-risks (or log-rates) in the model.} #' \item{\code{marginals.linear.predictor}}{If \code{k=0} a list containing the posterior marginal densities of the log-risks (or log-rates) in the model.} #' \item{\code{summary.fitted.values}}{A data.frame containing the mean, standard deviation, quantiles, mode and cdf of the risks (or rates) in the model. Available only if \code{compute.fitted.values=TRUE}.} #' \item{\code{marginals.fitted.values}}{A list containing the posterior marginal densities of the risks (or rates) in the model. Available only if \code{compute.fitted.values=TRUE}.} @@ -55,7 +55,7 @@ #' @importFrom rlist list.flatten #' #' @examples -#' ## See the vignettes accompanying this package for an example of its use. +#' ## See the vignettes accompanying this package ## #' #' @export mergeINLA <- function(inla.models=list(), k=NULL, ID.area="Area", ID.year=NULL, ID.disease=NULL, O="O", E="E", @@ -148,6 +148,9 @@ mergeINLA <- function(inla.models=list(), k=NULL, ID.area="Area", ID.year=NULL, fixed.CMC <- compute.CMC(marginals=result$marginals.fixed.partition, names=unlist(names.fixed)) result$summary.fixed <- fixed.CMC$summary.CMC result$marginals.fixed <- fixed.CMC$marginals.CMC + + result$summary.fixed <- result$summary.fixed[-union(which(rownames(result$summary.fixed)=="(Intercept)"),grep("^I",rownames(result$summary.fixed))),] + result$marginals.fixed[union(which(names(result$marginals.fixed)=="(Intercept)"),grep("^I",names(result$marginals.fixed)))] <- NULL } @@ -380,6 +383,7 @@ mergeINLA <- function(inla.models=list(), k=NULL, ID.area="Area", ID.year=NULL, result$.args$data$ID <- as.character(result$.args$data[,ID.area]) result$.args$data <- result$.args$data[!duplicated(result$.args$data$ID),] result$.args$data <- result$.args$data[order(result$.args$data$ID),] + result$.args$data$ID.area <- seq(1:nrow(result$.args$data)) }else{ result$.args$data$ID <- paste(result$.args$data[,ID.disease],result$.args$data[,ID.area],sep=".") result$.args$data <- result$.args$data[!duplicated(result$.args$data$ID),] @@ -567,7 +571,7 @@ mergeINLA <- function(inla.models=list(), k=NULL, ID.area="Area", ID.year=NULL, } if(length(names.var)>1){ - stop("Different variances for INLA models") + stop("Different variables for INLA models") }else{ aux <- do.call(rbind,lapply(inla.models, function(x) x$summary.var)) rownames(aux) <- paste(rep(unlist(names.var),D),rep(formatC(1:D, width=ceiling(log(D+1,10)), flag='0'),each=length(unlist(names.var))),sep=".") @@ -585,7 +589,7 @@ mergeINLA <- function(inla.models=list(), k=NULL, ID.area="Area", ID.year=NULL, } }) - result$cpu.used <- c(result$cpu.used[1:3], Merging=as.numeric(tt[3]), Total=as.numeric(result$cpu.used[4]+tt[3])) + result$cpu.used <- c(Running=as.numeric(result$cpu.used["Total"]), Merging=as.numeric(tt[3]), Total=as.numeric(result$cpu.used["Total"]+tt[3])) return(result) }else{ diff --git a/R/random_partition.R b/R/random_partition.R index 0c32bc5..9cb66f9 100644 --- a/R/random_partition.R +++ b/R/random_partition.R @@ -98,11 +98,10 @@ random_partition <- function(carto, rows=3, columns=3, min.size=50, max.size=100 ID.group[aux[[i]]] <- i } carto$ID.group <- factor(as.numeric(factor(ID.group))) + partition.size <- table(carto$ID.group) ## Merge the subregions with lower number of areas than min.size ## if(!is.null(min.size)){ - partition.size <- table(carto$ID.group) - while(any(partition.sizemax.size)) warning(sprintf("%d subregion(s) have more than %d areas",sum(table(carto$ID.group)>max.size),max.size), call.=FALSE) + carto$ID.group <- as.character(carto$ID.group) + return(carto) } diff --git a/README.md b/README.md index e5545ae..556cd1e 100644 --- a/README.md +++ b/README.md @@ -41,9 +41,9 @@ Below, there is a list with a brief overview of all package functions: # Installation -[Installing Rtools43 for Windows](https://cran.r-project.org/bin/windows/Rtools/rtools43/rtools.html) +[Installing Rtools44 for Windows](https://cran.r-project.org/bin/windows/Rtools/rtools44/rtools.html) -R version 4.3.0 and newer for Windows requires the new Rtools43 to build R packages with C/C++/Fortran code from source. +R version 4.4.0 and newer for Windows requires the new Rtools44 to build R packages with C/C++/Fortran code from source. ## Install from CRAN @@ -78,6 +78,7 @@ See the following vignettes for further details and examples using this package: * [bigDM: fitting spatio-temporal models](https://emi-sstcdapp.unavarra.es/bigDM/bigDM-3-fitting-spatio-temporal-models.html) * [bigDM: fitting multivariate spatial models](https://emi-sstcdapp.unavarra.es/bigDM/bigDM-4-fitting-multivariate-spatial-models.html) + When using this package, please cite the following papers: [Orozco-Acosta, E., Adin, A., and Ugarte, M.D. (2021). Scalable Bayesian modeling for smoothing disease risks in large spatial data sets using INLA. _Spatial Statistics_, __41__, 100496.](https://doi.org/10.1016/j.spasta.2021.100496) @@ -91,6 +92,10 @@ When using this package, please cite the following papers: ``` news(package="bigDM") ``` +__Changes in version 0.5.4__ (2024 May 30) +* small bugs fixed and performance improvements +* package built for R-4.4 + __Changes in version 0.5.3__ (2023 Oct 17) * bugs fixed * faster implementation of `divide_carto()` function diff --git a/inst/CITATION b/inst/CITATION index 23be920..f72b12c 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -5,11 +5,11 @@ bibentry( author = c(person("A","Adin"), person("E","Orozco-Acosta"), person("M D","Ugarte")), - year = "2023", - note = "R package version 0.5.3", + year = "2024", + note = "R package version 0.5.4", url = "https://github.com/spatialstatisticsupna/bigDM", - textVersion = paste("Adin, A., Orozco-Acosta, E., and Ugarte, M.D. (2023).", - "bigDM: Scalable Bayesian Disease Mapping Models for High-Dimensional Data. R package version 0.5.3", "https://github.com/spatialstatisticsupna/bigDM.") + textVersion = paste("Adin, A., Orozco-Acosta, E., and Ugarte, M.D. (2024).", + "bigDM: Scalable Bayesian Disease Mapping Models for High-Dimensional Data. R package version 0.5.4", "https://github.com/spatialstatisticsupna/bigDM.") ) bibentry( diff --git a/man/Mmodel_compute_cor.Rd b/man/Mmodel_compute_cor.Rd index 8abd0bd..6571c8b 100644 --- a/man/Mmodel_compute_cor.Rd +++ b/man/Mmodel_compute_cor.Rd @@ -19,5 +19,5 @@ The input \code{inla} object with two additional elements: \item{\code{marginals.var}}{A list containing the posterior marginal densities of the variances for each disease.} } \description{ -This function takes a \code{inla} object fitted using the \code{\link{MCAR_INLA}} function and computes the correlation coefficients between diseases. See Details for more information. +This function takes a \code{inla} object fitted using the \code{\link{MCAR_INLA}} function and computes the correlation coefficients between diseases. } diff --git a/man/bigDM-package.Rd b/man/bigDM-package.Rd index 690f75f..c2eef18 100644 --- a/man/bigDM-package.Rd +++ b/man/bigDM-package.Rd @@ -30,7 +30,7 @@ Below, there is a list with a brief overview of all package functions: } } \examples{ -## See the examples for CAR_INLA, MCAR_INLA and STCAR_INLA functions +## See the examples for CAR_INLA, MCAR_INLA and STCAR_INLA functions ## } \references{ diff --git a/man/mergeINLA.Rd b/man/mergeINLA.Rd index d82e395..d739250 100644 --- a/man/mergeINLA.Rd +++ b/man/mergeINLA.Rd @@ -43,13 +43,13 @@ mergeINLA( } \value{ This function returns an object of class \code{inla} containing the following elements: -\item{\code{summary.fixed}}{A data.frame containing the mean, standard deviation, quantiles and mode of the model's fixed effects.} -\item{\code{marginals.fixed}}{A list containing the posterior marginal density of the model's fixed effects.} -\item{\code{summary.fixed.partition}}{A data.frame containing the mean, standard deviation, quantiles and mode of the model's fixed effects in each partition.} +\item{\code{summary.fixed}}{A data.frame containing the mean, standard deviation and quantiles of the model's fixed effects. This feature is EXPERIMENTAL for the moment.} +\item{\code{marginals.fixed}}{A list containing the posterior marginal density of the model's fixed effects. This feature is EXPERIMENTAL for the moment.} +\item{\code{summary.fixed.partition}}{A data.frame containing the mean, standard deviation and quantiles of the model's fixed effects in each partition.} \item{\code{marginals.fixed.partition}}{A list containing the posterior marginal density of the model's fixed effects in each partition.} -\item{\code{summary.random}}{If \code{k=0} a list with a data.frame containing the mean, standard deviation, quantiles and mode of the model's random effects.} +\item{\code{summary.random}}{If \code{k=0} a list with a data.frame containing the mean, standard deviation and quantiles of the model's random effects.} \item{\code{marginals.random}}{If \code{k=0} a list containing the posterior marginal densities of the model's random effects.} -\item{\code{summary.linear.predictor}}{If \code{k=0} a data.frame containing the mean, standard deviation, quantiles and mode of the log-risks (or log-rates) in the model.} +\item{\code{summary.linear.predictor}}{If \code{k=0} a data.frame containing the mean, standard deviation and quantiles of the log-risks (or log-rates) in the model.} \item{\code{marginals.linear.predictor}}{If \code{k=0} a list containing the posterior marginal densities of the log-risks (or log-rates) in the model.} \item{\code{summary.fitted.values}}{A data.frame containing the mean, standard deviation, quantiles, mode and cdf of the risks (or rates) in the model. Available only if \code{compute.fitted.values=TRUE}.} \item{\code{marginals.fitted.values}}{A list containing the posterior marginal densities of the risks (or rates) in the model. Available only if \code{compute.fitted.values=TRUE}.} @@ -80,6 +80,6 @@ If the \code{merge.strategy="original"} argument is specified (default option), See \insertCite{orozco2020;textual}{bigDM} and \insertCite{orozco2022;textual}{bigDM} for more details. } \examples{ -## See the vignettes accompanying this package for an example of its use. +## See the vignettes accompanying this package ## }