diff --git a/.github/workflows/rcmdcheck.yml b/.github/workflows/rcmdcheck.yml index 164639528..37bb858a9 100644 --- a/.github/workflows/rcmdcheck.yml +++ b/.github/workflows/rcmdcheck.yml @@ -57,6 +57,11 @@ jobs: pak::pkg_system_requirements("rcmdcheck", execute = TRUE) shell: Rscript {0} + - name: Install valgrind + if: ${{ runner.os == 'Linux' && matrix.config.r == 'release'}} + run: | + sudo apt-get install --yes valgrind + - name: Install dependencies run: | pak::local_install_dev_deps(upgrade = TRUE) @@ -78,6 +83,11 @@ jobs: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") shell: Rscript {0} + - name: Valgrind + if: ${{ runner.os == 'Linux' && matrix.config.r == 'release' && always()}} + run: rcmdcheck::rcmdcheck(build_args = "--no-build-vignettes", args = c("--use-valgrind", "--no-codoc", "--no-manual", "--ignore-vignettes"), error_on = "error", check_dir = "check") + shell: Rscript {0} + - name: Show testthat output if: always() run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true diff --git a/DESCRIPTION b/DESCRIPTION index 8407f3758..28b1e824d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: distr6 Title: The Complete R6 Probability Distributions Interface -Version: 1.8.0 +Version: 1.8.1 Authors@R: c(person(given = "Raphael", family = "Sonabend", @@ -220,6 +220,7 @@ Collate: 'listWrappers.R' 'makeUniqueDistributions.R' 'measures.R' + 'merge_cols.R' 'mixMatrix.R' 'mixturiseVector.R' 'plot_continuous.R' @@ -234,4 +235,5 @@ Collate: 'simulateEmpiricalDistribution.R' 'skewType.R' 'sugar.R' + 'transformers.R' 'zzz.R' diff --git a/NEWS.md b/NEWS.md index 731aebbe8..ffdf2f633 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# distr6 1.8.1 + +* Add 'transformer' functions `pdfcdf` and `cdfpdf`, which use Rcpp to transform matrics/arrays/vectors between pdf->cdf and cdf->pdf respectively. + # distr6 1.8.0 * Add `Arrdist`, which generalises the `Matdist` to a three-dimensional array, useful for Bayesian predictions where the third dimension is multiple distributions. diff --git a/R/RcppExports.R b/R/RcppExports.R index 81026040f..e18051ed4 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -173,6 +173,22 @@ C_UniformKernelQuantile <- function(x, lower, logp) { .Call(`_distr6_C_UniformKernelQuantile`, x, lower, logp) } +C_vec_PdfCdf <- function(x) { + .Call(`_distr6_C_vec_PdfCdf`, x) +} + +C_vec_CdfPdf <- function(x) { + .Call(`_distr6_C_vec_CdfPdf`, x) +} + +C_mat_PdfCdf <- function(x) { + .Call(`_distr6_C_mat_PdfCdf`, x) +} + +C_mat_CdfPdf <- function(x) { + .Call(`_distr6_C_mat_CdfPdf`, x) +} + C_dpq <- function(fun, x, args, lower = TRUE, log = FALSE) { .Call(`_distr6_C_dpq`, fun, x, args, lower, log) } diff --git a/R/SDistribution_Arrdist.R b/R/SDistribution_Arrdist.R index bd70c1879..e6d8efd53 100644 --- a/R/SDistribution_Arrdist.R +++ b/R/SDistribution_Arrdist.R @@ -396,33 +396,12 @@ c.Arrdist <- function(...) { stop("Can't combine array distributions with different lengths on third dimension.") } - pdfs = .merge_arrpdf_cols(pdfs) - pdfs = do.call(abind::abind, list(what = pdfs, along = 1)) + pdfs <- .merge_arrpdf_cols(pdfs) + pdfs <- do.call(abind::abind, list(what = pdfs, along = 1)) as.Distribution(pdfs, fun = "pdf", decorators = decs) } -.merge_arrpdf_cols <- function(pdfs) { - nc <- unique(viapply(pdfs, ncol)) - - if (length(nc) == 1) { - if (all(vapply(pdfs, colnames, character(nc)) == colnames(pdfs[[1]]))) { - return(pdfs) - } - } - - cnms <- sort(unique(as.numeric(unlist(lapply(pdfs, colnames))))) - # new number of rows and columns - nc <- length(cnms) - nl <- dim(pdfs[[1]])[3L] - - lapply(pdfs, function(.x) { - out <- array(0, c(nrow(.x), nc, nl), list(NULL, cnms, NULL)) - out[, match(as.numeric(colnames(.x)), cnms), ] <- .x - out - }) -} - #' @title Extract one or more Distributions from an Array distribution #' @description Extract a [WeightedDiscrete] or [Matdist] or [Arrdist] from a [Arrdist]. #' @param ad [Arrdist] from which to extract Distributions. diff --git a/R/SDistribution_Matdist.R b/R/SDistribution_Matdist.R index 55e84d665..af2003c85 100644 --- a/R/SDistribution_Matdist.R +++ b/R/SDistribution_Matdist.R @@ -358,28 +358,6 @@ c.Matdist <- function(...) { decorators = decs) } -.merge_matpdf_cols <- function(pdfs) { - - nc <- unique(viapply(pdfs, ncol)) - - if (length(nc) == 1) { - if (all(vapply(pdfs, colnames, character(nc)) == colnames(pdfs[[1]]))) { - return(pdfs) - } - } - - cnms <- sort(unique(as.numeric(unlist(lapply(pdfs, colnames))))) - # new number of rows and columns - nc <- length(cnms) - - lapply(pdfs, function(.x) { - out <- matrix(0, nrow(.x), nc, FALSE, list(NULL, cnms)) - out[, match(as.numeric(colnames(.x)), cnms)] <- .x - out - }) -} - - #' @title Extract one or more Distributions from a Matdist #' @description Extract a [WeightedDiscrete] or [Matdist] from a [Matdist]. #' @param md [Matdist] from which to extract Distributions. diff --git a/R/getParameterSet.R b/R/getParameterSet.R index 673bb1313..1c17cb0fb 100644 --- a/R/getParameterSet.R +++ b/R/getParameterSet.R @@ -685,12 +685,12 @@ getParameterSet.WeightedDiscrete <- function(object, ...) { # nolint if (length(pdfs)) { cdfs <- setNames( - lapply(pdfs, cumsum), + lapply(pdfs, pdfcdf), gsub("pdf", "cdf", names(pdfs)) ) } else { pdfs <- setNames( - lapply(cdfs, function(.x) c(.x[1], diff(.x))), + lapply(cdfs, cdfpdf), gsub("cdf", "pdf", names(cdfs)) ) } @@ -716,9 +716,9 @@ getParameterSet.Matdist <- function(object, ...) { # nolint cdf <- list_element(x, "cdf")$cdf if (length(pdf)) { - cdf <- t(apply(pdf, 1, cumsum)) + cdf <- pdfcdf(pdf) } else { - pdf <- t(apply(cdf, 1, function(.y) c(.y[1], diff(.y)))) + pdf <- cdfpdf(cdf) } assert_cdf_matrix(cdf, pdf) @@ -742,15 +742,9 @@ getParameterSet.Arrdist <- function(object, which.curve = 0.5, ...) { # nolint cdf <- list_element(x, "cdf")$cdf if (length(pdf)) { - cdf <- pdf - for (i in 2:ncol(cdf)) { - cdf[, i, ] <- cdf[, i, ] + cdf[, i - 1, ] - } + cdf <- pdfcdf(pdf) } else { - pdf <- cdf - for (i in ncol(pdf):2) { - pdf[, i, ] <- pdf[, i, ] - pdf[, i - 1, ] - } + pdf <- cdfpdf(cdf) } assert_cdf_array(cdf, pdf) diff --git a/R/merge_cols.R b/R/merge_cols.R new file mode 100644 index 000000000..81d93cf19 --- /dev/null +++ b/R/merge_cols.R @@ -0,0 +1,74 @@ +# unify column names across pdfs in +# matrices (merge_matpdf_cols) or arrays (merge_arrpdf_cols) +.merge_cols <- function(arrs, fun = "pdf") { + if (fun == "cdf") { + arrs <- lapply(arrs, cdfpdf) + } else if (fun == "surv") { + arrs <- lapply(arrs, function(.x) cdfpdf(1 - .x)) + } else if (fun != "pdf") { + stop(sprintf( + "Expected 'fun' to be 'pdf', 'cdf', or 'surv'. Got: '%s'.", + fun + )) + } + + if (dim(arrs[[1L]]) == 2L) { + out <- .merge_matpdf_cols(arrs) + } else { + out <- .merge_arrpdf_cols(arrs) + } + + if (fun == "cdf") { + lapply(out, pdfcdf) + } else if (fun == "surv") { + lapply(out, function(.x) 1 - pdfcdf(.x)) + } else { + out + } +} + +.merge_arrpdf_cols <- function(pdfs) { + if (length(unique(viapply(pdfs, function(.x) dim(.x)[[3L]]))) > 1) { + stop("Can only merge arrays with same length on third dimension.") + } + + nc <- unique(viapply(pdfs, ncol)) + + if (length(nc) == 1) { + if (all(vapply(pdfs, colnames, character(nc)) == colnames(pdfs[[1]]))) { + return(pdfs) + } + } + + cnms <- sort(unique(as.numeric(unlist(lapply(pdfs, colnames))))) + # new number of rows and columns + nc <- length(cnms) + nl <- dim(pdfs[[1]])[3L] + + lapply(pdfs, function(.x) { + out <- array(0, c(nrow(.x), nc, nl), list(NULL, cnms, NULL)) + out[, match(as.numeric(colnames(.x)), cnms), ] <- .x + out + }) +} + +.merge_matpdf_cols <- function(pdfs) { + + nc <- unique(viapply(pdfs, ncol)) + + if (length(nc) == 1) { + if (all(vapply(pdfs, colnames, character(nc)) == colnames(pdfs[[1]]))) { + return(pdfs) + } + } + + cnms <- sort(unique(as.numeric(unlist(lapply(pdfs, colnames))))) + # new number of rows and columns + nc <- length(cnms) + + lapply(pdfs, function(.x) { + out <- matrix(0, nrow(.x), nc, FALSE, list(NULL, cnms)) + out[, match(as.numeric(colnames(.x)), cnms)] <- .x + out + }) +} \ No newline at end of file diff --git a/R/transformers.R b/R/transformers.R new file mode 100644 index 000000000..cf8e7b6c1 --- /dev/null +++ b/R/transformers.R @@ -0,0 +1,37 @@ +pdfcdf <- function(pdf) { + d <- dim(pdf) + dn <- dimnames(pdf) + + if (is.null(d)) { + return(cumsum(pdf)) + } else if (length(d) == 2) { + out <- C_mat_PdfCdf(pdf) + } else if (length(d) == 3) { + # quicker than apply with C_mat_ + out <- aperm(apply(unname(pdf), c(1, 3), C_vec_PdfCdf), c(2, 1, 3)) + } else { + stop(sprintf("Expected maximum of three dimensions but got '%s'.", length(d))) + } + + dimnames(out) <- dn + out +} + +cdfpdf <- function(cdf) { + d <- dim(cdf) + dn <- dimnames(cdf) + + if (is.null(d)) { + return(c(cdf[1], diff(cdf))) + } else if (length(d) == 2) { + out <- C_mat_CdfPdf(cdf) + } else if (length(d) == 3) { + # quicker than apply with C_mat_ + out <- aperm(apply(unname(cdf), c(1, 3), C_vec_CdfPdf), c(2, 1, 3)) + } else { + stop(sprintf("Expected maximum of three dimensions but got '%s'.", length(d))) + } + + dimnames(out) <- dn + out +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 652f6c189..380ea5b02 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -582,6 +582,50 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// C_vec_PdfCdf +NumericVector C_vec_PdfCdf(NumericVector x); +RcppExport SEXP _distr6_C_vec_PdfCdf(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(C_vec_PdfCdf(x)); + return rcpp_result_gen; +END_RCPP +} +// C_vec_CdfPdf +NumericVector C_vec_CdfPdf(NumericVector x); +RcppExport SEXP _distr6_C_vec_CdfPdf(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(C_vec_CdfPdf(x)); + return rcpp_result_gen; +END_RCPP +} +// C_mat_PdfCdf +NumericMatrix C_mat_PdfCdf(NumericMatrix x); +RcppExport SEXP _distr6_C_mat_PdfCdf(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericMatrix >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(C_mat_PdfCdf(x)); + return rcpp_result_gen; +END_RCPP +} +// C_mat_CdfPdf +NumericMatrix C_mat_CdfPdf(NumericMatrix x); +RcppExport SEXP _distr6_C_mat_CdfPdf(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericMatrix >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(C_mat_CdfPdf(x)); + return rcpp_result_gen; +END_RCPP +} // C_dpq NumericMatrix C_dpq(std::string fun, NumericVector x, std::list args, int lower, int log); RcppExport SEXP _distr6_C_dpq(SEXP funSEXP, SEXP xSEXP, SEXP argsSEXP, SEXP lowerSEXP, SEXP logSEXP) { @@ -655,6 +699,10 @@ static const R_CallMethodDef CallEntries[] = { {"_distr6_C_UniformKernelPdf", (DL_FUNC) &_distr6_C_UniformKernelPdf, 2}, {"_distr6_C_UniformKernelCdf", (DL_FUNC) &_distr6_C_UniformKernelCdf, 3}, {"_distr6_C_UniformKernelQuantile", (DL_FUNC) &_distr6_C_UniformKernelQuantile, 3}, + {"_distr6_C_vec_PdfCdf", (DL_FUNC) &_distr6_C_vec_PdfCdf, 1}, + {"_distr6_C_vec_CdfPdf", (DL_FUNC) &_distr6_C_vec_CdfPdf, 1}, + {"_distr6_C_mat_PdfCdf", (DL_FUNC) &_distr6_C_mat_PdfCdf, 1}, + {"_distr6_C_mat_CdfPdf", (DL_FUNC) &_distr6_C_mat_CdfPdf, 1}, {"_distr6_C_dpq", (DL_FUNC) &_distr6_C_dpq, 5}, {"_distr6_C_r", (DL_FUNC) &_distr6_C_r, 3}, {NULL, NULL, 0} diff --git a/src/Transformers.cpp b/src/Transformers.cpp new file mode 100644 index 000000000..9b8bd906f --- /dev/null +++ b/src/Transformers.cpp @@ -0,0 +1,70 @@ +#include +using namespace Rcpp; + +// [[Rcpp::export]] +NumericVector C_vec_PdfCdf(NumericVector x) { + int len = x.size(); + + NumericVector out(len); + out[0] = x[0]; + + for (int i = 1; i < len; i++) { + out[i] = x[i] + out[i - 1]; + } + + return out; +} + +// [[Rcpp::export]] +NumericVector C_vec_CdfPdf(NumericVector x) { + int len = x.size(); + + NumericVector out(len); + out[0] = x[0]; + + for (int i = (len - 1); i > 0; i--) { + out[i] = x[i] - x[i - 1]; + } + + return out; +} + +// [[Rcpp::export]] +NumericMatrix C_mat_PdfCdf(NumericMatrix x) { + + int nc = x.ncol(); + int nr = x.nrow(); + + NumericMatrix out(nr, nc); + + for (int i = 0; i < nr; i++) { + for (int j = 0; j < nc; j++) { + if (j == 0) { + out(i, j) = x(i, j); + } else { + out(i, j) = x(i, j) + out(i, j - 1); + } + } + } + return out; +} + +// [[Rcpp::export]] +NumericMatrix C_mat_CdfPdf(NumericMatrix x) { + + int nr = x.nrow(); + int nc = x.ncol(); + + NumericMatrix out(nr, nc); + + for (int i = 0; i < nr; i++) { + for (int j = (nc - 1); j >= 0; j--) { + if (j == 0) { + out(i, j) = x(i, j); + } else { + out(i, j) = x(i, j) - x(i, j - 1); + } + } + } + return out; +} diff --git a/tests/testthat/test-dparse.R b/tests/testthat/test-dparse.R index 908d2c30e..5196d0fe4 100644 --- a/tests/testthat/test-dparse.R +++ b/tests/testthat/test-dparse.R @@ -38,7 +38,7 @@ test_that("ShortName, ClassName and Alias are unique between distributions", { expect_equal(length(d6), length(unique(d6))) }) -test_that("Every distribution is created with it's propper R6 class", { +test_that("Every distribution is created with it's proper R6 class", { # Get calls d6 <- listDistributions()[,(ids = paste(tolower(ShortName), tolower(ClassName), tolower(Alias), sep = ", "))] calls <- strsplit(d6, ",") diff --git a/tests/testthat/test-sdistribution-Arrdist.R b/tests/testthat/test-sdistribution-Arrdist.R index d1bf4e9ba..096dff9a7 100644 --- a/tests/testthat/test-sdistribution-Arrdist.R +++ b/tests/testthat/test-sdistribution-Arrdist.R @@ -67,6 +67,12 @@ test_that("c.Arrdist", { expect_equal(dim(r), c(50, 60)) expect_true(all(r <= 20)) expect_true(all(r >= 1)) + + arr4pdf = array(runif(200), c(20, 10, 1)) + arr4pdf = arr4pdf / sum(arr4pdf) + colnames(arr4pdf) = 1:10 + arr4 = as.Distribution(arr4pdf, fun = "pdf") + expect_error(c(arr1, arr4), "Can't combine") }) test_that("Arrdist basics", { diff --git a/tests/testthat/test-transformers.R b/tests/testthat/test-transformers.R new file mode 100644 index 000000000..2e98d8efa --- /dev/null +++ b/tests/testthat/test-transformers.R @@ -0,0 +1,30 @@ +test_that("vector transformers", { + pdf <- c(0.1, 0.2, 0.7) + cdf <- cumsum(pdf) + + expect_equal(pdfcdf(pdf), cdf) + expect_equal(cdfpdf(cdf), pdf) +}) + +test_that("matrix transformers", { + pdf <- matrix(c(0.1, 0.2, 0.7, 0.2, 0.2, 0.6), 2, 3, TRUE) + cdf <- t(apply(pdf, 1, cumsum)) + + expect_equal(pdfcdf(pdf), cdf) + expect_equal(cdfpdf(cdf), pdf) +}) + +test_that("array transformers", { + pdf <- matrix(c(0.1, 0.2, 0.7, 0.2, 0.2, 0.6), 2, 3, TRUE) + cdf <- t(apply(pdf, 1, cumsum)) + pdf <- array(pdf, c(2, 3, 5)) + cdf <- array(cdf, c(2, 3, 5)) + + expect_equal(pdfcdf(pdf), cdf) + expect_equal(cdfpdf(cdf), pdf) +}) + +test_that("transformers error when expected", { + expect_error(pdfcdf(array(dim = c(1, 1, 1, 1))), "Expected maximum") + expect_error(cdfpdf(array(dim = c(1, 1, 1, 1))), "Expected maximum") +})