Skip to content

Commit

Permalink
Merge pull request #292 from xoopR/transformers
Browse files Browse the repository at this point in the history
Transformers
  • Loading branch information
RaphaelS1 authored Oct 13, 2023
2 parents abdb22c + 548ba95 commit 87d752a
Show file tree
Hide file tree
Showing 14 changed files with 307 additions and 59 deletions.
10 changes: 10 additions & 0 deletions .github/workflows/rcmdcheck.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ jobs:
pak::pkg_system_requirements("rcmdcheck", execute = TRUE)
shell: Rscript {0}

- name: Install valgrind
if: ${{ runner.os == 'Linux' && matrix.config.r == 'release'}}
run: |
sudo apt-get install --yes valgrind
- name: Install dependencies
run: |
pak::local_install_dev_deps(upgrade = TRUE)
Expand All @@ -78,6 +83,11 @@ jobs:
rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
shell: Rscript {0}

- name: Valgrind
if: ${{ runner.os == 'Linux' && matrix.config.r == 'release' && always()}}
run: rcmdcheck::rcmdcheck(build_args = "--no-build-vignettes", args = c("--use-valgrind", "--no-codoc", "--no-manual", "--ignore-vignettes"), error_on = "error", check_dir = "check")
shell: Rscript {0}

- name: Show testthat output
if: always()
run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true
Expand Down
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: distr6
Title: The Complete R6 Probability Distributions Interface
Version: 1.8.0
Version: 1.8.1
Authors@R:
c(person(given = "Raphael",
family = "Sonabend",
Expand Down Expand Up @@ -220,6 +220,7 @@ Collate:
'listWrappers.R'
'makeUniqueDistributions.R'
'measures.R'
'merge_cols.R'
'mixMatrix.R'
'mixturiseVector.R'
'plot_continuous.R'
Expand All @@ -234,4 +235,5 @@ Collate:
'simulateEmpiricalDistribution.R'
'skewType.R'
'sugar.R'
'transformers.R'
'zzz.R'
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# distr6 1.8.1

* Add 'transformer' functions `pdfcdf` and `cdfpdf`, which use Rcpp to transform matrics/arrays/vectors between pdf->cdf and cdf->pdf respectively.

# distr6 1.8.0

* Add `Arrdist`, which generalises the `Matdist` to a three-dimensional array, useful for Bayesian predictions where the third dimension is multiple distributions.
Expand Down
16 changes: 16 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,22 @@ C_UniformKernelQuantile <- function(x, lower, logp) {
.Call(`_distr6_C_UniformKernelQuantile`, x, lower, logp)
}

C_vec_PdfCdf <- function(x) {
.Call(`_distr6_C_vec_PdfCdf`, x)
}

C_vec_CdfPdf <- function(x) {
.Call(`_distr6_C_vec_CdfPdf`, x)
}

C_mat_PdfCdf <- function(x) {
.Call(`_distr6_C_mat_PdfCdf`, x)
}

C_mat_CdfPdf <- function(x) {
.Call(`_distr6_C_mat_CdfPdf`, x)
}

C_dpq <- function(fun, x, args, lower = TRUE, log = FALSE) {
.Call(`_distr6_C_dpq`, fun, x, args, lower, log)
}
Expand Down
25 changes: 2 additions & 23 deletions R/SDistribution_Arrdist.R
Original file line number Diff line number Diff line change
Expand Up @@ -396,33 +396,12 @@ c.Arrdist <- function(...) {
stop("Can't combine array distributions with different lengths on third dimension.")
}

pdfs = .merge_arrpdf_cols(pdfs)
pdfs = do.call(abind::abind, list(what = pdfs, along = 1))
pdfs <- .merge_arrpdf_cols(pdfs)
pdfs <- do.call(abind::abind, list(what = pdfs, along = 1))

as.Distribution(pdfs, fun = "pdf", decorators = decs)
}

.merge_arrpdf_cols <- function(pdfs) {
nc <- unique(viapply(pdfs, ncol))

if (length(nc) == 1) {
if (all(vapply(pdfs, colnames, character(nc)) == colnames(pdfs[[1]]))) {
return(pdfs)
}
}

cnms <- sort(unique(as.numeric(unlist(lapply(pdfs, colnames)))))
# new number of rows and columns
nc <- length(cnms)
nl <- dim(pdfs[[1]])[3L]

lapply(pdfs, function(.x) {
out <- array(0, c(nrow(.x), nc, nl), list(NULL, cnms, NULL))
out[, match(as.numeric(colnames(.x)), cnms), ] <- .x
out
})
}

#' @title Extract one or more Distributions from an Array distribution
#' @description Extract a [WeightedDiscrete] or [Matdist] or [Arrdist] from a [Arrdist].
#' @param ad [Arrdist] from which to extract Distributions.
Expand Down
22 changes: 0 additions & 22 deletions R/SDistribution_Matdist.R
Original file line number Diff line number Diff line change
Expand Up @@ -358,28 +358,6 @@ c.Matdist <- function(...) {
decorators = decs)
}

.merge_matpdf_cols <- function(pdfs) {

nc <- unique(viapply(pdfs, ncol))

if (length(nc) == 1) {
if (all(vapply(pdfs, colnames, character(nc)) == colnames(pdfs[[1]]))) {
return(pdfs)
}
}

cnms <- sort(unique(as.numeric(unlist(lapply(pdfs, colnames)))))
# new number of rows and columns
nc <- length(cnms)

lapply(pdfs, function(.x) {
out <- matrix(0, nrow(.x), nc, FALSE, list(NULL, cnms))
out[, match(as.numeric(colnames(.x)), cnms)] <- .x
out
})
}


#' @title Extract one or more Distributions from a Matdist
#' @description Extract a [WeightedDiscrete] or [Matdist] from a [Matdist].
#' @param md [Matdist] from which to extract Distributions.
Expand Down
18 changes: 6 additions & 12 deletions R/getParameterSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -685,12 +685,12 @@ getParameterSet.WeightedDiscrete <- function(object, ...) { # nolint

if (length(pdfs)) {
cdfs <- setNames(
lapply(pdfs, cumsum),
lapply(pdfs, pdfcdf),
gsub("pdf", "cdf", names(pdfs))
)
} else {
pdfs <- setNames(
lapply(cdfs, function(.x) c(.x[1], diff(.x))),
lapply(cdfs, cdfpdf),
gsub("cdf", "pdf", names(cdfs))
)
}
Expand All @@ -716,9 +716,9 @@ getParameterSet.Matdist <- function(object, ...) { # nolint
cdf <- list_element(x, "cdf")$cdf

if (length(pdf)) {
cdf <- t(apply(pdf, 1, cumsum))
cdf <- pdfcdf(pdf)
} else {
pdf <- t(apply(cdf, 1, function(.y) c(.y[1], diff(.y))))
pdf <- cdfpdf(cdf)
}

assert_cdf_matrix(cdf, pdf)
Expand All @@ -742,15 +742,9 @@ getParameterSet.Arrdist <- function(object, which.curve = 0.5, ...) { # nolint
cdf <- list_element(x, "cdf")$cdf

if (length(pdf)) {
cdf <- pdf
for (i in 2:ncol(cdf)) {
cdf[, i, ] <- cdf[, i, ] + cdf[, i - 1, ]
}
cdf <- pdfcdf(pdf)
} else {
pdf <- cdf
for (i in ncol(pdf):2) {
pdf[, i, ] <- pdf[, i, ] - pdf[, i - 1, ]
}
pdf <- cdfpdf(cdf)
}

assert_cdf_array(cdf, pdf)
Expand Down
74 changes: 74 additions & 0 deletions R/merge_cols.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# unify column names across pdfs in
# matrices (merge_matpdf_cols) or arrays (merge_arrpdf_cols)
.merge_cols <- function(arrs, fun = "pdf") {
if (fun == "cdf") {
arrs <- lapply(arrs, cdfpdf)
} else if (fun == "surv") {
arrs <- lapply(arrs, function(.x) cdfpdf(1 - .x))
} else if (fun != "pdf") {
stop(sprintf(
"Expected 'fun' to be 'pdf', 'cdf', or 'surv'. Got: '%s'.",
fun
))
}

if (dim(arrs[[1L]]) == 2L) {
out <- .merge_matpdf_cols(arrs)
} else {
out <- .merge_arrpdf_cols(arrs)
}

if (fun == "cdf") {
lapply(out, pdfcdf)
} else if (fun == "surv") {
lapply(out, function(.x) 1 - pdfcdf(.x))
} else {
out
}
}

.merge_arrpdf_cols <- function(pdfs) {
if (length(unique(viapply(pdfs, function(.x) dim(.x)[[3L]]))) > 1) {
stop("Can only merge arrays with same length on third dimension.")
}

nc <- unique(viapply(pdfs, ncol))

if (length(nc) == 1) {
if (all(vapply(pdfs, colnames, character(nc)) == colnames(pdfs[[1]]))) {
return(pdfs)
}
}

cnms <- sort(unique(as.numeric(unlist(lapply(pdfs, colnames)))))
# new number of rows and columns
nc <- length(cnms)
nl <- dim(pdfs[[1]])[3L]

lapply(pdfs, function(.x) {
out <- array(0, c(nrow(.x), nc, nl), list(NULL, cnms, NULL))
out[, match(as.numeric(colnames(.x)), cnms), ] <- .x
out
})
}

.merge_matpdf_cols <- function(pdfs) {

nc <- unique(viapply(pdfs, ncol))

if (length(nc) == 1) {
if (all(vapply(pdfs, colnames, character(nc)) == colnames(pdfs[[1]]))) {
return(pdfs)
}
}

cnms <- sort(unique(as.numeric(unlist(lapply(pdfs, colnames)))))
# new number of rows and columns
nc <- length(cnms)

lapply(pdfs, function(.x) {
out <- matrix(0, nrow(.x), nc, FALSE, list(NULL, cnms))
out[, match(as.numeric(colnames(.x)), cnms)] <- .x
out
})
}
37 changes: 37 additions & 0 deletions R/transformers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
pdfcdf <- function(pdf) {
d <- dim(pdf)
dn <- dimnames(pdf)

if (is.null(d)) {
return(cumsum(pdf))
} else if (length(d) == 2) {
out <- C_mat_PdfCdf(pdf)
} else if (length(d) == 3) {
# quicker than apply with C_mat_
out <- aperm(apply(unname(pdf), c(1, 3), C_vec_PdfCdf), c(2, 1, 3))
} else {
stop(sprintf("Expected maximum of three dimensions but got '%s'.", length(d)))
}

dimnames(out) <- dn
out
}

cdfpdf <- function(cdf) {
d <- dim(cdf)
dn <- dimnames(cdf)

if (is.null(d)) {
return(c(cdf[1], diff(cdf)))
} else if (length(d) == 2) {
out <- C_mat_CdfPdf(cdf)
} else if (length(d) == 3) {
# quicker than apply with C_mat_
out <- aperm(apply(unname(cdf), c(1, 3), C_vec_CdfPdf), c(2, 1, 3))
} else {
stop(sprintf("Expected maximum of three dimensions but got '%s'.", length(d)))
}

dimnames(out) <- dn
out
}
48 changes: 48 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,50 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// C_vec_PdfCdf
NumericVector C_vec_PdfCdf(NumericVector x);
RcppExport SEXP _distr6_C_vec_PdfCdf(SEXP xSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP);
rcpp_result_gen = Rcpp::wrap(C_vec_PdfCdf(x));
return rcpp_result_gen;
END_RCPP
}
// C_vec_CdfPdf
NumericVector C_vec_CdfPdf(NumericVector x);
RcppExport SEXP _distr6_C_vec_CdfPdf(SEXP xSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP);
rcpp_result_gen = Rcpp::wrap(C_vec_CdfPdf(x));
return rcpp_result_gen;
END_RCPP
}
// C_mat_PdfCdf
NumericMatrix C_mat_PdfCdf(NumericMatrix x);
RcppExport SEXP _distr6_C_mat_PdfCdf(SEXP xSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< NumericMatrix >::type x(xSEXP);
rcpp_result_gen = Rcpp::wrap(C_mat_PdfCdf(x));
return rcpp_result_gen;
END_RCPP
}
// C_mat_CdfPdf
NumericMatrix C_mat_CdfPdf(NumericMatrix x);
RcppExport SEXP _distr6_C_mat_CdfPdf(SEXP xSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< NumericMatrix >::type x(xSEXP);
rcpp_result_gen = Rcpp::wrap(C_mat_CdfPdf(x));
return rcpp_result_gen;
END_RCPP
}
// C_dpq
NumericMatrix C_dpq(std::string fun, NumericVector x, std::list<NumericVector> args, int lower, int log);
RcppExport SEXP _distr6_C_dpq(SEXP funSEXP, SEXP xSEXP, SEXP argsSEXP, SEXP lowerSEXP, SEXP logSEXP) {
Expand Down Expand Up @@ -655,6 +699,10 @@ static const R_CallMethodDef CallEntries[] = {
{"_distr6_C_UniformKernelPdf", (DL_FUNC) &_distr6_C_UniformKernelPdf, 2},
{"_distr6_C_UniformKernelCdf", (DL_FUNC) &_distr6_C_UniformKernelCdf, 3},
{"_distr6_C_UniformKernelQuantile", (DL_FUNC) &_distr6_C_UniformKernelQuantile, 3},
{"_distr6_C_vec_PdfCdf", (DL_FUNC) &_distr6_C_vec_PdfCdf, 1},
{"_distr6_C_vec_CdfPdf", (DL_FUNC) &_distr6_C_vec_CdfPdf, 1},
{"_distr6_C_mat_PdfCdf", (DL_FUNC) &_distr6_C_mat_PdfCdf, 1},
{"_distr6_C_mat_CdfPdf", (DL_FUNC) &_distr6_C_mat_CdfPdf, 1},
{"_distr6_C_dpq", (DL_FUNC) &_distr6_C_dpq, 5},
{"_distr6_C_r", (DL_FUNC) &_distr6_C_r, 3},
{NULL, NULL, 0}
Expand Down
Loading

0 comments on commit 87d752a

Please sign in to comment.