From ca7677e66dc82be51742a3480421836dbc63de3d Mon Sep 17 00:00:00 2001 From: RaphaelS1 Date: Wed, 1 Nov 2023 13:25:37 +0000 Subject: [PATCH 1/4] fix massive bottleneck --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ R/SDistribution_Arrdist.R | 14 ++++++-------- R/SDistribution_Matdist.R | 17 ++++++----------- R/SDistribution_WeightedDiscrete.R | 9 +++------ src/Distributions.cpp | 26 +++++++++++++------------- src/RcppExports.cpp | 12 ++++++------ 7 files changed, 39 insertions(+), 45 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a24640230..7409c91aa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: distr6 Title: The Complete R6 Probability Distributions Interface -Version: 1.8.3 +Version: 1.8.4 Authors@R: c(person(given = "Raphael", family = "Sonabend", diff --git a/NEWS.md b/NEWS.md index 5bb395921..db1befeb4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# distr6 1.8.4 + +* Fix massive bottleneck in Matdist and Arrdist + # distr6 1.8.3 * Add `decorators` argument to `c.Matdist` and `c.Arrdist` diff --git a/R/SDistribution_Arrdist.R b/R/SDistribution_Arrdist.R index f6f4e85a6..4e2aade39 100644 --- a/R/SDistribution_Arrdist.R +++ b/R/SDistribution_Arrdist.R @@ -310,8 +310,7 @@ Arrdist <- R6Class("Arrdist", .pdf = function(x, log = FALSE) { "pdf, data, wc" %=% gprm(self, c("pdf", "x", "which.curve")) mat <- .extCurve(pdf, wc) - out <- t(C_Vec_WeightedDiscretePdf( - x, matrix(data, ncol(mat), private$.ndists), t(mat))) + out <- t(C_Vec_WeightedDiscretePdf(x, data, t(mat))) if (log) { out <- log(out) } @@ -319,12 +318,11 @@ Arrdist <- R6Class("Arrdist", t(out) }, - .cdf = function(x, lower.tail = TRUE, log.p = FALSE) { # FIXME + .cdf = function(x, lower.tail = TRUE, log.p = FALSE) { "cdf, data, wc" %=% gprm(self, c("cdf", "x", "which.curve")) mat <- .extCurve(cdf, wc) - out <- t(C_Vec_WeightedDiscreteCdf( - x, matrix(data, ncol(mat), nrow(mat)), t(mat), lower.tail, log.p - )) + out <- t(C_Vec_WeightedDiscreteCdf(x, data, t(mat), lower.tail, + log.p)) colnames(out) <- x t(out) }, @@ -332,8 +330,8 @@ Arrdist <- R6Class("Arrdist", .quantile = function(p, lower.tail = TRUE, log.p = FALSE) { "*" %=% gprm(self, c("cdf", "x", "which.curve")) mat <- .extCurve(cdf, which.curve) - out <- t(C_Vec_WeightedDiscreteQuantile(p, - matrix(x, ncol(mat), nrow(mat)), t(mat), lower.tail, log.p)) + out <- t(C_Vec_WeightedDiscreteQuantile(p, x, t(mat), lower.tail, + log.p)) colnames(out) <- NULL t(out) }, diff --git a/R/SDistribution_Matdist.R b/R/SDistribution_Matdist.R index fc127b99c..4b5f01aea 100644 --- a/R/SDistribution_Matdist.R +++ b/R/SDistribution_Matdist.R @@ -274,10 +274,7 @@ Matdist <- R6Class("Matdist", # dpqr .pdf = function(x, log = FALSE) { "pdf, data" %=% gprm(self, c("pdf", "x")) - out <- t(C_Vec_WeightedDiscretePdf( - x, matrix(data, ncol(pdf), private$.ndists), - t(pdf) - )) + out <- t(C_Vec_WeightedDiscretePdf(x, data, t(pdf))) if (log) { out <- log(out) } @@ -285,20 +282,18 @@ Matdist <- R6Class("Matdist", t(out) }, - .cdf = function(x, lower.tail = TRUE, log.p = FALSE) { # FIXME + .cdf = function(x, lower.tail = TRUE, log.p = FALSE) { "cdf, data" %=% gprm(self, c("cdf", "x")) - out <- t(C_Vec_WeightedDiscreteCdf( - x, matrix(data, ncol(cdf), nrow(cdf)), - t(cdf), lower.tail, log.p - )) + out <- t(C_Vec_WeightedDiscreteCdf(x, data, t(cdf), lower.tail, + log.p)) colnames(out) <- x t(out) }, .quantile = function(p, lower.tail = TRUE, log.p = FALSE) { "*" %=% gprm(self, c("cdf", "x")) - out <- t(C_Vec_WeightedDiscreteQuantile(p, matrix(x, ncol(cdf), nrow(cdf)), - t(cdf), lower.tail, log.p)) + out <- t(C_Vec_WeightedDiscreteQuantile(p, x, t(cdf), lower.tail, + log.p)) colnames(out) <- NULL t(out) }, diff --git a/R/SDistribution_WeightedDiscrete.R b/R/SDistribution_WeightedDiscrete.R index 6105680b4..845875f0b 100644 --- a/R/SDistribution_WeightedDiscrete.R +++ b/R/SDistribution_WeightedDiscrete.R @@ -375,8 +375,7 @@ WeightedDiscrete <- R6Class("WeightedDiscrete", data[[i]] <- data[[i]][seq.int(lng)] } pdf <- matrix(unlist(pdf), nrow = length(data[[1]]), ncol = length(data)) - data <- matrix(unlist(data), ncol = ncol(pdf)) - out <- C_Vec_WeightedDiscretePdf(x, data, pdf) + out <- C_Vec_WeightedDiscretePdf(x, unlist(data), pdf) if (log) { out <- log(out) } @@ -396,8 +395,7 @@ WeightedDiscrete <- R6Class("WeightedDiscrete", data[[i]] <- data[[i]][seq.int(lng)] } cdf <- matrix(unlist(cdf), nrow = length(data[[1]]), ncol = length(data)) - data <- matrix(unlist(data), ncol = ncol(cdf)) - C_Vec_WeightedDiscreteCdf(x, data, cdf, lower.tail, log.p) + C_Vec_WeightedDiscreteCdf(x, unlist(data), cdf, lower.tail, log.p) } else { .wd_cdf(x, data, cdf, lower.tail, log.p) } @@ -414,8 +412,7 @@ WeightedDiscrete <- R6Class("WeightedDiscrete", data[[i]] <- data[[i]][seq.int(lng)] } cdf <- matrix(unlist(cdf), nrow = length(data[[1]]), ncol = length(data)) - data <- matrix(unlist(data), ncol = ncol(cdf)) - C_Vec_WeightedDiscreteQuantile(p, data, cdf, lower.tail, log.p) + C_Vec_WeightedDiscreteQuantile(p, unlist(data), cdf, lower.tail, log.p) } else { C_WeightedDiscreteQuantile(p, data, cdf, lower.tail, log.p) } diff --git a/src/Distributions.cpp b/src/Distributions.cpp index 5752ad0c9..e1ed740ef 100644 --- a/src/Distributions.cpp +++ b/src/Distributions.cpp @@ -399,11 +399,11 @@ NumericMatrix C_ShiftedLoglogisticQuantile(NumericVector x, NumericVector locati // [[Rcpp::export]] -NumericMatrix C_Vec_WeightedDiscretePdf(NumericVector x, NumericMatrix data, +NumericMatrix C_Vec_WeightedDiscretePdf(NumericVector x, NumericVector data, NumericMatrix pdf) { - int nc = data.ncol(); - int nr = data.nrow(); + int nc = pdf.ncol(); + int nr = pdf.nrow(); int n = x.length(); NumericMatrix mat(n, nc); @@ -415,7 +415,7 @@ NumericMatrix C_Vec_WeightedDiscretePdf(NumericVector x, NumericMatrix data, for (int i = 0; i < nc; i++) { for (int k = 0; k < n; k++) { for (int j = 0; j < nr; j++) { - if (data(j, i) == x[k]) { + if (data(j) == x[k]) { mat(k, i) = pdf(j, i); break; } @@ -467,11 +467,11 @@ NumericVector C_WeightedDiscreteCdf(NumericVector x, NumericVector data, Numeric } // [[Rcpp::export]] -NumericMatrix C_Vec_WeightedDiscreteCdf(NumericVector x, NumericMatrix data, NumericMatrix cdf, +NumericMatrix C_Vec_WeightedDiscreteCdf(NumericVector x, NumericVector data, NumericMatrix cdf, bool lower, bool logp) { - int nc = data.ncol(); - int nr = data.nrow(); + int nc = cdf.ncol(); + int nr = cdf.nrow(); int n = x.length(); NumericMatrix mat(n, nc); @@ -483,13 +483,13 @@ NumericMatrix C_Vec_WeightedDiscreteCdf(NumericVector x, NumericMatrix data, Num for (int i = 0; i < nc; i++) { for (int k = 0; k < n; k++) { for (int j = 0; j < nr; j++) { - if (j == 0 && x[k] < data(0, i)) { + if (j == 0 && x[k] < data(0)) { mat(k, i) = 0; break; } else if (j == nr - 1) { mat(k, i) = cdf(j, i); break; - } else if (x[k] >= data(j, i) && x[k] < data(j + 1, i)) { + } else if (x[k] >= data(j) && x[k] < data(j + 1)) { mat(k, i) = cdf(j, i); break; } @@ -540,11 +540,11 @@ NumericVector C_WeightedDiscreteQuantile(NumericVector x, NumericVector data, Nu } // [[Rcpp::export]] -NumericMatrix C_Vec_WeightedDiscreteQuantile(NumericVector x, NumericMatrix data, NumericMatrix cdf, +NumericMatrix C_Vec_WeightedDiscreteQuantile(NumericVector x, NumericVector data, NumericMatrix cdf, bool lower, bool logp) { - int nc = data.ncol(); - int nr = data.nrow(); + int nc = cdf.ncol(); + int nr = cdf.nrow(); int n = x.length(); NumericMatrix mat(n, nc); @@ -567,7 +567,7 @@ NumericMatrix C_Vec_WeightedDiscreteQuantile(NumericVector x, NumericMatrix data } if (y <= cdf(j, i)) { - mat(k, i) = data(j, i); + mat(k, i) = data(j); break; } } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 380ea5b02..467e609e6 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -179,13 +179,13 @@ BEGIN_RCPP END_RCPP } // C_Vec_WeightedDiscretePdf -NumericMatrix C_Vec_WeightedDiscretePdf(NumericVector x, NumericMatrix data, NumericMatrix pdf); +NumericMatrix C_Vec_WeightedDiscretePdf(NumericVector x, NumericVector data, NumericMatrix pdf); RcppExport SEXP _distr6_C_Vec_WeightedDiscretePdf(SEXP xSEXP, SEXP dataSEXP, SEXP pdfSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP); - Rcpp::traits::input_parameter< NumericMatrix >::type data(dataSEXP); + Rcpp::traits::input_parameter< NumericVector >::type data(dataSEXP); Rcpp::traits::input_parameter< NumericMatrix >::type pdf(pdfSEXP); rcpp_result_gen = Rcpp::wrap(C_Vec_WeightedDiscretePdf(x, data, pdf)); return rcpp_result_gen; @@ -207,13 +207,13 @@ BEGIN_RCPP END_RCPP } // C_Vec_WeightedDiscreteCdf -NumericMatrix C_Vec_WeightedDiscreteCdf(NumericVector x, NumericMatrix data, NumericMatrix cdf, bool lower, bool logp); +NumericMatrix C_Vec_WeightedDiscreteCdf(NumericVector x, NumericVector data, NumericMatrix cdf, bool lower, bool logp); RcppExport SEXP _distr6_C_Vec_WeightedDiscreteCdf(SEXP xSEXP, SEXP dataSEXP, SEXP cdfSEXP, SEXP lowerSEXP, SEXP logpSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP); - Rcpp::traits::input_parameter< NumericMatrix >::type data(dataSEXP); + Rcpp::traits::input_parameter< NumericVector >::type data(dataSEXP); Rcpp::traits::input_parameter< NumericMatrix >::type cdf(cdfSEXP); Rcpp::traits::input_parameter< bool >::type lower(lowerSEXP); Rcpp::traits::input_parameter< bool >::type logp(logpSEXP); @@ -237,13 +237,13 @@ BEGIN_RCPP END_RCPP } // C_Vec_WeightedDiscreteQuantile -NumericMatrix C_Vec_WeightedDiscreteQuantile(NumericVector x, NumericMatrix data, NumericMatrix cdf, bool lower, bool logp); +NumericMatrix C_Vec_WeightedDiscreteQuantile(NumericVector x, NumericVector data, NumericMatrix cdf, bool lower, bool logp); RcppExport SEXP _distr6_C_Vec_WeightedDiscreteQuantile(SEXP xSEXP, SEXP dataSEXP, SEXP cdfSEXP, SEXP lowerSEXP, SEXP logpSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP); - Rcpp::traits::input_parameter< NumericMatrix >::type data(dataSEXP); + Rcpp::traits::input_parameter< NumericVector >::type data(dataSEXP); Rcpp::traits::input_parameter< NumericMatrix >::type cdf(cdfSEXP); Rcpp::traits::input_parameter< bool >::type lower(lowerSEXP); Rcpp::traits::input_parameter< bool >::type logp(logpSEXP); From 2337982dad54ac78461905cc00a576ebea31010a Mon Sep 17 00:00:00 2001 From: Raphael Sonabend Date: Fri, 10 Nov 2023 22:58:12 +0000 Subject: [PATCH 2/4] Update src/Distributions.cpp --- src/Distributions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Distributions.cpp b/src/Distributions.cpp index e1ed740ef..37b759cf8 100644 --- a/src/Distributions.cpp +++ b/src/Distributions.cpp @@ -415,7 +415,7 @@ NumericMatrix C_Vec_WeightedDiscretePdf(NumericVector x, NumericVector data, for (int i = 0; i < nc; i++) { for (int k = 0; k < n; k++) { for (int j = 0; j < nr; j++) { - if (data(j) == x[k]) { + if (data[j] == x[k]) { mat(k, i) = pdf(j, i); break; } From 6095caa086e3ac2ff63811f43296d0f245a80166 Mon Sep 17 00:00:00 2001 From: john Date: Sat, 11 Nov 2023 00:19:50 +0100 Subject: [PATCH 3/4] consistent operator for vector indexing --- src/Distributions.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Distributions.cpp b/src/Distributions.cpp index 37b759cf8..046566263 100644 --- a/src/Distributions.cpp +++ b/src/Distributions.cpp @@ -483,13 +483,13 @@ NumericMatrix C_Vec_WeightedDiscreteCdf(NumericVector x, NumericVector data, Num for (int i = 0; i < nc; i++) { for (int k = 0; k < n; k++) { for (int j = 0; j < nr; j++) { - if (j == 0 && x[k] < data(0)) { + if (j == 0 && x[k] < data[0]) { mat(k, i) = 0; break; } else if (j == nr - 1) { mat(k, i) = cdf(j, i); break; - } else if (x[k] >= data(j) && x[k] < data(j + 1)) { + } else if (x[k] >= data[j] && x[k] < data[j + 1]) { mat(k, i) = cdf(j, i); break; } @@ -567,7 +567,7 @@ NumericMatrix C_Vec_WeightedDiscreteQuantile(NumericVector x, NumericVector data } if (y <= cdf(j, i)) { - mat(k, i) = data(j); + mat(k, i) = data[j]; break; } } From ddef09a164abd05d766af6e2bccdd8b810301656 Mon Sep 17 00:00:00 2001 From: john Date: Sat, 11 Nov 2023 00:23:51 +0100 Subject: [PATCH 4/4] more informative comment about indexes used --- src/Distributions.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Distributions.cpp b/src/Distributions.cpp index 046566263..a32bc5b59 100644 --- a/src/Distributions.cpp +++ b/src/Distributions.cpp @@ -408,9 +408,9 @@ NumericMatrix C_Vec_WeightedDiscretePdf(NumericVector x, NumericVector data, NumericMatrix mat(n, nc); - // i - distribution - // j - data samples - // k - evaluates + // i - data samples + // j - data points + // k - new data points for (int i = 0; i < nc; i++) { for (int k = 0; k < n; k++) { @@ -476,9 +476,9 @@ NumericMatrix C_Vec_WeightedDiscreteCdf(NumericVector x, NumericVector data, Num NumericMatrix mat(n, nc); - // i - distribution - // j - data samples - // k - evaluates + // i - data samples + // j - data points + // k - new data points for (int i = 0; i < nc; i++) { for (int k = 0; k < n; k++) {