diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/check-standard.yaml index 56ded00..546232b 100644 --- a/.github/workflows/check-standard.yaml +++ b/.github/workflows/check-standard.yaml @@ -1,4 +1,4 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/master/examples +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: config: - - {os: macOS-latest, r: 'release'} + - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} @@ -32,21 +32,24 @@ jobs: _R_CHECK_LENGTH_1_LOGIC2_: verbose steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: r-lib/actions/setup-pandoc@v1 + - uses: r-lib/actions/setup-pandoc@v2 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true - - uses: r-lib/actions/setup-r-dependencies@v1 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: rcmdcheck + extra-packages: any::rcmdcheck + needs: check - - uses: r-lib/actions/check-r-package@v1 + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true - name: Show testthat output if: always() diff --git a/DESCRIPTION b/DESCRIPTION index e8480e5..b3e97ce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,8 +2,8 @@ Encoding: UTF-8 Package: bigstatsr Type: Package Title: Statistical Tools for Filebacked Big Matrices -Version: 1.5.13 -Date: 2023-12-13 +Version: 1.5.15 +Date: 2024-03-24 Authors@R: c( person("Florian", "Privé", email = "florian.prive.21@gmail.com", role = c("aut", "cre")), diff --git a/NAMESPACE b/NAMESPACE index 37bf130..8d70140 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -68,6 +68,7 @@ exportMethods(tcrossprod) exportMethods(typeof) import(foreach) import(ggplot2) +import(rmio) importFrom(Rcpp,sourceCpp) importFrom(bigassertr,assert_01) importFrom(bigassertr,assert_all) diff --git a/NEWS.md b/NEWS.md index c1e84aa..48806bd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +## bigstatsr 1.5.14 + +- Error when variables with a zero scaling are used in e.g. `big_randomSVD()` and `big_crossprodSelf()` (#52). + ## bigstatsr 1.5.13 - Add parameter `backingfile` to `big_crossprodSelf()` and `big_cor()` (#170). diff --git a/R/FBM.R b/R/FBM.R index 1a22fd2..ea8b5a3 100644 --- a/R/FBM.R +++ b/R/FBM.R @@ -110,6 +110,7 @@ sub_bk <- function(path, replacement = "", stop_if_not_ext = TRUE) { #' X[] # access as standard R matrix #' #' @exportClass FBM +#' @import rmio #' FBM_RC <- methods::setRefClass( diff --git a/R/crossprodSelf.R b/R/crossprodSelf.R index 2fc6c67..01eb476 100644 --- a/R/crossprodSelf.R +++ b/R/crossprodSelf.R @@ -44,6 +44,8 @@ big_crossprodSelf <- function( tmp1 <- X[ind.row, ind.col[ind1]] ms <- fun.scaling(X, ind.row = ind.row, ind.col = ind.col[ind1]) + if (any_near0(ms$scale)) stop2(MSG_ZERO_SCALE) + mu[ind1] <- ms$center delta[ind1] <- ms$scale sums[ind1] <- colSums(tmp1) diff --git a/R/plot.R b/R/plot.R index e71581b..d8680b4 100644 --- a/R/plot.R +++ b/R/plot.R @@ -12,9 +12,9 @@ #' #' @examples #' library(ggplot2) -#' qplot(y = 1:10) -#' qplot(y = 1:10) + theme_bw() -#' qplot(y = 1:10) + theme_bigstatsr() +#' (p <- ggplot(mapping = aes(x = 1:10, y = 1:10)) + geom_point()) +#' p + theme_bw() +#' p + theme_bigstatsr() theme_bigstatsr <- function(size.rel = 1) { theme_bw() + theme( @@ -31,10 +31,6 @@ theme_bigstatsr <- function(size.rel = 1) { ) } -MY_THEME <- function(p, coeff = 1) { - p + theme_bigstatsr(size.rel = coeff) -} - #' @importFrom cowplot plot_grid #' @export cowplot::plot_grid @@ -101,7 +97,9 @@ plot.big_SVD <- function(x, type = c("screeplot", "scores", "loadings"), assert_lengths(nval, 1) - p <- MY_THEME(qplot(y = x$d[seq_len(nval)]), coeff = coeff) + + p <- ggplot(mapping = aes(x = seq_len(nval), y = x$d[seq_len(nval)])) + + theme_bigstatsr(size.rel = coeff) + + geom_point() + geom_line() + scale_y_log10() + labs(title = "Scree Plot", x = "PC Index", y = "Singular Value") @@ -133,7 +131,9 @@ plot.big_SVD <- function(x, type = c("screeplot", "scores", "loadings"), nx <- scores[1] ny <- scores[2] - MY_THEME(qplot(x = sc[, nx], y = sc[, ny]), coeff = coeff) + + ggplot(mapping = aes(x = sc[, nx], y = sc[, ny])) + + geom_point() + + theme_bigstatsr(size.rel = coeff) + coord_fixed() + labs(title = "Scores of PCA", x = paste0("PC", nx), y = paste0("PC", ny)) @@ -155,7 +155,9 @@ plot.big_SVD <- function(x, type = c("screeplot", "scores", "loadings"), } else { - p <- MY_THEME(qplot(y = x$v[, loadings]), coeff = coeff) + + p <- ggplot(mapping = aes(x = rows_along(x$v), y = x$v[, loadings])) + + geom_point() + + theme_bigstatsr(size.rel = coeff) + labs(title = paste0("Loadings of PC", loadings), x = "Column index", y = NULL) @@ -215,17 +217,20 @@ plot.mhtest <- function(x, type = c("hist", "Manhattan", "Q-Q", "Volcano"), type <- match.arg(type) main <- paste(type, "plot") - if (type == "Manhattan") { - qplot(y = -lpval) + + p <- if (type == "Manhattan") { + ggplot(mapping = aes(x = seq_along(lpval), y = -lpval)) + + geom_point() + labs(title = main, x = "Column Index", y = expression(-log[10](italic("p-value")))) } else if (type == "Volcano") { - qplot(x = x[["estim"]], y = -lpval) + + ggplot(mapping = aes(x = x[["estim"]], y = -lpval)) + + geom_point() + labs(title = main, x = "Estimate", y = expression(-log[10](italic("p-value")))) } else if (type == "Q-Q") { unif.ranked <- stats::ppoints(length(lpval))[rank(lpval)] - qplot(x = -log10(unif.ranked), y = -lpval) + + ggplot(mapping = aes(x = -log10(unif.ranked), y = -lpval)) + + geom_point() + labs(title = main, x = expression(Expected~~-log[10](italic("p-value"))), y = expression(Observed~~-log[10](italic("p-value")))) + @@ -239,7 +244,7 @@ plot.mhtest <- function(x, type = c("hist", "Manhattan", "Q-Q", "Volcano"), labs(x = "p-value") } - last_plot() + theme_bigstatsr(size.rel = coeff) + p + theme_bigstatsr(size.rel = coeff) } ################################################################################ diff --git a/R/randomSVD.R b/R/randomSVD.R index 6d632b6..239218e 100644 --- a/R/randomSVD.R +++ b/R/randomSVD.R @@ -13,7 +13,7 @@ svds4.par <- function(X, fun.scaling, ind.row, ind.col, k, Ax <- FBM(n, ncores) Atx <- FBM(m, 1) - calc <- FBM(ncores, 1, init = 0) + calc <- FBM(ncores, 1, init = -1) cluster_type <- getOption("bigstatsr.cluster.type") if (verbose) { @@ -26,6 +26,14 @@ svds4.par <- function(X, fun.scaling, ind.row, ind.col, k, if (ic == 0) { # I'm the master + # Wait for the slaves to finish computing the scalings + while (any(calc[] < 0)) { + if (any(calc[] == -2)) { + calc[] <- 3 # signal all slaves to stop + stop2(MSG_ZERO_SCALE) + } else Sys.sleep(TIME) + } + printf <- function(...) cat(sprintf(...)) it <- 0 # A @@ -33,8 +41,8 @@ svds4.par <- function(X, fun.scaling, ind.row, ind.col, k, printf("%d - computing A * x\n", it <<- it + 1) Atx[] <- x calc[] <- 1 # make them work - # Master wait for its slaves to finish working - while (sum(calc[]) > 0) Sys.sleep(TIME) + # the master wait for its slaves to finish working + while (any(calc[] > 0)) Sys.sleep(TIME) rowSums(Ax[]) } # Atrans @@ -42,8 +50,8 @@ svds4.par <- function(X, fun.scaling, ind.row, ind.col, k, printf("%d - computing At * x\n", it <<- it + 1) Ax[, 1] <- x calc[] <- 2 # make them work - # Master wait for its slaves to finish working - while (sum(calc[]) > 0) Sys.sleep(TIME) + # the master wait for its slaves to finish working + while (any(calc[] > 0)) Sys.sleep(TIME) Atx[] } @@ -61,6 +69,12 @@ svds4.par <- function(X, fun.scaling, ind.row, ind.col, k, # Scaling ms <- fun.scaling(X, ind.row = ind.row, ind.col = ind.col.part) + if (any_near0(ms$scale)) { + calc[ic] <- -2 # signal the master there is an issue with the scaling + return(ms) + } else { + calc[ic] <- 0 + } repeat { # Slaves wait for their master to give them orders @@ -78,8 +92,6 @@ svds4.par <- function(X, fun.scaling, ind.row, ind.col, k, } else if (c == 3) { # End break - } else { - stop("RandomSVD: unclear order from the master.") } calc[ic] <- 0 } @@ -110,6 +122,7 @@ svds4.seq <- function(X, fun.scaling, ind.row, ind.col, k, tol, verbose, # scaling ms <- fun.scaling(X, ind.row, ind.col) + if (any_near0(ms$scale)) stop2(MSG_ZERO_SCALE) printf <- function(...) if (verbose) cat(sprintf(...)) it <- 0 diff --git a/R/scaling.R b/R/scaling.R index 48b954c..1b6f54c 100644 --- a/R/scaling.R +++ b/R/scaling.R @@ -55,6 +55,8 @@ big_scale <- function(center = TRUE, scale = TRUE) { sds <- rep(1, m) } + if (any_near0(sds)) warning2(MSG_ZERO_SCALE) + data.frame(center = means, scale = sds) } } diff --git a/R/tcrossprodSelf.R b/R/tcrossprodSelf.R index 19fa093..b8b9585 100644 --- a/R/tcrossprodSelf.R +++ b/R/tcrossprodSelf.R @@ -42,6 +42,8 @@ big_tcrossprodSelf <- function( ind <- seq2(intervals[j, ]) ind.col.ind <- ind.col[ind] ms <- fun.scaling(X, ind.row = ind.row, ind.col = ind.col.ind) + if (any_near0(ms$scale)) stop2(MSG_ZERO_SCALE) + means[ind] <- ms$center sds[ind] <- ms$scale increment_scaled_tcrossprod(K, X_part_temp, X, diff --git a/R/utils-assert.R b/R/utils-assert.R index 79d00c8..d159b64 100644 --- a/R/utils-assert.R +++ b/R/utils-assert.R @@ -4,6 +4,17 @@ ################################################################################ +MSG_ZERO_SCALE <- + "Some variables have zero scaling; remove them before attempting to scale." + +################################################################################ + +any_near0 <- function(x) { + any(dplyr::near(x, 0)) +} + +################################################################################ + as_vec <- function(x) { x2 <- drop(x) if (is.matrix(x2)) diff --git a/docs/news/index.html b/docs/news/index.html index 6d8a27a..514977d 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@
@@ -85,6 +85,10 @@NEWS.md
+ big_randomSVD()
and big_crossprodSelf()
(#52).backingfile
to big_crossprodSelf()
and big_cor()
(#170).