From 81c860e0e4abca77c2a1fff4dbb00bbf7708aeb1 Mon Sep 17 00:00:00 2001 From: LTLA Date: Fri, 6 Sep 2024 12:46:01 -0700 Subject: [PATCH] Inch closer. --- R/classifySingleR.R | 35 ++++++++++++++++++++++------------- R/trainSingleR.R | 3 ++- R/utils.R | 5 ++++- man/trainSingleR.Rd | 7 ++++--- 4 files changed, 32 insertions(+), 18 deletions(-) diff --git a/R/classifySingleR.R b/R/classifySingleR.R index 3a3b747..67aa043 100644 --- a/R/classifySingleR.R +++ b/R/classifySingleR.R @@ -109,14 +109,17 @@ classifySingleR <- function( trained <- list(trained) } - results <- lapply(trained, FUN=.classify_internals, - test=test, - quantile=quantile, - fine.tune=fine.tune, - tune.thresh=tune.thresh, - prune=prune, - num.threads=num.threads - ) + results <- vector("list", length(trained)) + for (l in seq_along(results)) { + results[[l]] <- .classify_internals( + trained=trained[[l]], test=test, + quantile=quantile, + fine.tune=fine.tune, + tune.thresh=tune.thresh, + prune=prune, + num.threads=num.threads + ) + } if (solo) { results[[1]] @@ -133,19 +136,25 @@ classifySingleR <- function( #' @importFrom S4Vectors DataFrame metadata metadata<- I .classify_internals <- function(test, trained, quantile, fine.tune, tune.thresh=0.05, prune=TRUE, num.threads=1) { - m <- match(trained$markers$unique, rownames(test)) - if (anyNA(m)) { - stop("'rownames(test)' does not contain all genes used in 'trained'") + if (!is.null(trained$options$test.genes)) { + if (!identical(trained$options$test.genes, rownames(test))) { + stop("expected 'rownames(test)' to be the same as 'test.genes' in 'trainSingleR'") + } + } else if (nrow(trained$ref) != nrow(test)) { + stop("expected 'test' to have the same number of rows as the reference dataset") } trained <- rebuildIndex(trained, num.threads = num.threads) parsed <- initializeCpp(test) - out <- run(parsed, m - 1L, trained$built, + out <- classify_single( + test = parsed, + prebuilt = trained$built, quantile = quantile, use_fine_tune = fine.tune, fine_tune_threshold = 
tune.thresh, - nthreads = num.threads) + nthreads = num.threads + ) colnames(out$scores) <- trained$labels$unique output <- DataFrame( diff --git a/R/trainSingleR.R b/R/trainSingleR.R index 65dc72a..8ae111e 100644 --- a/R/trainSingleR.R +++ b/R/trainSingleR.R @@ -177,6 +177,7 @@ trainSingleR <- function( ref, labels, + test.genes=NULL, genes="de", sd.thresh=NULL, de.method=c("classic", "wilcox", "t"), @@ -226,7 +227,7 @@ trainSingleR <- function( for (l in seq_along(ref)) { curref <- .to_clean_matrix(ref[[l]], assay.type, check.missing, msg="ref", BPPARAM=BPPARAM) - curlabels <- as.character(labels[[ll]]) + curlabels <- as.character(labels[[l]]) stopifnot(length(curlabels) == ncol(curref)) keep <- !is.na(curlabels) if (!all(keep)) { diff --git a/R/utils.R b/R/utils.R index 076a4b2..df96eac 100644 --- a/R/utils.R +++ b/R/utils.R @@ -23,8 +23,11 @@ old <- getAutoBPPARAM() setAutoBPPARAM(BPPARAM) on.exit(setAutoBPPARAM(old)) + + x <- DelayedArray(x) + discard <- rowAnyNAs(x) if (any(discard)) { - x <- DelayedArray(x)[!discard,,drop=FALSE] + x <- x[!discard,,drop=FALSE] } } diff --git a/man/trainSingleR.Rd b/man/trainSingleR.Rd index 0c9067f..73ce55a 100644 --- a/man/trainSingleR.Rd +++ b/man/trainSingleR.Rd @@ -7,6 +7,7 @@ trainSingleR( ref, labels, + test.genes = NULL, genes = "de", sd.thresh = NULL, de.method = c("classic", "wilcox", "t"), @@ -38,6 +39,9 @@ Alternatively, a list or \linkS4class{List} of SummarizedExperiment objects or n Alternatively, if \code{ref} is a list, \code{labels} should be a list of the same length. Each element should contain a character vector or factor specifying the labels for the columns of the corresponding element of \code{ref}.} +\item{test.genes}{Character vector of the names of the genes in the test dataset, i.e., the row names of \code{test} in \code{\link{classifySingleR}}. 
+If \code{NULL}, it is assumed that the test dataset and \code{ref} have the same genes in the same row order.} + \item{genes}{A string containing \code{"de"}, indicating that markers should be calculated from \code{ref}. For back compatibility, other string values are allowed but will be ignored with a deprecation warning. @@ -90,9 +94,6 @@ If true and any missing values are found, the rows containing these values are s \item{BPPARAM}{A \linkS4class{BiocParallelParam} object specifying how parallelization should be performed. Relevant for marker detection if \code{genes = NULL}, aggregation if \code{aggr.ref = TRUE}, and \code{NA} checking if \code{check.missing = TRUE}.} - -\item{test.genes}{Character vector of the names of the genes in the test dataset, i.e., the row names of \code{test} in \code{\link{classifySingleR}}. -If \code{NULL}, it is assumed that the test dataset and \code{ref} have the same genes in the same row order.} } \value{ For a single reference, a \linkS4class{List} is returned containing: