From 81c860e0e4abca77c2a1fff4dbb00bbf7708aeb1 Mon Sep 17 00:00:00 2001
From: LTLA <infinite.monkeys.with.keyboards@gmail.com>
Date: Fri, 6 Sep 2024 12:46:01 -0700
Subject: [PATCH] Inch closer.

---
 R/classifySingleR.R | 35 ++++++++++++++++++++++-------------
 R/trainSingleR.R    |  3 ++-
 R/utils.R           |  5 ++++-
 man/trainSingleR.Rd |  7 ++++---
 4 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/R/classifySingleR.R b/R/classifySingleR.R
index 3a3b747..67aa043 100644
--- a/R/classifySingleR.R
+++ b/R/classifySingleR.R
@@ -109,14 +109,17 @@ classifySingleR <- function(
         trained <- list(trained)
     }
 
-    results <- lapply(trained, FUN=.classify_internals, 
-        test=test, 
-        quantile=quantile, 
-        fine.tune=fine.tune, 
-        tune.thresh=tune.thresh, 
-        prune=prune, 
-        num.threads=num.threads
-    )
+    results <- vector("list", length(trained)) # one slot per trained reference
+    for (l in seq_along(results)) { # classify 'test' against the l-th reference
+        results[[l]] <- .classify_internals( # write to 'results', never back into 'trained'
+            test=test, trained=trained[[l]], 
+            quantile=quantile, 
+            fine.tune=fine.tune, 
+            tune.thresh=tune.thresh, 
+            prune=prune, 
+            num.threads=num.threads
+        )
+    }
 
     if (solo) {
         results[[1]]
@@ -133,19 +136,25 @@ classifySingleR <- function(
 
 #' @importFrom S4Vectors DataFrame metadata metadata<- I
 .classify_internals <- function(test, trained, quantile, fine.tune, tune.thresh=0.05, prune=TRUE, num.threads=1) {
-    m <- match(trained$markers$unique, rownames(test))
-    if (anyNA(m)) {
-        stop("'rownames(test)' does not contain all genes used in 'trained'")
+    if (!is.null(trained$options$test.genes)) {
+        if (!identical(trained$options$test.genes, rownames(test))) {
+            stop("expected 'rownames(test)' to be the same as 'test.genes' in 'trainSingleR'")
+        }
+    } else if (nrow(trained$ref) != nrow(test)) {
+        stop("expected 'test' to have the same number of rows as the reference dataset")
     }
 
     trained <- rebuildIndex(trained, num.threads = num.threads)
 
     parsed <- initializeCpp(test)
-    out <- run(parsed, m - 1L, trained$built, 
+    out <- classify_single(
+        test = parsed, 
+        prebuilt = trained$built, 
         quantile = quantile, 
         use_fine_tune = fine.tune, 
         fine_tune_threshold = tune.thresh, 
-        nthreads = num.threads)
+        nthreads = num.threads
+    )
 
     colnames(out$scores) <- trained$labels$unique
     output <- DataFrame(
diff --git a/R/trainSingleR.R b/R/trainSingleR.R
index 65dc72a..8ae111e 100644
--- a/R/trainSingleR.R
+++ b/R/trainSingleR.R
@@ -177,6 +177,7 @@
 trainSingleR <- function(
     ref, 
     labels, 
+    test.genes=NULL,
     genes="de", 
     sd.thresh=NULL, 
     de.method=c("classic", "wilcox", "t"), 
@@ -226,7 +227,7 @@ trainSingleR <- function(
     for (l in seq_along(ref)) {
         curref <- .to_clean_matrix(ref[[l]], assay.type, check.missing, msg="ref", BPPARAM=BPPARAM)
 
-        curlabels <- as.character(labels[[ll]])
+        curlabels <- as.character(labels[[l]])
         stopifnot(length(curlabels) == ncol(curref))
         keep <- !is.na(curlabels)
         if (!all(keep)) {
diff --git a/R/utils.R b/R/utils.R
index 076a4b2..df96eac 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -23,8 +23,11 @@
         old <- getAutoBPPARAM()
         setAutoBPPARAM(BPPARAM)
         on.exit(setAutoBPPARAM(old))
+
+        x <- DelayedArray(x)
+        discard <- rowAnyNAs(x)
         if (any(discard)) {
-            x <- DelayedArray(x)[!discard,,drop=FALSE]
+            x <- x[!discard,,drop=FALSE]
         }
     }
 
diff --git a/man/trainSingleR.Rd b/man/trainSingleR.Rd
index 0c9067f..73ce55a 100644
--- a/man/trainSingleR.Rd
+++ b/man/trainSingleR.Rd
@@ -7,6 +7,7 @@
 trainSingleR(
   ref,
   labels,
+  test.genes = NULL,
   genes = "de",
   sd.thresh = NULL,
   de.method = c("classic", "wilcox", "t"),
@@ -38,6 +39,9 @@ Alternatively, a list or \linkS4class{List} of SummarizedExperiment objects or n
 Alternatively, if \code{ref} is a list, \code{labels} should be a list of the same length.
 Each element should contain a character vector or factor specifying the labels for the columns of the corresponding element of \code{ref}.}
 
+\item{test.genes}{Character vector of the names of the genes in the test dataset, i.e., the row names of \code{test} in \code{\link{classifySingleR}}.
+If \code{NULL}, it is assumed that the test dataset and \code{ref} have the same genes in the same row order.}
+
 \item{genes}{A string containing \code{"de"}, indicating that markers should be calculated from \code{ref}.
 For back compatibility, other string values are allowed but will be ignored with a deprecation warning.
 
@@ -90,9 +94,6 @@ If true and any missing values are found, the rows containing these values are s
 
 \item{BPPARAM}{A \linkS4class{BiocParallelParam} object specifying how parallelization should be performed.
 Relevant for marker detection if \code{genes = NULL}, aggregation if \code{aggr.ref = TRUE}, and \code{NA} checking if \code{check.missing = TRUE}.}
-
-\item{test.genes}{Character vector of the names of the genes in the test dataset, i.e., the row names of \code{test} in \code{\link{classifySingleR}}.
-If \code{NULL}, it is assumed that the test dataset and \code{ref} have the same genes in the same row order.}
 }
 \value{
 For a single reference, a \linkS4class{List} is returned containing: