From 4dd3c2b7a9d03e6f70f3009ed7764175cb911bc8 Mon Sep 17 00:00:00 2001
From: LTLA <infinite.monkeys.with.keyboards@gmail.com>
Date: Fri, 6 Sep 2024 15:58:27 -0700
Subject: [PATCH] Report the names of the references in the
 combineRecomputedResults output.

---
 R/classifySingleR.R            | 1 +
 R/combineRecomputedResults.R   | 6 ++----
 R/trainSingleR.R               | 1 +
 tests/testthat/test-classify.R | 8 ++++++++
 tests/testthat/test-train.R    | 9 +++++----
 5 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/R/classifySingleR.R b/R/classifySingleR.R
index 52987cc..5001259 100644
--- a/R/classifySingleR.R
+++ b/R/classifySingleR.R
@@ -109,6 +109,7 @@ classifySingleR <- function(
     }
 
     results <- vector("list", length(trained))
+    names(results) <- names(trained)
     for (l in seq_along(results)) {
         results[[l]] <- .classify_internals(
             test=test, 
diff --git a/R/combineRecomputedResults.R b/R/combineRecomputedResults.R
index f1f1b69..c8e4c50 100644
--- a/R/combineRecomputedResults.R
+++ b/R/combineRecomputedResults.R
@@ -21,7 +21,7 @@
 #' For any given cell, entries of this matrix are only non-\code{NA} for the assigned label in each reference;
 #' scores are not recomputed for the other labels.
 #' \item \code{labels}, a character vector containing the per-cell combined label across references.
-#' \item \code{references}, an integer vector specifying the reference from which the combined label was derived.
+#' \item \code{reference}, an integer vector specifying the reference from which the combined label was derived.
 #' \item \code{orig.results}, a DataFrame containing \code{results}.
 #' }
 #' It may also contain \code{pruned.labels} if these were also present in \code{results}.
@@ -206,15 +206,13 @@ combineRecomputedResults <- function(
     }
 
     output <- DataFrame(labels=chosen.label, row.names=rownames(results[[1]]))
-
     if (has.pruned) {
         output$pruned.labels <- chosen.pruned
     }
 
     output$reference <- chosen
-
     if (is.null(names(results))) {
-        names(results) <- sprintf("ref%i", seq_along(results))
+        names(results) <- sprintf("ref%s", seq_along(results))
     }
     output$orig.results <- do.call(DataFrame, lapply(results, I))
 
diff --git a/R/trainSingleR.R b/R/trainSingleR.R
index 2f6d81f..5535496 100644
--- a/R/trainSingleR.R
+++ b/R/trainSingleR.R
@@ -224,6 +224,7 @@ trainSingleR <- function(
     }
 
     output <- vector("list", length(ref))
+    names(output) <- names(ref)
     for (l in seq_along(ref)) {
         curref <- .to_clean_matrix(ref[[l]], assay.type, check.missing, msg="ref", BPPARAM=BPPARAM)
 
diff --git a/tests/testthat/test-classify.R b/tests/testthat/test-classify.R
index 05d0790..2537ef5 100644
--- a/tests/testthat/test-classify.R
+++ b/tests/testthat/test-classify.R
@@ -86,10 +86,18 @@ test_that("classifySingleR works with multiple references", {
 
     mtrain <- trainSingleR(list(training1, training2), list(training1$label, training2$label))
     out <- classifySingleR(test, mtrain)
+    expect_identical(names(out$orig.results), c("ref1", "ref2"))
+    expect_true(all(out$reference %in% 1:2))
 
     ref1 <- classifySingleR(test, mtrain[[1]])
     ref2 <- classifySingleR(test, mtrain[[2]])
     expect_identical(out, combineRecomputedResults(list(ref1, ref2), test, mtrain))
+
+    # Preserves names of the references themselves.
+    mtrain <- trainSingleR(list(foo=training1, bar=training2), list(training1$label, training2$label))
+    out <- classifySingleR(test, mtrain)
+    expect_identical(names(out$orig.results), c("foo", "bar"))
+    expect_true(all(out$reference %in% 1:2))
 })
 
 test_that("classifySingleR behaves with silly inputs", {
diff --git a/tests/testthat/test-train.R b/tests/testthat/test-train.R
index 48c7ac9..f216fdd 100644
--- a/tests/testthat/test-train.R
+++ b/tests/testthat/test-train.R
@@ -137,16 +137,17 @@ test_that("trainSingleR behaves with multiple references, plus recomputation", {
     training1 <- training1[sample(nrow(training1)),]
     rownames(training1) <- rownames(training)
     
-    set.seed(1000)
     ref1 <- trainSingleR(training1, training1$label)
     ref2 <- trainSingleR(training2, training2$label)
-
-    set.seed(1000)
-    out <- trainSingleR(list(training1, training2), list(training1$label, training2$label), recompute=TRUE)
+    out <- trainSingleR(list(training1, training2), list(training1$label, training2$label))
 
     except.built <- setdiff(names(ref1), "built")
     expect_identical(ref1[except.built], out[[1]][except.built])
     expect_identical(ref2[except.built], out[[2]][except.built])
+
+    # Same result with names.
+    out <- trainSingleR(list(foo=training1, bar=training2), list(training1$label, training2$label))
+    expect_identical(names(out), c("foo", "bar"))
 })
 
 test_that("trainSingleR behaves with aggregation turned on", {