diff --git a/DESCRIPTION b/DESCRIPTION
index 7e96163..ef584aa 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: SingleR
 Title: Reference-Based Single-Cell RNA-Seq Annotation
-Version: 2.7.1
-Date: 2024-05-22
+Version: 2.7.2
+Date: 2024-09-06
 Authors@R: c(person("Dvir", "Aran", email="dvir.aran@ucsf.edu", role=c("aut", "cph")),
     person("Aaron", "Lun", email="infinite.monkeys.with.keyboards@gmail.com", role=c("ctb", "cre")),
     person("Daniel", "Bunis", role="ctb"),
@@ -20,6 +20,7 @@ Imports:
     DelayedMatrixStats,
     BiocParallel,
     BiocSingular,
+    BiocNeighbors,
     stats,
     utils,
     Rcpp,
@@ -28,6 +29,7 @@ Imports:
 LinkingTo:
     Rcpp,
     beachmat,
+    assorthead,
     BiocNeighbors
 Suggests:
     testthat,
@@ -57,6 +59,6 @@ biocViews:
 SystemRequirements: C++17
 VignetteBuilder: knitr
 Encoding: UTF-8
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 URL: https://github.com/LTLA/SingleR
 BugReports: https://support.bioconductor.org/
diff --git a/NAMESPACE b/NAMESPACE
index 91f111f..5cbaed2 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -27,6 +27,9 @@ importClassesFrom(Matrix,dgCMatrix)
 importClassesFrom(S4Vectors,DataFrame)
 importClassesFrom(S4Vectors,List)
 importClassesFrom(SummarizedExperiment,SummarizedExperiment)
+importFrom(BiocNeighbors,AnnoyParam)
+importFrom(BiocNeighbors,KmknnParam)
+importFrom(BiocNeighbors,defineBuilder)
 importFrom(BiocParallel,SerialParam)
 importFrom(BiocParallel,bpisup)
 importFrom(BiocParallel,bpmapply)
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 78fc7a3..d4c7890 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -1,6 +1,16 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
+classify_integrated <- function(test, results, integrated_build, quantile, nthreads) {
+    .Call('_SingleR_classify_integrated', PACKAGE = 'SingleR', test, results, integrated_build, quantile, nthreads)
+}
+
+#' @importFrom Rcpp sourceCpp
+#' @useDynLib SingleR
+classify_single <- function(test, prebuilt, quantile, use_fine_tune, fine_tune_threshold, nthreads) {
+    .Call('_SingleR_classify_single', PACKAGE = 'SingleR', test, prebuilt, quantile, use_fine_tune, fine_tune_threshold, nthreads)
+}
+
 find_classic_markers <- function(nlabels, ngenes, labels, ref, de_n, nthreads) {
     .Call('_SingleR_find_classic_markers', PACKAGE = 'SingleR', nlabels, ngenes, labels, ref, de_n, nthreads)
 }
@@ -9,31 +19,21 @@ grouped_medians <- function(ref, groups, ngroups, nthreads) {
     .Call('_SingleR_grouped_medians', PACKAGE = 'SingleR', ref, groups, ngroups, nthreads)
 }
 
-integrate_build <- function(test_features, references, ref_ids, labels, prebuilt, nthreads) {
-    .Call('_SingleR_integrate_build', PACKAGE = 'SingleR', test_features, references, ref_ids, labels, prebuilt, nthreads)
-}
-
-integrate_run <- function(test, results, integrated_build, quantile, nthreads) {
-    .Call('_SingleR_integrate_run', PACKAGE = 'SingleR', test, results, integrated_build, quantile, nthreads)
+train_integrated <- function(test_features, references, ref_ids, labels, prebuilt, nthreads) {
+    .Call('_SingleR_train_integrated', PACKAGE = 'SingleR', test_features, references, ref_ids, labels, prebuilt, nthreads)
 }
 
 #' @importFrom Rcpp sourceCpp
 #' @useDynLib SingleR
-prebuild <- function(ref, labels, markers, approximate, nthreads) {
-    .Call('_SingleR_prebuild', PACKAGE = 'SingleR', ref, labels, markers, approximate, nthreads)
+train_single <- function(test_features, ref, ref_features, labels, markers, builder, nthreads) {
+    .Call('_SingleR_train_single', PACKAGE = 'SingleR', test_features, ref, ref_features, labels, markers, builder, nthreads)
 }
 
-get_subset <- function(built) {
-    .Call('_SingleR_get_subset', PACKAGE = 'SingleR', built)
+get_ref_subset <- function(built) {
+    .Call('_SingleR_get_ref_subset', PACKAGE = 'SingleR', built)
 }
 
 is_valid_built <- function(built) {
     .Call('_SingleR_is_valid_built', PACKAGE = 'SingleR', built)
 }
 
-#' @importFrom Rcpp sourceCpp
-#' @useDynLib SingleR
-run <- function(test, subset, prebuilt, quantile, use_fine_tune, fine_tune_threshold, nthreads) {
-    .Call('_SingleR_run', PACKAGE = 'SingleR', test, subset, prebuilt, quantile, use_fine_tune, fine_tune_threshold, nthreads)
-}
-
diff --git a/R/SingleR.R b/R/SingleR.R
index 6f6f4e7..a8bf931 100644
--- a/R/SingleR.R
+++ b/R/SingleR.R
@@ -102,33 +102,23 @@ SingleR <- function(
         bpstart(BPPARAM)
         on.exit(bpstop(BPPARAM))
     }
-    test <- .to_clean_matrix(test, assay.type.test, check.missing, msg="test", BPPARAM=BPPARAM)
-
-    # Converting to a common list format for ease of data munging.
-    if (single.ref <- !.is_list(ref)) {
-        ref <- list(ref)
-    }
 
-    ref <- lapply(ref, FUN=.to_clean_matrix, assay.type=assay.type.ref, 
-        check.missing=check.missing, msg="ref", BPPARAM=BPPARAM)
-    refnames <- Reduce(intersect, lapply(ref, rownames))
+    # We have to do all this row-subsetting at the start before trainSingleR,
+    # otherwise 'test.genes' won't match up to the filtered 'test'.
+    test <- .to_clean_matrix(test, assay.type.test, check.missing, msg="test", BPPARAM=BPPARAM)
 
-    keep <- intersect(rownames(test), refnames)
-    if (length(keep) == 0) {
-        stop("no common genes between 'test' and 'ref'")
+    tmp.ref <- ref
+    if (!is.list(tmp.ref) || is.data.frame(tmp.ref)) {
+        tmp.ref <- list(ref)
     }
-    if (!identical(keep, rownames(test))) {
-        test <- test[keep,]
-    }
-    for (i in seq_along(ref)) {
-        if (!identical(keep, rownames(ref[[i]]))) {
-            ref[[i]] <- ref[[i]][keep,,drop=FALSE]
+    for (rr in tmp.ref) {
+        keep <- rownames(test) %in% rownames(rr)
+        if (!all(keep)) {
+            test <- DelayedArray(test)[keep,,drop=FALSE] # only keeping the intersection, for safety's sake - see ?combineRecomputedResults.
         }
     }
-
-    # Converting back.
-    if (single.ref) {
-        ref <- ref[[1]]
+    if (nrow(test) == 0) {
+        stop("no common genes between 'test' and 'ref")
     }
 
     trained <- trainSingleR(
@@ -143,7 +133,8 @@ SingleR <- function(
         aggr.args = aggr.args, 
         recompute=recompute,
         restrict = restrict, 
-        check.missing=FALSE, 
+        test.genes=rownames(test),
+        check.missing=check.missing, 
         BNPARAM=BNPARAM, 
         num.threads = num.threads, 
         BPPARAM=BPPARAM
diff --git a/R/classifySingleR.R b/R/classifySingleR.R
index 3a3b747..27e055d 100644
--- a/R/classifySingleR.R
+++ b/R/classifySingleR.R
@@ -3,17 +3,19 @@
 #' Assign labels to each cell in a test dataset, using a pre-trained classifier combined with an iterative fine-tuning approach.
 #' 
 #' @param test A numeric matrix of single-cell expression values where rows are genes and columns are cells.
+#' Each row should be named with the gene name.
 #'
 #' Alternatively, a \linkS4class{SummarizedExperiment} object containing such a matrix.
 #' @param trained A \linkS4class{List} containing the output of the \code{\link{trainSingleR}} function.
+#' If the row names of \code{test} are not exactly the same as the reference dataset, the call to \code{trainSingleR} should set \code{test.genes=rownames(test)}.
+#'
 #' Alternatively, a List of Lists produced by \code{\link{trainSingleR}} for multiple references.
 #' @param quantile A numeric scalar specifying the quantile of the correlation distribution to use to compute the score for each label.
 #' @param fine.tune A logical scalar indicating whether fine-tuning should be performed. 
 #' @param tune.thresh A numeric scalar specifying the maximum difference from the maximum correlation to use in fine-tuning.
 #' @param sd.thresh Deprecated and ignored.
 #' @param assay.type Integer scalar or string specifying the matrix of expression values to use if \code{test} is a \linkS4class{SummarizedExperiment}.
-#' @param check.missing Logical scalar indicating whether rows should be checked for missing values.
-#' If true and any missing values are found, the rows containing these values are silently removed.
+#' @param check.missing Deprecated and ignored, as any row filtering will cause mismatches with the \code{test.genes=} used in \code{\link{trainSingleR}}.
 #' @param prune A logical scalar indicating whether label pruning should be performed.
 #' @param num.threads Integer scalar specifying the number of threads to use for classification.
 #' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying the parallelization scheme to use for \code{NA} scanning, when \code{check.missing=TRUE}.
@@ -98,25 +100,30 @@ classifySingleR <- function(
     sd.thresh=NULL, 
     prune=TRUE, 
     assay.type="logcounts", 
-    check.missing=TRUE,
+    check.missing=FALSE,
     num.threads = bpnworkers(BPPARAM),
     BPPARAM=SerialParam()) 
 {
-    test <- .to_clean_matrix(test, assay.type, check.missing, msg="test", BPPARAM=BPPARAM)
+    test <- .to_clean_matrix(test, assay.type, check.missing=FALSE, msg="test", BPPARAM=BPPARAM)
 
     solo <- .is_solo(trained)
     if (solo) { 
         trained <- list(trained)
     }
 
-    results <- lapply(trained, FUN=.classify_internals, 
-        test=test, 
-        quantile=quantile, 
-        fine.tune=fine.tune, 
-        tune.thresh=tune.thresh, 
-        prune=prune, 
-        num.threads=num.threads
-    )
+    results <- vector("list", length(trained))
+    names(results) <- names(trained)
+    for (l in seq_along(results)) {
+        results[[l]] <- .classify_internals(
+            test=test, 
+            trained=trained[[l]],
+            quantile=quantile, 
+            fine.tune=fine.tune, 
+            tune.thresh=tune.thresh, 
+            prune=prune, 
+            num.threads=num.threads
+        )
+    }
 
     if (solo) {
         results[[1]]
@@ -131,21 +138,30 @@ classifySingleR <- function(
     } 
 }
 
-#' @importFrom S4Vectors DataFrame metadata metadata<- I
-.classify_internals <- function(test, trained, quantile, fine.tune, tune.thresh=0.05, prune=TRUE, num.threads=1) {
-    m <- match(trained$markers$unique, rownames(test))
-    if (anyNA(m)) {
-        stop("'rownames(test)' does not contain all genes used in 'trained'")
+.check_test_genes <- function(test, trained) {
+    if (!is.null(trained$options$test.genes)) {
+        if (!identical(trained$options$test.genes, rownames(test))) {
+            stop("expected 'rownames(test)' to be the same as 'test.genes' in 'trained'")
+        }
+    } else if (!identical(rownames(trained$ref), rownames(test))) {
+        stop("expected 'rownames(test)' to be the same as 'rownames(ref)' in 'trained'")
     }
+}
 
+#' @importFrom S4Vectors DataFrame metadata metadata<- I
+.classify_internals <- function(test, trained, quantile, fine.tune, tune.thresh=0.05, prune=TRUE, num.threads=1) {
+    .check_test_genes(test, trained)
     trained <- rebuildIndex(trained, num.threads = num.threads)
 
     parsed <- initializeCpp(test)
-    out <- run(parsed, m - 1L, trained$built, 
+    out <- classify_single(
+        test = parsed, 
+        prebuilt = trained$built, 
         quantile = quantile, 
         use_fine_tune = fine.tune, 
         fine_tune_threshold = tune.thresh, 
-        nthreads = num.threads)
+        nthreads = num.threads
+    )
 
     colnames(out$scores) <- trained$labels$unique
     output <- DataFrame(
diff --git a/R/combineRecomputedResults.R b/R/combineRecomputedResults.R
index 773baa4..f9b1fe9 100644
--- a/R/combineRecomputedResults.R
+++ b/R/combineRecomputedResults.R
@@ -6,10 +6,11 @@
 #'
 #' @param results A list of \linkS4class{DataFrame} prediction results as returned by \code{\link{classifySingleR}} when run on each reference separately.
 #' @inheritParams SingleR
+#' @param check.missing Deprecated and ignored, as any row filtering will cause mismatches with the \code{test.genes=} used in \code{\link{trainSingleR}}.
 #' @param trained A list of \linkS4class{List}s containing the trained outputs of multiple references,
 #' equivalent to either (i) the output of \code{\link{trainSingleR}} on multiple references with \code{recompute=TRUE},
 #' or (ii) running \code{trainSingleR} on each reference separately and manually making a list of the trained outputs.
-#' @param warn.lost Logical scalar indicating whether to emit a warning if markers from one reference in \code{trained} are \dQuote{lost} in other references.
+#' @param warn.lost Logical scalar indicating whether to emit a warning if markers from one reference in \code{trained} are absent in other references.
 #' @param quantile Numeric scalar specifying the quantile of the correlation distribution to use for computing the score, see \code{\link{classifySingleR}}.
 #' @param allow.lost Deprecated.
 #'
@@ -20,7 +21,7 @@
 #' For any given cell, entries of this matrix are only non-\code{NA} for the assigned label in each reference;
 #' scores are not recomputed for the other labels.
 #' \item \code{labels}, a character vector containing the per-cell combined label across references.
-#' \item \code{references}, an integer vector specifying the reference from which the combined label was derived.
+#' \item \code{reference}, an integer vector specifying the reference from which the combined label was derived.
 #' \item \code{orig.results}, a DataFrame containing \code{results}.
 #' }
 #' It may also contain \code{pruned.labels} if these were also present in \code{results}.
@@ -100,58 +101,74 @@
 #'
 #' @export
 #' @importFrom S4Vectors DataFrame metadata<-
+#' @importFrom beachmat initializeCpp
 combineRecomputedResults <- function(
     results, 
     test, 
     trained, 
     quantile=0.8, 
     assay.type.test="logcounts", 
-    check.missing=TRUE, 
-    allow.lost=FALSE, 
+    check.missing=FALSE,
     warn.lost=TRUE,
+    allow.lost=FALSE, 
     num.threads = bpnworkers(BPPARAM),
     BPPARAM=SerialParam())
 {
-    all.names <- c(list(colnames(test)), lapply(results, rownames))
-    if (length(unique(all.names)) != 1) {
-        stop("cell/cluster names in 'results' are not identical")
-    }
-    all.nrow <- c(ncol(test), vapply(results, nrow, 0L))
-    if (length(unique(all.nrow)) != 1) {
-        stop("numbers of cells/clusters in 'results' are not identical")
+    test <- .to_clean_matrix(test, assay.type=assay.type.test, check.missing=FALSE, msg="test", BPPARAM=BPPARAM)
+
+    # Applying the sanity checks.
+    stopifnot(length(results) == length(trained))
+    for (i in seq_along(results)) {
+        curres <- results[[i]]
+        if (ncol(test) != nrow(curres)) {
+            stop("numbers of cells/clusters in 'results' are not identical")
+        }
+        if (!identical(rownames(curres), colnames(test))) {
+            stop("cell/cluster names in 'results' are not identical")
+        }
+
+        curtrain <- trained[[i]]
+        if (!all(curres$labels %in% curtrain$labels$unique)) {
+            stop("not all labels in 'results' are present in 'trained'")
+        }
+        .check_test_genes(test, curtrain)
     }
 
-    # Checking the marker consistency.
+    # Checking the genes.
     all.refnames <- lapply(trained, function(x) rownames(x$ref))
-    intersected <- Reduce(intersect, all.refnames)
-    for (i in seq_along(trained)) {
-        if (!all(trained[[i]]$markers$unique %in% rownames(test))) {
-            stop("all markers stored in 'results' should be present in 'test'")
-        } else if (warn.lost && !all(trained[[i]]$markers$unique %in% intersected)) {
-            warning("entries of 'trained' differ in the universe of available markers")
+    if (warn.lost) {
+        intersected <- Reduce(intersect, all.refnames)
+        for (i in seq_along(trained)) {
+            if (!all(trained[[i]]$markers$unique %in% intersected)) {
+                warning("not all markers in 'trained' are available in each reference")
+            }
         }
     }
-    
+
     # Applying the integration.
     universe <- Reduce(union, c(list(rownames(test)), all.refnames))
-    ibuilt <- integrate_build(
-        match(rownames(test), universe) - 1L,
-        lapply(trained, function(x) initializeCpp(x$ref)),
-        lapply(trained, function(x) match(rownames(x$ref), universe) - 1L), 
-        lapply(trained, function(x) match(x$labels$full, x$labels$unique) - 1L),
-        lapply(trained, function(x) x$built),
+    ibuilt <- train_integrated(
+        test_features=match(rownames(test), universe) - 1L,
+        references=lapply(trained, function(x) initializeCpp(x$ref)),
+        ref_ids=lapply(all.refnames, function(x) match(x, universe) - 1L), 
+        labels=lapply(trained, function(x) match(x$labels$full, x$labels$unique) - 1L),
+        prebuilt=lapply(trained, function(x) rebuildIndex(x)$built),
         nthreads = num.threads
     )
 
-    test <- .to_clean_matrix(test, assay.type=assay.type.test, check.missing=check.missing, msg="test", BPPARAM=BPPARAM)
     collated <- vector("list", length(trained))
     for (i in seq_along(collated)) {
         collated[[i]] <- match(results[[i]]$labels, trained[[i]]$labels$unique) - 1L
     }
 
     parsed <- initializeCpp(test)
-    irun <- integrate_run(parsed, collated, ibuilt, quantile = quantile, nthreads = num.threads) 
-    scores <- irun$scores
+    irun <- classify_integrated(
+        test=parsed,
+        results=collated,
+        integrated_build=ibuilt,
+        quantile=quantile,
+        nthreads=num.threads
+    ) 
 
     # Organizing the outputs.
     base.scores <- vector("list", length(results))
@@ -159,7 +176,7 @@ combineRecomputedResults <- function(
         mat <- results[[r]]$scores   
         mat[] <- NA_real_
         idx <- cbind(seq_len(nrow(mat)), collated[[r]] + 1L)
-        mat[idx] <- scores[,r]
+        mat[idx] <- irun$scores[,r]
         base.scores[[r]] <- mat
     }
 
diff --git a/R/getClassicMarkers.R b/R/getClassicMarkers.R
index 5ab6c93..4a95dee 100644
--- a/R/getClassicMarkers.R
+++ b/R/getClassicMarkers.R
@@ -64,6 +64,10 @@ getClassicMarkers <- function(ref, labels, assay.type="logcounts", check.missing
         labels <- list(labels)
     }
 
+    for (i in seq_along(ref)) {
+        ref[[i]] <- .to_clean_matrix(ref[[i]], assay.type, check.missing, msg="ref", BPPARAM=BPPARAM)
+    }
+
     # Setting up references.
     common <- Reduce(intersect, lapply(ref, rownames))
     if (length(common)==0L && any(vapply(ref, nrow, 0L) > 0L)) {
@@ -73,9 +77,7 @@ getClassicMarkers <- function(ref, labels, assay.type="logcounts", check.missing
 
     for (i in seq_along(ref)) {
         current <- ref[[i]][common,,drop=FALSE]
-        current <- .to_clean_matrix(current, assay.type, check.missing, msg="ref", BPPARAM=BPPARAM)
         curptr <- initializeCpp(current)
-
         flabels <- factor(labels[[i]])
         gm <- grouped_medians(curptr, as.integer(flabels) - 1L, nlevels(flabels), nthreads = num.threads)
         colnames(gm) <- levels(flabels)
diff --git a/R/rebuildIndices.R b/R/rebuildIndices.R
index 87566ab..72e3261 100644
--- a/R/rebuildIndices.R
+++ b/R/rebuildIndices.R
@@ -46,7 +46,8 @@ rebuildIndex <- function(trained, num.threads=1) {
             markers=trained$markers$full, 
             labels=trained$labels$full, 
             ulabels=trained$labels$unique, 
-            approximate=trained$options$approximate, 
+            BNPARAM=trained$options$BNPARAM,
+            test.genes=trained$options$test.genes,
             num.threads=num.threads) 
     }
     trained
diff --git a/R/trainSingleR.R b/R/trainSingleR.R
index fe56ec5..e74ea41 100644
--- a/R/trainSingleR.R
+++ b/R/trainSingleR.R
@@ -8,12 +8,11 @@
 #' 
 #' Alternatively, a \linkS4class{SummarizedExperiment} object containing such a matrix.
 #'
-#' Alternatively, a list or \linkS4class{List} of SummarizedExperiment objects or numeric matrices containing multiple references,
-#' in which case the row names are expected to be the same across all objects.
+#' Alternatively, a list or \linkS4class{List} of SummarizedExperiment objects or numeric matrices containing multiple references.
 #' @param labels A character vector or factor of known labels for all samples in \code{ref}.
 #' 
 #' Alternatively, if \code{ref} is a list, \code{labels} should be a list of the same length.
-#' Each element should contain a character vector or factor specifying the label for the corresponding entry of \code{ref}.
+#' Each element should contain a character vector or factor specifying the labels for the columns of the corresponding element of \code{ref}.
 #' @param genes A string containing \code{"de"}, indicating that markers should be calculated from \code{ref}.
 #' For back compatibility, other string values are allowed but will be ignored with a deprecation warning.
 #' 
@@ -45,13 +44,15 @@
 #' if \code{ref} is a \linkS4class{SummarizedExperiment} object (or is a list that contains one or more such objects).
 #' @param check.missing Logical scalar indicating whether rows should be checked for missing values.
 #' If true and any missing values are found, the rows containing these values are silently removed.
-#' @param BNPARAM Deprecated and ignored.
-#' @param approximate Logical scalar indicating whether a faster approximate method should be used to compute the quantile.
+#' @param BNPARAM A \linkS4class{BiocNeighborParam} object specifying how the neighbor search index should be constructed.
+#' @param approximate Deprecated, use \code{BNPARAM} instead.
 #' @param num.threads Integer scalar specifying the number of threads to use for index building.
 #' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying how parallelization should be performed.
 #' Relevant for marker detection if \code{genes = NULL}, aggregation if \code{aggr.ref = TRUE}, and \code{NA} checking if \code{check.missing = TRUE}.
 #' @param restrict A character vector of gene names to use for marker selection.
 #' By default, all genes in \code{ref} are used.
+#' @param test.genes Character vector of the names of the genes in the test dataset, i.e., the row names of \code{test} in \code{\link{classifySingleR}}.
+#' If \code{NULL}, it is assumed that the test dataset and \code{ref} have the same genes in the same row order.
 #'
 #' @return 
 #' For a single reference, a \linkS4class{List} is returned containing:
@@ -78,9 +79,15 @@
 #' For each label, the top \code{de.n} genes with the largest differences compared to another label are chosen as markers to distinguish the two labels.
 #' The expression values are expected to be log-transformed and normalized.
 #'
-#' If \code{restrict} is specified, \code{ref} is subsetted to only include the rows with names that are in \code{restrict}.
-#' Marker selection and all subsequent classification will be performed using this restrictive subset of genes.
-#' This can be convenient for ensuring that only appropriate genes are used (e.g., not pseudogenes or predicted genes).
+#' Classification with \code{classifySingleR} assumes that the test dataset contains all marker genes that were detected from the reference.
+#' If the test and reference datasets do not have the same genes in the same order, we can set \code{test.genes} to the row names of the test dataset.
+#' This will instruct \code{trainSingleR} to only consider markers that are present in the test dataset.
+#' Any subsequent call to \code{classifySingleR} will also check that \code{test.genes} is consistent with \code{rownames(test)}.
+#'
+#' On a similar note, if \code{restrict} is specified, marker selection will only be performed using the specified subset of genes.
+#' This can be convenient for ignoring inappropriate genes like pseudogenes or predicted genes.
+#' It has the same effect as filtering out undesirable rows from \code{ref} prior to calling \code{trainSingleR}.
+#' Unlike \code{test.genes}, setting \code{restrict} does not introduce further checks on \code{rownames(test)} in \code{classifySingleR}.
 #'
 #' @section Custom feature specification:
 #' Rather than relying on the in-built feature selection, users can pass in their own features of interest to \code{genes}.
@@ -164,11 +171,15 @@
 #' 
 #' @export
 #' @importFrom S4Vectors List isSingleString metadata metadata<-
+#' @importFrom BiocNeighbors defineBuilder AnnoyParam KmknnParam
 #' @importFrom BiocParallel SerialParam bpisup bpstart bpstop
 #' @importFrom beachmat initializeCpp
+#' @importFrom S4Vectors List
+#' @importFrom SummarizedExperiment assay
 trainSingleR <- function(
     ref, 
     labels, 
+    test.genes=NULL,
     genes="de", 
     sd.thresh=NULL, 
     de.method=c("classic", "wilcox", "t"), 
@@ -195,70 +206,98 @@ trainSingleR <- function(
         }
     }
 
-    if (!bpisup(BPPARAM) && !is(BPPARAM, "MulticoreParam")) {
-        bpstart(BPPARAM)
-        on.exit(bpstop(BPPARAM))
-    }
-
-    ref <- lapply(ref, FUN=.to_clean_matrix, assay.type=assay.type, 
-        check.missing=check.missing, msg="ref", BPPARAM=BPPARAM)
-
-    # Cleaning the genes.
-    gns <- lapply(ref, rownames)
-    if (length(unique(gns))!=1L) {
-        stop("row names are not identical across references")
-    }
-
-    if (!is.null(restrict)) {
-        keep <- gns[[1]] %in% restrict
-        ref <- lapply(ref, FUN="[", i=keep, , drop=FALSE)
-    }
-
     if (isSingleString(genes)) {
         genes <- rep(genes, length(ref))
     } else if (length(genes)!=length(ref)) {
         stop("list-like 'genes' should be the same length as 'ref'")
     }
 
-    # Cleaning the labels.
-    labels <- lapply(labels, as.character)
-    if (length(labels)!=length(ref)) {
-        stop("lists in 'labels' and 'ref' should be of the same length")
+    if (is.null(BNPARAM)) {
+        if (approximate) {
+            BNPARAM <- AnnoyParam()
+        } else {
+            BNPARAM <- KmknnParam()
+        }
     }
 
-    for (l in seq_along(labels)) {
-        keep <- !is.na(labels[[l]])
+    if (!bpisup(BPPARAM) && !is(BPPARAM, "MulticoreParam")) {
+        bpstart(BPPARAM)
+        on.exit(bpstop(BPPARAM))
+    }
+
+    output <- vector("list", length(ref))
+    names(output) <- names(ref)
+    for (l in seq_along(ref)) {
+        curref <- .to_clean_matrix(ref[[l]], assay.type, check.missing, msg="ref", BPPARAM=BPPARAM)
+
+        curlabels <- as.character(labels[[l]])
+        stopifnot(length(curlabels) == ncol(curref))
+        keep <- !is.na(curlabels)
         if (!all(keep)) {
-            labels[[l]] <- labels[[l]][keep]
-            ref[[l]] <- ref[[l]][,keep,drop=FALSE]
+            curref <- curref[,keep,drop=FALSE]
+            curlabels <- curlabels[keep]
         }
-    }
 
-    gene.info <- mapply(FUN=.identify_genes, ref=ref, labels=labels, genes=genes,
-        MoreArgs=list(de.method=de.method, de.n=de.n, de.args=de.args, BPPARAM=BPPARAM),
-        SIMPLIFY=FALSE)
+        markers <- .identify_genes(
+            ref=curref,
+            labels=curlabels,
+            genes=genes[[l]],
+            de.method=de.method,
+            de.n=de.n,
+            de.args=de.args,
+            restrict=restrict,
+            test.genes=test.genes,
+            BPPARAM=BPPARAM
+        )
+
+        if (aggr.ref) {
+            aggr <- do.call(aggregateReference, c(list(ref=quote(curref), label=curlabels, check.missing=FALSE, BPPARAM=BPPARAM), aggr.args))
+            curref <- assay(aggr)
+            curlabels <- aggr$label
+        }
 
-    if (!solo && !recompute) {
-        .Deprecated(old="recompute = FALSE")
+        ulabels <- .get_levels(curlabels)
+        built <- .build_index(
+            ref=curref,
+            labels=curlabels,
+            ulabels=ulabels,
+            test.genes=test.genes,
+            markers=markers,
+            BNPARAM=BNPARAM,
+            num.threads=num.threads
+        )
+
+        output[[l]] <- List(
+            built = built,
+            ref = curref,
+            labels = list(full = curlabels, unique = ulabels),
+            markers = list(full = markers, unique = rownames(curref)[get_ref_subset(built) + 1]),
+            options = list(BNPARAM = BNPARAM, test.genes = test.genes)
+        )
     }
-    output <- mapply(FUN=.build_trained_index, ref=ref, labels=labels, markers=gene.info,
-        MoreArgs = list(aggr.ref=aggr.ref, aggr.args=aggr.args, BPPARAM=BPPARAM, approximate = approximate, num.threads = num.threads), 
-        SIMPLIFY=FALSE)
 
     if (solo) {
         output[[1]]
     } else {
-        final <- List(output)
-        metadata(final)$recompute <- recompute
-        final
+        List(output)
     }
 }
 
-.identify_genes <- function(ref, labels, genes="de", de.method="classic", de.n=NULL, de.args=list(), BPPARAM=BPPARAM) {
+.identify_genes <- function(ref, labels, genes, de.method, de.n, test.genes, restrict, de.args, BPPARAM) {
     if (length(labels)!=ncol(ref)) {
         stop("number of labels must be equal to number of cells")
     }
 
+    # Note that the genes are reported as names rather than indexing, so these
+    # row-subsetting operations don't require re-indexing of the output. We use
+    # DelayedArrays to avoid making copies of the data.
+    if (!is.null(restrict)) {
+        ref <- DelayedArray(ref)[rownames(ref) %in% restrict,,drop=FALSE]
+    }
+    if (!is.null(test.genes)) {
+        ref <- DelayedArray(ref)[rownames(ref) %in% test.genes,,drop=FALSE]
+    }
+
     if (.is_list(genes)) {
         is.char <- vapply(genes, is.character, TRUE)
         if (all(is.char)) {
@@ -284,36 +323,8 @@ trainSingleR <- function(
     genes
 }
 
-#' @importFrom S4Vectors List
-#' @importFrom SummarizedExperiment assay
-.build_trained_index <- function(ref, labels, markers, aggr.ref, aggr.args, search.info, approximate = FALSE, BPPARAM = SerialParam(), num.threads = 1) {
-    if (aggr.ref) {
-        aggr <- do.call(aggregateReference, c(list(ref=quote(ref), label=labels, check.missing=FALSE, BPPARAM=BPPARAM), aggr.args))
-        ref <- assay(aggr)
-        labels <- aggr$label
-    }
-
-    if (anyNA(labels)) {
-        keep <- !is.na(labels)
-        ref <- ref[,keep,drop=FALSE]
-        labels <- labels[keep]
-    }
-
-    ulabels <- .get_levels(labels)
-
-    built <- .build_index(ref, markers=markers, labels=labels, ulabels=ulabels, approximate=approximate, num.threads=num.threads)
-
-    List(
-        built = built,
-        ref = ref,
-        labels = list(full = labels, unique = ulabels),
-        markers = list(full = markers, unique = rownames(ref)[get_subset(built) + 1]),
-        options = list(approximate = approximate)
-    )
-}
-
 #' @importFrom beachmat initializeCpp
-.build_index <- function(ref, markers, labels, ulabels, approximate, num.threads) {
+.build_index <- function(ref, labels, ulabels, markers, test.genes, BNPARAM, num.threads) {
     for (m in seq_along(markers)) {
         current <- markers[[m]]
         for (n in seq_along(current)) {
@@ -326,8 +337,25 @@ trainSingleR <- function(
         markers[[m]] <- current
     }
 
+    if (is.null(test.genes)) {
+        test.genes <- ref.genes <- seq_len(nrow(ref))
+    } else {
+        universe <- union(test.genes, rownames(ref))
+        test.genes <- match(test.genes, universe)
+        ref.genes <- match(rownames(ref), universe)
+    }
+
+    builder <- defineBuilder(BNPARAM)
     parsed <- initializeCpp(ref)
-    prebuild(parsed, match(labels, ulabels) - 1L, markers, approximate = approximate, nthreads = num.threads)
+    train_single(
+        test_features=test.genes - 1L, 
+        ref=parsed,
+        ref_features=ref.genes - 1L,
+        labels=match(labels, ulabels) - 1L,
+        markers=markers,
+        builder=builder,
+        nthreads=num.threads
+    )
 }
 
 .get_levels <- function(labels) sort(unique(labels))
diff --git a/R/utils.R b/R/utils.R
index 39ea03d..80bfdec 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -23,10 +23,12 @@
         old <- getAutoBPPARAM()
         setAutoBPPARAM(BPPARAM)
         on.exit(setAutoBPPARAM(old))
-        discard <- rowAnyNAs(DelayedArray(x))
 
+        y <- DelayedArray(x)
+        discard <- rowAnyNAs(y)
         if (any(discard)) {
-            x <- x[!discard,,drop=FALSE]
+            # Returning a DelayedArray to avoid making an actual subset.
+            x <- y[!discard,,drop=FALSE]
         }
     }
 
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
index e6bc6dc..264922e 100644
--- a/inst/NEWS.Rd
+++ b/inst/NEWS.Rd
@@ -2,6 +2,18 @@
 \title{SingleR News}
 \encoding{UTF-8}
 
+\section{Version 2.8.0}{\itemize{
+\item Added the \code{test.genes=} argument to \code{trainSingleR()}, to restrict marker detection to only those genes in the test dataset.
+This is also checked against \code{rownames(test)} in \code{classifySingleR()} to ensure that the test's feature space is consistent with the space used during training.
+
+\item Restored the \code{BNPARAM=} argument in \code{trainSingleR()}, to enable more fine-grained specification of neighbor search algorithms.
+The \code{approximate=} argument is deprecated. 
+
+\item Soft-deprecated \code{check.missing=} in \code{classifySingleR()} and \code{combineRecomputedResults()}.
+This is because any filtering will cause a mismatch between the row names of \code{tests} and the \code{test.genes} in \code{trained}.
+Rather, filtering should be done prior to \code{trainSingleR()}, as is done in the main \code{SingleR()} function.
+}}
+
 \section{Version 2.0.0}{\itemize{
 \item The format of the output of \code{trainSingleR()} has changed and is no longer back-compatible.
 \item \code{recompute=FALSE} in \code{trainSingleR()} does nothing; all integrated analyses are now done with \code{recompute=TRUE}.
diff --git a/inst/include/singlepp/BasicBuilder.hpp b/inst/include/singlepp/BasicBuilder.hpp
deleted file mode 100644
index 111f9bf..0000000
--- a/inst/include/singlepp/BasicBuilder.hpp
+++ /dev/null
@@ -1,317 +0,0 @@
-#ifndef SINGLEPP_BASIC_BUILDER_HPP
-#define SINGLEPP_BASIC_BUILDER_HPP
-
-#include "macros.hpp"
-
-#include "knncolle/knncolle.hpp"
-#include "tatami/tatami.hpp"
-
-#include "build_indices.hpp"
-#include "process_features.hpp"
-
-#include <vector>
-
-/**
- * @file BasicBuilder.hpp
- *
- * @brief Defines the `BasicBuilder` class.
- */
-
-namespace singlepp {
-
-/**
- * @brief Construct a single pre-built reference for further classification.
- *
- * This class prepares a single labelled reference dataset for classification of a test dataset.
- * We select the top marker genes to use for each pairwise comparison between labels,
- * and we use them to pre-compute the ranks in advance of computing the Spearman correlations.
- * The pre-built reference can then be used in `BasicScorer::run()`. 
- */
-class BasicBuilder {
-public:
-    /**
-     * @brief Default parameters for reference building.
-     */
-    struct Defaults {
-        /**
-         * See `set_top()` for details.
-         */
-        static constexpr int top = -1;
-
-        /**
-         * See `set_approximate()` for details.
-         */
-        static constexpr bool approximate = false;
-
-        /**
-         * See `set_num_threads()` for details.
-         */
-        static constexpr int num_threads = 1;
-    };
-
-private:
-    int top = Defaults::top;
-    bool approximate = Defaults::approximate;
-    int nthreads = Defaults::num_threads;
-
-public:
-    /**
-     * @param t Number of top markers to use from each pairwise comparison between labels, see `Classifier::set_top()`.
-     *
-     * @return A reference to this `BasicBuilder` object.
-     */
-    BasicBuilder& set_top(int t = Defaults::top) {
-        top = t;
-        return *this;
-    }
-
-    /**
-     * @param a Whether to use an approximate method to quickly find the quantile, see `Classifier::set_approximate()`.
-     *
-     * @return A reference to this `BasicBuilder` object.
-     */
-    BasicBuilder& set_approximate(bool a = Defaults::approximate) {
-        approximate = a;
-        return *this;
-    }
-
-    /**
-     * @param n Number of threads to use.
-     *
-     * @return A reference to this `BasicBuilder` object.
-     */
-    BasicBuilder& set_num_threads(int n = Defaults::num_threads) {
-        nthreads = n;
-        return *this;
-    }
-
-private:
-    std::vector<Reference> build_internal(const tatami::Matrix<double, int>* ref, const int* labels, const std::vector<int>& subset) const {
-        std::vector<Reference> subref;
-        if (approximate) {
-            subref = build_indices(
-                ref, 
-                labels, 
-                subset, 
-                [](size_t nr, size_t nc, const double* ptr) { 
-                    return std::shared_ptr<knncolle::Base<int, double> >(new knncolle::AnnoyEuclidean<int, double>(nr, nc, ptr)); 
-                },
-                nthreads
-            );
-        } else {
-            subref = build_indices(
-                ref, 
-                labels, 
-                subset,
-                [](size_t nr, size_t nc, const double* ptr) { 
-                    return std::shared_ptr<knncolle::Base<int, double> >(new knncolle::KmknnEuclidean<int, double>(nr, nc, ptr)); 
-                },
-                nthreads
-            );
-        }
-        return subref;
-    }
-
-public:
-    /**
-     * @brief Prebuilt reference that can be directly used for annotation.
-     */
-    struct Prebuilt {
-        /**
-         * @cond
-         */
-        Prebuilt(Markers m, std::vector<int> s, std::vector<Reference> r) :
-            markers(std::move(m)), subset(std::move(s)), references(std::move(r)) {}
-        /**
-         * @endcond
-         */
-
-        /**
-         * A vector of vectors of ranked marker genes to be used in the classification.
-         * Values are indices into the `subset` vector.
-         * The set of marker genes is typically a subset of those in the input `markers` in `build()`.
-         */
-        Markers markers;
-
-        /**
-         * The subset of features in the test/reference datasets that were used in the classification.
-         * Values are row indices into the relevant matrices.
-         */
-        std::vector<int> subset;
-
-        /**
-         * @return Number of labels in this reference.
-         */
-        size_t num_labels() const {
-            return references.size();
-        }
-
-        /**
-         * @return Number of profiles in this reference.
-         */
-        size_t num_profiles() const {
-            size_t n = 0;
-            for (const auto& ref : references) {
-                n += ref.ranked.size();
-            }
-            return n;
-        }
-
-        /**
-         * @cond
-         */
-        std::vector<Reference> references;
-        /**
-         * @endcond
-         */
-    };
-
-    /**
-     * @param ref Matrix for the reference expression profiles.
-     * Rows are genes while columns are samples.
-     * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample.
-     * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
-     * @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels, see `Markers` for more details.
-     *
-     * @return A `Prebuilt` instance that can be used in `run()` for annotation of a test dataset.
-     */
-    Prebuilt run(const tatami::Matrix<double, int>* ref, const int* labels, Markers markers) const {
-        auto subset = subset_markers(markers, top);
-        auto subref = build_internal(ref, labels, subset);
-        return Prebuilt(std::move(markers), std::move(subset), std::move(subref));
-    }
-
-public:
-    /**
-     * @brief Prebuilt reference requiring an intersection of features. 
-     */
-    struct PrebuiltIntersection {
-        /**
-         * @cond
-         */
-        PrebuiltIntersection(Markers m, std::vector<int> mats, std::vector<int> refs, std::vector<Reference> r) :
-            markers(std::move(m)), mat_subset(std::move(mats)), ref_subset(std::move(refs)), references(std::move(r)) {}
-        /**
-         * @endcond
-         */
-
-        /**
-         * A vector of vectors of ranked marker genes to be used in the classification.
-         * Values are indices into the `mat_subset` and `ref_subset` vectors for the respective matrices.
-         * The set of marker genes is typically a subset of those in the input `markers` in `build()`.
-         */
-        Markers markers;
-
-        /**
-         * Row indices of test dataset, specifying the features in the intersection.
-         * This has the same length as `ref_subset`, where corresponding entries refer to the same features in the respective datasets.
-         */
-        std::vector<int> mat_subset;
-
-        /**
-         * Row indices of reference dataset, specifying the features in the intersection.
-         * This has the same length as `mat_subset`, where corresponding entries refer to the same features in the respective datasets.
-         */
-        std::vector<int> ref_subset;
-
-        /**
-         * @return Number of labels in this reference.
-         */
-        size_t num_labels() const {
-            return references.size();
-        }
-
-        /**
-         * @return Number of profiles in this reference.
-         */
-        size_t num_profiles() const {
-            size_t n = 0;
-            for (const auto& ref : references) {
-                n += ref.ranked.size();
-            }
-            return n;
-        }
-
-        /**
-         * @cond
-         */
-        std::vector<Reference> references;
-        /**
-         * @endcond
-         */
-    };
-
-    /**
-     * @param intersection Vector defining the intersection of features betweent the test and reference datasets.
-     * Each entry is a pair where the first element is the row index in the test matrix,
-     * and the second element is the row index for the corresponding feature in the reference matrix.
-     * Each row index for either matrix should occur no more than once in `intersection`.
-     * @param ref An expression matrix for the reference expression profiles, where rows are genes and columns are cells.
-     * This should have non-zero columns.
-     * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample.
-     * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
-     * @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels, see `Markers` for more details.
-     *
-     * @return A `PrebuiltIntersection` instance that can be used in `run()` for annotation of a test dataset with the same order of genes as specified in `mat_id`.
-     *
-     * This method deals with the case where the genes are not in the same order and number across the test and reference datasets.
-     * It finds the intersection of genes and then prepares the references accordingly.
-     */
-    PrebuiltIntersection run(
-        std::vector<std::pair<int, int> > intersection,
-        const tatami::Matrix<double, int>* ref, 
-        const int* labels,
-        Markers markers)
-    const {
-        // Sorting it if it wasn't already.
-        for (size_t i = 1, end = intersection.size(); i < end; ++i) {
-            if (intersection[i] < intersection[i-1]) {
-                std::sort(intersection.begin(), intersection.end());
-                break;
-            }
-        }
-
-        subset_markers(intersection, markers, top);
-        auto pairs = unzip(intersection);
-        auto subref = build_internal(ref, labels, pairs.second);
-        return PrebuiltIntersection(std::move(markers), std::move(pairs.first), std::move(pairs.second), std::move(subref));
-    }
-
-    /**
-     * @tparam Id Gene identifier for each row.
-     *
-     * @param mat_nrow Number of rows (genes) in the test dataset.
-     * @param[in] mat_id Pointer to an array of identifiers of length equal to `mat_nrow`.
-     * This should contain a unique identifier for each row of `mat` (typically a gene name or index).
-     * If any duplicate IDs are present, only the first occurrence is used.
-     * @param ref An expression matrix for the reference expression profiles, where rows are genes and columns are cells.
-     * This should have non-zero columns.
-     * @param[in] ref_id Pointer to an array of identifiers of length equal to the number of rows of any `ref`.
-     * This should contain a unique identifier for each row in `ref`, and should be comparable to `mat_id`.
-     * If any duplicate IDs are present, only the first occurrence is used.
-     * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample.
-     * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
-     * @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels, see `Markers` for more details.
-     *
-     * @return A `PrebuiltIntersection` instance that can be used in `run()` for annotation of a test dataset with the same order of genes as specified in `mat_id`.
-     *
-     * This method deals with the case where the genes are not in the same order and number across the test and reference datasets.
-     * It finds the intersection of genes and then prepares the references accordingly.
-     */
-    template<class Id>
-    PrebuiltIntersection run(
-        size_t mat_nrow,
-        const Id* mat_id, 
-        const tatami::Matrix<double, int>* ref, 
-        const Id* ref_id, 
-        const int* labels,
-        Markers markers) 
-    const {
-        auto intersection = intersect_features(mat_nrow, mat_id, ref->nrow(), ref_id);
-        return run(std::move(intersection), ref, labels, std::move(markers));
-    }
-};
-
-}
-
-#endif
diff --git a/inst/include/singlepp/BasicScorer.hpp b/inst/include/singlepp/BasicScorer.hpp
deleted file mode 100644
index af4db05..0000000
--- a/inst/include/singlepp/BasicScorer.hpp
+++ /dev/null
@@ -1,292 +0,0 @@
-#ifndef SINGLEPP_BASIC_SCORER_HPP
-#define SINGLEPP_BASIC_SCORER_HPP
-
-#include "macros.hpp"
-
-#include "tatami/tatami.hpp"
-
-#include "annotate_cells.hpp"
-#include "process_features.hpp"
-#include "BasicBuilder.hpp"
-
-#include <vector> 
-#include <stdexcept>
-
-/**
- * @file BasicScorer.hpp
- *
- * @brief Defines the `BasicScorer` class.
- */
-
-namespace singlepp {
-
-/**
- * @brief Classify cells from a single pre-built reference dataset.
- *
- * This class uses the pre-built reference from `BasicBuilder` to classify each cell in a test dataset.
- * The algorithm and parameters are the same as described for the `Classifier` class;
- * in fact, `Classifier` just calls `BasicBuilder::run()` and then `BasicScorer::run()`.
- *
- * It is occasionally useful to call these two functions separately if the same reference dataset is to be used multiple times,
- * e.g., on different test datasets or with different parameters.
- * In such cases, we can save time by avoiding redundant builds;
- * we just have to call `BasicScorer::run()` in all subsequent uses of the pre-built reference.
- */
-class BasicScorer {
-public:
-    /**
-     * @brief Default parameters for classification.
-     */
-    struct Defaults {
-        /**
-         * See `set_quantile()` for details.
-         */
-        static constexpr double quantile = 0.8;
-
-        /**
-         * See `set_fine_tune_threshold()` for details.
-         */
-        static constexpr double fine_tune_threshold = 0.05;
-
-        /**
-         * See `set_fine_tune()` for details.
-         */
-        static constexpr bool fine_tune = true;
-
-        /**
-         * See `set_num_threads()` for details.
-         */
-        static constexpr int num_threads = 1;
-    };
-
-private:
-    double quantile = Defaults::quantile;
-    double fine_tune_threshold = Defaults::fine_tune_threshold;
-    bool fine_tune = Defaults::fine_tune;
-    int nthreads = Defaults::num_threads;
-
-public:
-    /**
-     * @param q Quantile to use to compute a per-label score from the correlations, see `Classifier::set_quantile()`.
-     *
-     * @return A reference to this `BasicScorer` object.
-     */
-    BasicScorer& set_quantile(double q = Defaults::quantile) {
-        quantile = q;
-        return *this;
-    }
-
-    /**
-     * @param t Threshold to use to select the top-scoring subset of labels during fine-tuning, see `Classifier::set_fine_tune_threshold()`.
-     *
-     * @return A reference to this `BasicScorer` object.
-     */
-    BasicScorer& set_fine_tune_threshold(double t = Defaults::fine_tune_threshold) {
-        fine_tune_threshold = t;
-        return *this;
-    }
-
-    /**
-     * @param f Whether to perform fine-tuning, see `Classifier::set_fine_tune()`.
-     *
-     * @return A reference to this `BasicScorer` object.
-     */
-    BasicScorer& set_fine_tune(bool f = Defaults::fine_tune) {
-        fine_tune = f;
-        return *this;
-    }
-
-    /**
-     * @param n Number of threads to use.
-     *
-     * @return A reference to this `BasicScorer` object.
-     */
-    BasicScorer& set_num_threads(int n = Defaults::num_threads) {
-        nthreads = n;
-        return *this;
-    }
-
-public:
-    /**
-     * @param mat Expression matrix of the test dataset, where rows are genes and columns are cells.
-     * This may have a different ordering of genes compared to the reference matrix used to create `built`,
-     * provided that all genes corresponding to `built.subset` are present.
-     * @param built An object produced by `BasicBuilder::build()`.
-     * @param[in] mat_subset Pointer to an array of length equal to that of `built.subset`,
-     * containing the index of the row of `mat` corresponding to each gene in `built.subset`.
-     * That is, row `mat_subset[i]` in `mat` should be the same gene as row `built.subset[i]` in the reference matrix.
-     * @param[out] best Pointer to an array of length equal to the number of columns in `mat`.
-     * On output, this is filled with the index of the assigned label for each cell.
-     * @param[out] scores Vector of pointers to arrays of length equal to the number of columns in `mat`.
-     * On output, this is filled with the (non-fine-tuned) score for each label for each cell.
-     * Any pointer may be `NULL` in which case the scores for that label will not be reported.
-     * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`.
-     * On output, this is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
-     * This may also be `NULL` in which case the deltas are not reported.
-     */
-    void run(const tatami::Matrix<double, int>* mat, const BasicBuilder::Prebuilt& built, const int* mat_subset, int* best, std::vector<double*>& scores, double* delta) const {
-        annotate_cells_simple(
-            mat, 
-            built.subset.size(), 
-            mat_subset, 
-            built.references, 
-            built.markers, 
-            quantile, 
-            fine_tune, 
-            fine_tune_threshold, 
-            best, 
-            scores, 
-            delta,
-            nthreads
-        );
-        return;
-    }
-
-    /**
-     * @param mat Expression matrix of the test dataset, where rows are genes and columns are cells.
-     * This should have the same order and identity of genes as the reference matrix used to create `built`.
-     * @param built An object produced by `BasicBuilder::build()`.
-     * @param[out] best Pointer to an array of length equal to the number of columns in `mat`.
-     * On output, this is filled with the index of the assigned label for each cell.
-     * @param[out] scores Vector of pointers to arrays of length equal to the number of columns in `mat`.
-     * On output, this is filled with the (non-fine-tuned) score for each label for each cell.
-     * Any pointer may be `NULL` in which case the scores for that label will not be reported.
-     * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`.
-     * On output, this is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
-     * This may also be `NULL` in which case the deltas are not reported.
-     */
-    void run(const tatami::Matrix<double, int>* mat, const BasicBuilder::Prebuilt& built, int* best, std::vector<double*>& scores, double* delta) const {
-        run(mat, built, built.subset.data(), best, scores, delta);
-        return;
-    }
-
-public:
-    /**
-     * @brief Automated classification results.
-     */
-    struct Results {
-        /**
-         * @cond
-         */
-        Results(size_t ncells, size_t nlabels) : best(ncells), scores(nlabels, std::vector<double>(ncells)), delta(ncells) {}
-
-        std::vector<double*> scores_to_pointers() {
-            std::vector<double*> output(scores.size());
-            for (size_t s = 0; s < scores.size(); ++s) {
-                output[s] = scores[s].data();
-            }
-            return output;
-        };
-        /**
-         * @endcond
-         */
-
-        /** 
-         * Vector of length equal to the number of cells in the test dataset,
-         * containing the index of the assigned label for each cell.
-         */
-        std::vector<int> best;
-
-        /**
-         * Vector of length equal to the number of labels,
-         * containing vectors of length equal to the number of cells in the test dataset.
-         * Each vector corresponds to a label and contains the (non-fine-tuned) score for each cell.
-         */
-        std::vector<std::vector<double> > scores;
-
-        /** 
-         * Vector of length equal to the number of cells in the test dataset.
-         * This contains the difference between the highest and second-highest scores for each cell, possibly after fine-tuning.
-         */
-        std::vector<double> delta;
-    };
-
-    /**
-     * @param mat Expression matrix of the test dataset, where rows are genes and columns are cells.
-     * Each row should correspond to an element of `Prebuilt::subset`.
-     * @param built An object produced by `BasicBuilder::build()`.
-     *
-     * @return A `Results` object containing the assigned labels and scores.
-     */
-    Results run(const tatami::Matrix<double, int>* mat, const BasicBuilder::Prebuilt& built) const {
-        size_t nlabels = built.references.size();
-        Results output(mat->ncol(), nlabels);
-        auto scores = output.scores_to_pointers();
-        run(mat, built, output.best.data(), scores, output.delta.data());
-        return output;
-    }
-
-    /**
-     * @param mat Expression matrix of the test dataset, where rows are genes and columns are cells.
-     * This may have a different ordering of genes compared to the reference matrix used in `build()`,
-     * provided that all genes corresponding to `Prebuilt::subset` are present.
-     * @param built An object produced by `BasicBuilder::build()`.
-     * @param[in] mat_subset Pointer to an array of length equal to that of `Prebuilt::subset`,
-     * containing the index of the row of `mat` corresponding to each gene in `Prebuilt::subset`.
-     *
-     * @return A `Results` object containing the assigned labels and scores.
-     */
-    Results run(const tatami::Matrix<double, int>* mat, const BasicBuilder::Prebuilt& built, const int* mat_subset) const {
-        size_t nlabels = built.references.size();
-        Results output(mat->ncol(), nlabels);
-        auto scores = output.scores_to_pointers();
-        run(mat, built, mat_subset, output.best.data(), scores, output.delta.data());
-        return output;
-    }
-
-public:
-    /**
-     * @param mat Expression matrix of the test dataset, where rows are genes and columns are cells.
-     * @param built An object produced by `build()` with intersections.
-     * @param[out] best Pointer to an array of length equal to the number of columns in `mat`.
-     * On output, this is filled with the index of the assigned label for each cell.
-     * @param[out] scores Vector of pointers to arrays of length equal to the number of columns in `mat`.
-     * On output, this is filled with the (non-fine-tuned) score for each label for each cell.
-     * Any pointer may be `NULL` in which case the scores for that label will not be reported.
-     * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`.
-     * On output, tkkhis is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
-     * This may also be `NULL` in which case the deltas are not reported.
-     */
-    void run(
-        const tatami::Matrix<double, int>* mat, 
-        const BasicBuilder::PrebuiltIntersection& built,
-        int* best,
-        std::vector<double*>& scores,
-        double* delta) 
-    const {
-        annotate_cells_simple(mat, 
-            built.mat_subset.size(), 
-            built.mat_subset.data(), 
-            built.references, 
-            built.markers, 
-            quantile, 
-            fine_tune, 
-            fine_tune_threshold, 
-            best, 
-            scores, 
-            delta,
-            nthreads
-        );
-        return;
-    }
-
-    /**
-     * @param mat Expression matrix of the test dataset, where rows are genes and columns are cells.
-     * @param built An object produced by `build()` with intersections.
-     *
-     * @return A `Results` object containing the assigned labels and scores.
-     */ 
-    Results run(const tatami::Matrix<double, int>* mat, const BasicBuilder::PrebuiltIntersection& built) const {
-        size_t nlabels = built.references.size();
-        Results output(mat->ncol(), nlabels);
-        auto scores = output.scores_to_pointers();
-
-        run(mat, built, output.best.data(), scores, output.delta.data());
-        return output;
-    }
-};
-
-}
-
-
-#endif
diff --git a/inst/include/singlepp/ChooseClassicMarkers.hpp b/inst/include/singlepp/ChooseClassicMarkers.hpp
deleted file mode 100644
index c84feea..0000000
--- a/inst/include/singlepp/ChooseClassicMarkers.hpp
+++ /dev/null
@@ -1,292 +0,0 @@
-#ifndef SINGLEPP_CHOOSE_CLASSIC_MARKERS_HPP
-#define SINGLEPP_CHOOSE_CLASSIC_MARKERS_HPP
-
-#include "macros.hpp"
-
-#include "tatami/tatami.hpp"
-
-#include "Markers.hpp"
-
-#include <vector>
-#include <cmath>
-#include <set>
-
-/**
- * @file ChooseClassicMarkers.hpp
- *
- * @brief Classic method for choosing markers.
- */
-
-namespace singlepp {
-
-/**
- * @brief Classic method for choosing markers.
- *
- * This class implements the classic **SingleR** method for choosing markers from (typically bulk) reference dataasets.
- * We assume that we have a matrix of representative log-expression profiles for each label, typically computed by taking some average across all reference profiles for that label.
- * For the comparison between labels A and B, we define the marker set as the top genes with the largest positive differences in A's profile over B (i.e., the log-fold change).
- * The number of top genes can either be explicitly specified or it can be automatically determined from the number of labels.
- *
- * If multiple references are present, we compute the sum of the log-fold changes across all references with both labels A and B.
- * The ordering of the top genes is then performed using the sum across references.
- * It is assumed that all references have the same number and ordering of features in their rows.
- */
-class ChooseClassicMarkers {
-public:
-    /**
-     * @brief Default parameter settings.
-     */
-    struct Defaults {
-        /**
-         * See `set_number()` for more details.
-         */
-        static constexpr int number = -1;
-
-        /**
-         * See `set_num_threads()` for more details.
-         */
-        static constexpr int num_threads = 1;
-    };
-
-private:
-    int number = Defaults::number;
-    int nthreads = Defaults::num_threads;
-
-public:
-    /**
-     * @param n Number of top genes to use as the marker set in each pairwise comparison.
-     * If -1, this is automatically determined from the number of labels, see `number_of_markers()`.
-     *
-     * @return A reference to this `ChooseClassicMarkers` object.
-     */
-    ChooseClassicMarkers& set_number(int n = Defaults::number) {
-        number = n;
-        return *this;
-    }
-
-    /**
-     * @param nlabels Number of labels in the reference(s).
-     *
-     * @return An appropriate number of markers for each pairwise comparison.
-     *
-     * The exact expression is defined as \f$500 (\frac{2}{3})^{\log_2{N}}\f$ for \f$N\f$ labels,
-     * which steadily decreases the markers per comparison as the number of labels increases.
-     * This aims to avoid an excessive number of features when dealing with references with many labels.
-     */
-    static int number_of_markers(int nlabels) {
-        return std::round(500.0 * std::pow(2.0/3.0, std::log(static_cast<double>(nlabels)) / std::log(2.0)));
-    }
-
-    /**
-     * @param n Number of threads to use.
-     *
-     * @return A reference to this `ChooseClassicMarkers` object.
-     */
-    ChooseClassicMarkers& set_num_threads(int n = Defaults::num_threads) {
-        nthreads = n;
-        return *this;
-    }
-
-public:
-    /**
-     * @tparam Matrix A **tatami** matrix.
-     * @tparam Label Integer type for the label identity.
-     *
-     * @param representatives Vector of representative matrices.
-     * Each matrix should contain one column per label; each column should have a representative log-expression profile for that label.
-     * All matrices should have the same number of rows, corresponding to the same features.
-     * @param labels Vector of pointers of length equal to `representatives`.
-     * Each array should be of length equal to the number of columns of the corresponding entry of `representatives`.
-     * Each value of the array should specify the label for the corresponding column in its matrix.
-     * Values should lie in $[0, N)$ for $N$ unique labels across all entries of `labels`.
-     *
-     * @return A `Markers` object containing the top markers for each pairwise comparison between labels.
-     */
-    template<class Matrix, typename Label>
-    Markers run(const std::vector<const Matrix*>& representatives, const std::vector<const Label*>& labels) const {
-        /**
-         * @cond
-         */
-        size_t nrefs = representatives.size();
-        if (nrefs != labels.size()) {
-            throw std::runtime_error("'representatives' and 'labels' should have the same length");
-        }
-        if (nrefs == 0) {
-            throw std::runtime_error("'representatives' should contain at least one entry");
-        }
-        size_t ngenes = representatives.front()->nrow();
-
-        // Determining the total number of labels.
-        int nlabels = 0;
-        for (size_t r = 0; r < nrefs; ++r) {
-            size_t ncols = representatives[r]->ncol();
-            auto curlab = labels[r];
-            for (size_t c = 0; c < ncols; ++c) {
-                if (nlabels <= curlab[c]) {
-                    nlabels = curlab[c] + 1;
-                }
-            }
-        }
-
-        // Generating mappings.
-        std::vector<std::vector<int> > labels_to_index(nrefs, std::vector<int>(nlabels, -1));
-        for (size_t r = 0; r < nrefs; ++r) {
-            size_t ncols = representatives[r]->ncol();
-            auto curlab = labels[r];
-            auto& current = labels_to_index[r];
-            for (size_t c = 0; c < ncols; ++c) {
-                auto& dest = current[curlab[c]];
-                if (dest != -1) {
-                    throw std::runtime_error("each label should correspond to no more than one column in each reference");
-                }
-                current[curlab[c]] = c;
-            }
-        }
-
-        // Generating pairs for compute; this sacrifices some memory for convenience.
-        std::vector<std::pair<int, int> > pairs;
-        {
-            std::set<std::pair<int, int> > pairs0;
-            for (size_t r = 0; r < nrefs; ++r) {
-                size_t ncols = representatives[r]->ncol();
-                auto curlab = labels[r];
-                for (size_t c1 = 0; c1 < ncols; ++c1) {
-                    for (size_t c2 = 0; c2 < c1; ++c2) {
-                        pairs0.emplace(curlab[c1], curlab[c2]);
-                    }
-                }
-            }
-            pairs.insert(pairs.end(), pairs0.begin(), pairs0.end());
-            std::sort(pairs.begin(), pairs.end());
-        }
-        size_t npairs = pairs.size();
-
-        Markers output(nlabels, std::vector<std::vector<int> >(nlabels));
-
-        int actual_number = number;
-        if (number < 0) {
-            actual_number = number_of_markers(nlabels);
-        } 
-        if (actual_number > static_cast<int>(ngenes)) {
-            actual_number = ngenes;
-        }
-
-#ifndef SINGLEPP_CUSTOM_PARALLEL
-        #pragma omp parallel num_threads(nthreads)
-        {
-#else
-        SINGLEPP_CUSTOM_PARALLEL([&](int, size_t start, size_t len) -> void {
-#endif
-            
-            std::vector<std::pair<double, int> > sorter(ngenes), sorted_copy(ngenes);
-            typedef typename  Matrix::value_type Value_;
-            typedef typename  Matrix::index_type Index_;
-            std::vector<Value_> rbuffer(ngenes), lbuffer(ngenes);
-            std::vector<std::shared_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > > rworks(nrefs), lworks(nrefs);
-
-#ifndef SINGLEPP_CUSTOM_PARALLEL
-            #pragma omp for
-            for (size_t p = 0; p < npairs; ++p) {
-#else
-            for (size_t p = start, end = start + len; p < end; ++p) {
-#endif
-
-                auto curleft = pairs[p].first;
-                auto curright = pairs[p].second;
-
-                auto sIt = sorter.begin();
-                for (int g = 0; g < ngenes; ++g, ++sIt) {
-                    sIt->first = 0;
-                    sIt->second = g;
-                }
-
-                for (size_t i = 0; i < nrefs; ++i) {
-                    const auto& curavail = labels_to_index[i];
-                    auto lcol = curavail[curleft];
-                    auto rcol = curavail[curright];
-                    if (lcol == -1 || rcol == -1) {
-                        continue;                            
-                    }
-
-                    // Initialize extractors as needed.
-                    auto& lwrk = lworks[i];
-                    if (!lwrk) {
-                        lwrk = representatives[i]->dense_column();
-                    }
-                    auto lptr = lwrk->fetch(lcol, lbuffer.data());
-
-                    auto& rwrk = rworks[i];
-                    if (!rwrk) {
-                        rwrk = representatives[i]->dense_column();
-                    }
-                    auto rptr = rwrk->fetch(rcol, rbuffer.data());
-
-                    auto sIt = sorter.begin();
-                    for (int g = 0; g < ngenes; ++g, ++lptr, ++rptr, ++sIt) {
-                        sIt->first += *lptr - *rptr; 
-                    }
-                }
-
-                for (int flip = 0; flip < 2; ++flip) {
-                    // Flipping the log-fold changes so we do the reverse. 
-                    // We do a copy so that the treatment of tries would be the same as if
-                    // we had sorted on the reversed log-fold changes in the first place;
-                    // otherwise the first sort might change the order of ties.
-                    if (flip) {
-                        sorter = sorted_copy;
-                        for (auto& s : sorter) {
-                            s.first *= -1;
-                        }
-                    } else {
-                        sorted_copy = sorter;
-                    }
-
-                    // partial sort is guaranteed to be stable due to the second index resolving ties.
-                    std::partial_sort(sorter.begin(), sorter.begin() + actual_number, sorter.end());
-
-                    std::vector<int> stuff;
-                    stuff.reserve(actual_number);
-                    for (int g = 0; g < actual_number && sorter[g].first < 0; ++g) { // only keeping those with positive log-fold changes (negative after reversing).
-                        stuff.push_back(sorter[g].second); 
-                    }
-
-                    if (flip) {
-                        output[curleft][curright] = std::move(stuff);
-                    } else {
-                        output[curright][curleft] = std::move(stuff);
-                    }
-                }
-            }
-
-#ifndef SINGLEPP_CUSTOM_PARALLEL
-        }
-#else    
-        }, npairs, nthreads);
-#endif        
-        /**
-         * @endcond
-         */
-
-        return output;
-    }
-
-    /**
-     * @tparam Matrix A **tatami** matrix.
-     *
-     * @param representative A representative matrix, containing one column per label.
-     * Each column should have a representative log-expression profile for that label.
-     * @param labels Pointer to an array of length equal to the number of columns in `representative`.
-     * Each value of the array should specify the label for the corresponding column. 
-     * Values should lie in $[0, N)$ for $N$ unique labels. 
-     *
-     * @return A `Markers` object containing the top markers for each pairwise comparison between labels.
-     */
-    template<class Matrix>
-    Markers run(const Matrix* representative, const int* labels) const {
-        return run(std::vector<const Matrix*>{ representative }, std::vector<const int*>{ labels });
-    }
-};
-
-}
-
-#endif
diff --git a/inst/include/singlepp/Classifier.hpp b/inst/include/singlepp/Classifier.hpp
deleted file mode 100644
index 8d784fc..0000000
--- a/inst/include/singlepp/Classifier.hpp
+++ /dev/null
@@ -1,312 +0,0 @@
-#ifndef SINGLEPP_CLASSIFIER_HPP
-#define SINGLEPP_CLASSIFIER_HPP
-
-#include "macros.hpp"
-
-#include "tatami/tatami.hpp"
-
-#include "BasicBuilder.hpp"
-#include "BasicScorer.hpp"
-
-#include <vector> 
-#include <stdexcept>
-
-/**
- * @file Classifier.hpp
- *
- * @brief Defines the `Classifier` class.
- */
-
-namespace singlepp {
-
-/**
- * @brief Automatically assign cell type labels based on an expression matrix.
- *
- * This implements the [**SingleR**](https://bioconductor.org/packages/SingleR) algorithm for automated annotation of single-cell RNA-seq data.
- * For each cell, we compute the Spearman rank correlation between that cell and the reference expression profiles.
- * This is done using only the subset of genes that are label-specific markers,
- * most typically the top genes from pairwise comparisons between each label's expression profiles.
- * For each label, we take the correlations involving that label's reference profiles and convert it into a score.
- * The label with the highest score is used as an initial label for that cell.
- *
- * For each cell, we apply fine-tuning iterations to improve the label accuracy by refining the feature space.
- * At each iteration, we find the subset of labels with scores that are close to the maximum score according to some threshold.
- * We recompute the scores based on the markers for this label subset, and we repeat the process until only one label is left in the subset or the subset is unchanged.
- * At the end of the iterations, the label with the highest score (or the only label, if just one is left) is used as the label for the cell.
- * This process aims to remove noise by eliminating irrelevant genes when attempting to distinguish closely related labels.
- * 
- * Each label's score is defined as a user-specified quantile of the distribution of correlations across all reference profiles assigned to that label.
- * (We typically consider a large quantile, e.g., the 80% percentile of the correlations.)
- * The use of a quantile avoids problems with differences in the number of reference profiles per label;
- * in contrast, just using the "top X correlations" would implicitly favor labels with more reference profiles.
- *
- * The choice of Spearman's correlation provides some robustness against batch effects when comparing reference and test datasets.
- * Only the relative expression _within_ each cell needs to be comparable, not their relative expression across cells.
- * As a result, it does not matter whether raw counts are supplied or log-transformed expression values, as the latter is a monotonic transformation of the latter (within each cell).
- * The algorithm is also robust to differences in technologies between reference and test profiles, though it is preferable to have like-for-like comparisons. 
- *
- * @see
- * Aran D et al. (2019). 
- * Reference-based analysis of lung single-cell sequencing reveals a transitional profibrotic macrophage.
- * _Nat. Immunol._ 20, 163-172
- */
-class Classifier {
-public:
-    /**
-     * @brief Default parameters for annotation.
-     */
-    struct Defaults {
-        /**
-         * See `set_quantile()` for details.
-         */
-        static constexpr double quantile = BasicScorer::Defaults::quantile;
-
-        /**
-         * See `set_fine_tune_threshold()` for details.
-         */
-        static constexpr double fine_tune_threshold = BasicScorer::Defaults::fine_tune_threshold;
-
-        /**
-         * See `set_fine_tune()` for details.
-         */
-        static constexpr bool fine_tune = BasicScorer::Defaults::fine_tune;
-
-        /**
-         * See `set_top()` for details.
-         */
-        static constexpr int top = BasicBuilder::Defaults::top;
-
-        /**
-         * See `set_approximate()` for details.
-         */
-        static constexpr bool approximate = BasicBuilder::Defaults::approximate;
-
-        /**
-         * See `set_num_threads()` for details.
-         */
-        static constexpr int num_threads = 1;
-    };
-
-private:
-    int top = Defaults::top;
-    bool approximate = Defaults::approximate;
-    double quantile = Defaults::quantile;
-    double fine_tune_threshold = Defaults::fine_tune_threshold;
-    bool fine_tune = Defaults::fine_tune;
-    int nthreads = Defaults::num_threads;
-
-public:
-    /**
-     * @param q Quantile to use to compute a per-label score from the correlations.
-     *
-     * @return A reference to this `Classifier` object.
-     *
-     * Values of `q` closer to 0.5 focus on the behavior of the majority of a label's reference profiles.
-     * Smaller values will be more sensitive to the presence of a subset of profiles that are more similar to the test cell,
-     * which can be useful when the reference profiles themselves are heterogeneous.
-     */
-    Classifier& set_quantile(double q = Defaults::quantile) {
-        quantile = q;
-        return *this;
-    }
-
-    /**
-     * @param t Threshold to use to select the top-scoring subset of labels during fine-tuning.
-     * Larger values increase the chance of recovering the correct label at the cost of computational time.
-     *
-     * @return A reference to this `Classifier` object.
-     *
-     * Needless to say, one should not set `t` to a value that is too large.
-     * Otherwise, the first fine-tuning iteration would just contain all labels and there would be no reduction of the marker space.
-     */
-    Classifier& set_fine_tune_threshold(double t = Defaults::fine_tune_threshold) {
-        fine_tune_threshold = t;
-        return *this;
-    }
-
-    /**
-     * @param f Whether to perform fine-tuning.
-     * This can be disabled to improve speed at the cost of accuracy.
-     *
-     * @return A reference to this `Classifier` object.
-     */
-    Classifier& set_fine_tune(bool f = Defaults::fine_tune) {
-        fine_tune = f;
-        return *this;
-    }
-
-    /**
-     * @param t Number of top markers to use from each pairwise comparison between labels.
-     * Larger values improve the stability of the correlations at the cost of increasing noise and computational work.
-     *
-     * Setting it to a negative value will instruct `run()` to use all supplied markers.
-     * This is useful in situations where the supplied markers have already been curated.
-     *
-     * @return A reference to this `Classifier` object.
-     */
-    Classifier& set_top(int t = Defaults::top) {
-        top = t;
-        return *this;
-    }
-
-    /**
-     * @param a Whether to use an approximate method to quickly find the quantile.
-     * This sacrifices some accuracy for speed when labels have many reference profiles.
-     *
-     * @return A reference to this `Classifier` object.
-     */
-    Classifier& set_approximate(bool a = Defaults::approximate) {
-        approximate = a;
-        return *this;
-    }
-
-    /**
-     * @param n Number of threads to use.
-     *
-     * @return A reference to this `Classifier` object.
-     */
-    Classifier& set_num_threads(int n = Defaults::num_threads) {
-        nthreads = n;
-        return *this;
-    }
-
-private:
-    BasicBuilder::Prebuilt build_reference(const tatami::Matrix<double, int>* ref, const int* labels, Markers markers) const {
-        BasicBuilder builder;
-        builder
-            .set_top(top)
-            .set_approximate(approximate)
-            .set_num_threads(nthreads);
-        return builder.run(ref, labels, std::move(markers));
-    }
-
-    template<class Id>
-    BasicBuilder::PrebuiltIntersection build_reference(size_t mat_nrow, const Id* mat_id, const tatami::Matrix<double, int>* ref, const Id* ref_id, const int* labels, Markers markers) const {
-        BasicBuilder builder;
-        builder
-            .set_top(top)
-            .set_approximate(approximate)
-            .set_num_threads(nthreads);
-        return builder.run(mat_nrow, mat_id, ref, ref_id, labels, std::move(markers));
-    }
-
-    BasicScorer set_up_scorer() const {
-        BasicScorer scorer;
-        scorer
-            .set_quantile(quantile)
-            .set_fine_tune(fine_tune)
-            .set_fine_tune_threshold(fine_tune_threshold)
-            .set_num_threads(nthreads);
-        return scorer;
-    }
-
-public:
-    /**
-     * @param mat Expression matrix of the test dataset, where rows are genes and columns are cells.
-     * @param ref An expression matrix for the reference expression profiles.
-     * This should have non-zero columns and the same number of rows (i.e., genes) at `mat`.
-     * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample.
-     * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
-     * @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels, see `Markers` for more details.
-     * @param[out] best Pointer to an array of length equal to the number of columns in `mat`.
-     * On output, this is filled with the index of the assigned label for each cell.
-     * @param[out] scores Vector of pointers of length equal to the number of labels.
-     * Each pointer should point to an array of length equal to the number of columns in `mat`.
-     * On output, this is filled with the (non-fine-tuned) score for that label for each cell.
-     * Any pointer may be `NULL` in which case the scores for that label will not be reported.
-     * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`.
-     * On output, this is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
-     * This may also be `NULL` in which case the deltas are not reported.
-     */
-    void run(const tatami::Matrix<double, int>* mat, const tatami::Matrix<double, int>* ref, const int* labels, Markers markers, int* best, std::vector<double*>& scores, double* delta) const {
-        auto prebuilt = build_reference(ref, labels, std::move(markers));
-        set_up_scorer().run(mat, prebuilt, best, scores, delta);
-        return;
-    }
-
-    /**
-     * @param mat Expression matrix of the test dataset, where rows are genes and columns are cells.
-     * @param ref An expression matrix for the reference expression profiles.
-     * This should have non-zero columns and the same number of rows (i.e., genes) at `mat`.
-     * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample.
-     * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
-     * @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels, see `Markers` for more details.
-     *
-     * @return A `BasicScorer::Results` object containing the assigned labels and scores.
-     */
-    BasicScorer::Results run(const tatami::Matrix<double, int>* mat, const tatami::Matrix<double, int>* ref, const int* labels, Markers markers) const {
-        auto prebuilt = build_reference(ref, labels, std::move(markers));
-        return set_up_scorer().run(mat, prebuilt);
-    }
-
-public:
-    /**
-     * @tparam Id Gene identifier for each row.
-     *
-     * @param mat Expression matrix of the test dataset, where rows are genes and columns are cells.
-     * @param[in] mat_id Pointer to an array of identifiers of length equal to the number of rows of `mat`.
-     * This should contain a unique identifier for each row of `mat` (typically a gene name or index).
-     * @param ref An expression matrix for the reference expression profiles, where rows are genes and columns are cells.
-     * This should have non-zero columns.
-     * @param[in] ref_id Pointer to an array of identifiers of length equal to the number of rows of any `ref`.
-     * This should contain a unique identifier for each row in `ref`, and should be comparable to `mat_id`.
-     * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample.
-     * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
-     * @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels, see `Markers` for more details.
-     * @param[out] best Pointer to an array of length equal to the number of columns in `mat`.
-     * On output, this is filled with the index of the assigned label for each cell.
-     * @param[out] scores Vector of pointers of length equal to the number of labels.
-     * Each pointer should point to an array of length equal to the number of columns in `mat`.
-     * On output, this is filled with the (non-fine-tuned) score for that label for each cell.
-     * Any pointer may be `NULL` in which case the scores for that label will not be reported.
-     * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`.
-     * On output, this is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
-     * This may also be `NULL` in which case the deltas are not reported.
-     * 
-     * This version of `run()` applies an intersection to find the common genes between `mat` and `ref`, based on their shared values in `mat_id` and `ref_id`.
-     * The annotation is then performed using only the subset of common genes.
-     * The aim is to easily accommodate differences in feature annotation between the test and reference profiles.
-     */
-    template<class Id>
-    void run(
-        const tatami::Matrix<double, int>* mat, 
-        const Id* mat_id, 
-        const tatami::Matrix<double, int>* ref, 
-        const Id* ref_id, 
-        const int* labels,
-        Markers markers, 
-        int* best,
-        std::vector<double*>& scores,
-        double* delta) 
-    const {
-        auto built = build_reference(mat->nrow(), mat_id, ref, ref_id, labels, std::move(markers));
-        set_up_scorer().run(mat, built, best, scores, delta);
-        return;
-    }
-
-    /**
-     * @tparam Id Gene identifier for each row.
-     *
-     * @param mat Expression matrix of the test dataset, where rows are genes and columns are cells.
-     * @param[in] mat_id Pointer to an array of identifiers of length equal to the number of rows of `mat`.
-     * This should contain a unique identifier for each row of `mat` (typically a gene name or index).
-     * @param ref An expression matrix for the reference expression profiles, where rows are genes and columns are cells.
-     * This should have non-zero columns.
-     * @param[in] ref_id Pointer to an array of identifiers of length equal to the number of rows of any `ref`.
-     * This should contain a unique identifier for each row in `ref`, and should be comparable to `mat_id`.
-     * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample.
-     * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
-     * @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels, see `Markers` for more details.
-     *
-     * @return A `BasicScorer::Results` object containing the assigned labels and scores.
-     */ 
-    template<class Id>
-    BasicScorer::Results run(const tatami::Matrix<double, int>* mat, const Id* mat_id, const tatami::Matrix<double, int>* ref, const Id* ref_id, const int* labels, Markers markers) const {
-        auto built = build_reference(mat->nrow(), mat_id, ref, ref_id, labels, std::move(markers));
-        return set_up_scorer().run(mat, built);
-    }
-};
-
-}
-
-#endif
diff --git a/inst/include/singlepp/IntegratedBuilder.hpp b/inst/include/singlepp/IntegratedBuilder.hpp
deleted file mode 100644
index 1ba3509..0000000
--- a/inst/include/singlepp/IntegratedBuilder.hpp
+++ /dev/null
@@ -1,578 +0,0 @@
-#ifndef SINGLEPP_INTEGRATED_BUILDER_HPP
-#define SINGLEPP_INTEGRATED_BUILDER_HPP
-
-#include "macros.hpp"
-
-#include "scaled_ranks.hpp"
-#include "BasicBuilder.hpp"
-
-#include <vector>
-#include <unordered_set>
-#include <unordered_map>
-#include <algorithm>
-
-/**
- * @file IntegratedBuilder.hpp
- *
- * @brief Prepare for integrated classification across references.
- */
-
-namespace singlepp {
-
-/**
- * @brief Reference datasets prepared for integrated classification.
- */
-struct IntegratedReferences {
-    /**
-     * @return Number of reference datasets.
-     * Each object corresponds to the reference used in an `IntegratedBuilder::add()` call, in the same order.
-     */
-    size_t num_references() const {
-        return markers.size();
-    }
-
-    /**
-     * @param r Reference dataset of interest.
-     * @return Number of labels in this reference.
-     */
-    size_t num_labels(size_t r) const {
-        return markers[r].size();
-    }
-
-    /**
-     * @param r Reference dataset of interest.
-     * @return Number of profiles in this reference.
-     */
-    size_t num_profiles(size_t r) const {
-        size_t n = 0;
-        for (const auto& ref : ranked[r]) {
-            n += ref.size();
-        }
-        return n;
-    }
-
-    /**
-     * @cond
-     */
-    std::vector<int> universe; // To be used by IntegratedScorer for indexed extraction.
-
-    std::vector<bool> check_availability;
-    std::vector<std::unordered_set<int> > available; // indices to 'universe'
-    std::vector<std::vector<std::vector<int> > > markers; // indices to 'universe'
-    std::vector<std::vector<std::vector<RankedVector<int, int> > > > ranked; // .second contains indices to 'universe'
-
-    void resize(size_t n) {
-        check_availability.resize(n);
-        available.resize(n);
-        markers.resize(n);
-        ranked.resize(n);
-    }
-    /**
-     * @endcond
-     */
-};
-
-/**
- * @brief Factory to prepare multiple references for integrated classification.
- *
- * For each reference dataset, we expect a `BasicBuilder::Prebuilt` or `BasicBuilder::PrebuiltIntersection` object,
- * as well as the original data structures (matrix, labels, etc.) used to construct that object.
- * These values are passed into `add()` to register that dataset, which can be repeated multiple times for different references.
- * Finally, calling `finish()` will return a vector of integrated references that can be used in `IntegratedScorer::run()`.
- * 
- * The preparation process mostly involves checking that the gene indices are consistent across references.
- * This is especially true when each reference contains a different set of features that must be intersected with the features in the test dataset.
- * See the documentation for `IntegratedScorer` for more details on the classification based on the integrated references.
- */
-class IntegratedBuilder {
-private:
-    std::vector<const tatami::Matrix<double, int>*> stored_matrices;
-    std::vector<const int*> stored_labels;
-    IntegratedReferences references;
-    std::vector<std::unordered_map<int, int> > gene_mapping;
-    int nthreads = Defaults::num_threads;
-
-public:
-    /**
-     * @brief Default parameters.
-     */
-    struct Defaults {
-        /**
-         * See `set_num_threads()` for details.
-         */
-        static constexpr int num_threads = 1;
-    };
-
-    /**
-     * @param n Number of threads to use.
-     *
-     * @return A reference to this `IntegratedBuilder` object.
-     */
-    IntegratedBuilder& set_num_threads(int n = Defaults::num_threads) {
-        nthreads = n;
-        return *this;
-    }
-
-private:
-    void add_internal_base(const tatami::Matrix<double, int>* ref, const int* labels) {
-        stored_matrices.push_back(ref);
-        stored_labels.push_back(labels);
-        references.resize(stored_matrices.size());
-        gene_mapping.resize(stored_matrices.size());
-    }
-
-    // Adding a reference without requiring any pruning of markers.
-    template<class Subset>
-    void add_internal_direct(const tatami::Matrix<double, int>* ref, const int* labels, const Markers& old_markers, const Subset& mat_subset) {
-        add_internal_base(ref, labels);
-
-        // Adding the markers for each label, indexed according to their
-        // position in the test matrix. This assumes that 'mat_subset' is
-        // appropriately specified to contain the test's row indices. 
-        auto& new_markers = references.markers.back();
-        new_markers.reserve(old_markers.size());
-
-        for (size_t i = 0; i < old_markers.size(); ++i) {
-            const auto& cur_old_markers = old_markers[i];
-
-            std::unordered_set<int> unified;
-            for (const auto& x : cur_old_markers) {
-                unified.insert(x.begin(), x.end());
-            }
-
-            new_markers.emplace_back(unified.begin(), unified.end());
-
-            if constexpr(!std::is_same<Subset, bool>::value) {
-                auto& cur_new_markers = new_markers.back();
-                for (auto& y : cur_new_markers) {
-                    y = mat_subset[y];
-                }
-            }
-        }
-        return;
-    }
-
-    // Adding a reference with different features from the test, requiring some
-    // pruning to remove markers that are not present in the intersection.
-    template<class Subset>
-    void add_internal_intersect(
-        const std::vector<std::pair<int, int> >& intersection,
-        const tatami::Matrix<double, int>* ref, 
-        const int* labels, 
-        const Markers& old_markers,
-        const Subset& ref_subset)
-    {
-        add_internal_base(ref, labels);
-
-        // Manually constructing the markers. This involves (i) pruning out the
-        // markers that aren't present in the intersection, and (ii) updating 
-        // their indices so that they point to rows of 'mat', not 'ref'.
-        std::unordered_map<int, int> reverse_map;
-        for (const auto& i : intersection) {
-            reverse_map[i.second] = i.first;
-        }
-
-        auto subindex = [&](int i) -> int {
-            if constexpr(!std::is_same<Subset, bool>::value) {
-                return ref_subset[i];
-            } else {
-                return i;
-            }
-        };
-
-        auto& new_markers = references.markers.back();
-        new_markers.resize(old_markers.size());
-
-        for (size_t i = 0; i < old_markers.size(); ++i) {
-            const auto& cur_old_markers = old_markers[i];
-
-            std::unordered_set<int> unified;
-            for (const auto& x : cur_old_markers) {
-                unified.insert(x.begin(), x.end());
-            }
-
-            auto& cur_new_markers = new_markers[i];
-            cur_new_markers.reserve(unified.size());
-
-            for (auto y : unified) {
-                auto it = reverse_map.find(subindex(y));
-                if (it != reverse_map.end()) {
-                    cur_new_markers.push_back(it->second);
-                }
-            }
-        }
-
-        // Constructing the mapping of mat's rows to the reference rows.
-        references.check_availability.back() = true;
-        auto& mapping = gene_mapping.back();
-        for (const auto& i : intersection) {
-            mapping[i.first] = i.second;
-        }
-        return;
-    }
-
-public:
-    /**
-     * Add a reference dataset to this object for later use in `finish()`.
-     * This overload assumes that the reference and test datasets have the same features.
-     * `ref` and `labels` are expected to remain valid until `finish()` is called.
-     *
-     * @param ref Matrix containing the reference expression values.
-     * Rows are features and columns are reference samples.
-     * The number and identity of features should be identical to the test dataset to be classified in `IntegratedScorer`.
-     * @param[in] labels Pointer to an array of label assignments.
-     * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
-     * @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels in `ref`, see `Markers` for more details.
-     */
-    void add(const tatami::Matrix<double, int>* ref, const int* labels, const Markers& markers) {
-        add_internal_direct(ref, labels, markers, false);    
-    }
-
-    /**
-     * Add a reference dataset to this object for later use in `finish()`.
-     * This overload automatically identifies the intersection of features between the test and reference datasets.
-     * `ref` and `labels` are expected to remain valid until `finish()` is called.
-     * `mat_id` and `mat_nrow` should also be constant for all invocations to `add()`.
-     *
-     * @tparam Id Type of the gene identifier for each row.
-     *
-     * @param mat_nrow Number of rows (genes) in the test dataset.
-     * @param[in] mat_id Pointer to an array of identifiers of length equal to `mat_nrow`.
-     * This should contain a unique identifier for each row of `mat` (typically a gene name or index).
-     * If any duplicate IDs are present, only the first occurrence is used.
-     * @param ref An expression matrix for the reference expression profiles, where rows are genes and columns are cells.
-     * This should have non-zero columns.
-     * @param[in] ref_id Pointer to an array of identifiers of length equal to the number of rows of any `ref`.
-     * This should contain a unique identifier for each row in `ref`, and should be comparable to `mat_id`.
-     * If any duplicate IDs are present, only the first occurrence is used.
-     * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample.
-     * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
-     * @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels in `ref`, see `Markers` for more details.
-     */
-    template<typename Id>
-    void add(size_t mat_nrow,
-        const Id* mat_id,
-        const tatami::Matrix<double, int>* ref, 
-        const Id* ref_id,
-        const int* labels, 
-        const Markers& markers)
-    {
-        auto intersection = intersect_features(mat_nrow, mat_id, ref->nrow(), ref_id);
-        add_internal_intersect(intersection, ref, labels, markers, false);
-    }
-
-public:
-    /**
-     * Add a reference dataset to this object for later use in `finish()`.
-     * This overload assumes that the reference and test datasets have the same features,
-     * and that the reference dataset has already been processed through `BasicBuilder::run()`.
-     * `ref` and `labels` are expected to remain valid until `finish()` is called.
-     *
-     * @param ref Matrix containing the reference expression values.
-     * Rows are features and columns are reference samples.
-     * The number and identity of features should be identical to the test dataset to be classified in `IntegratedScorer`.
-     * @param[in] labels Pointer to an array of label assignments.
-     * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
-     * @param built The built reference created by running `BasicBuilder::run()` on `ref` and `labels`.
-     */
-    void add(const tatami::Matrix<double, int>* ref, const int* labels, const BasicBuilder::Prebuilt& built) {
-        add_internal_direct(ref, labels, built.markers, built.subset);
-        return;
-    }
-
-    /**
-     * Add a reference dataset to this object for later use in `finish()`.
-     * This overload requires an existing intersection between the test and reference datasets,
-     * and assumes that the reference dataset has already been processed through `BasicBuilder::run()`.
-     * `ref` and `labels` are expected to remain valid until `finish()` is called.
-     *
-     * @param intersection Vector defining the intersection of features between the test and reference datasets.
-     * Each entry is a pair where the first element is the row index in the test matrix,
-     * and the second element is the row index for the corresponding feature in the reference matrix.
-     * Each row index for either matrix should occur no more than once in `intersection`.
-     * @param ref An expression matrix for the reference expression profiles, where rows are genes and columns are cells.
-     * This should have non-zero columns.
-     * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample.
-     * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
-     * @param built The built reference created by running `BasicBuilder::run()` on all preceding arguments.
-     */
-    void add(const std::vector<std::pair<int, int> >& intersection,
-        const tatami::Matrix<double, int>* ref, 
-        const int* labels, 
-        const BasicBuilder::PrebuiltIntersection& built) 
-    {
-        add_internal_direct(ref, labels, built.markers, built.mat_subset);
-        references.check_availability.back() = true;
-
-        // Constructing the mapping of mat's rows to the reference rows.
-        auto& mapping = gene_mapping.back();
-        for (const auto& i : intersection) {
-            mapping[i.first] = i.second;
-        }
-        return;
-    }
-
-    /**
-     * Add a reference dataset to this object for later use in `finish()`.
-     * This overload automatically identifies the intersection of features between the test and reference datasets,
-     * and assumes that the reference dataset has already been processed through `BasicBuilder::run()`.
-     * `ref` and `labels` are expected to remain valid until `finish()` is called.
-     * `mat_id` and `mat_nrow` should also be constant for all invocations to `add()`.
-     *
-     * @tparam Id Type of the gene identifier for each row.
-     *
-     * @param mat_nrow Number of rows (genes) in the test dataset.
-     * @param[in] mat_id Pointer to an array of identifiers of length equal to `mat_nrow`.
-     * This should contain a unique identifier for each row of `mat` (typically a gene name or index).
-     * If any duplicate IDs are present, only the first occurrence is used.
-     * @param ref An expression matrix for the reference expression profiles, where rows are genes and columns are cells.
-     * This should have non-zero columns.
-     * @param[in] ref_id Pointer to an array of identifiers of length equal to the number of rows of any `ref`.
-     * This should contain a unique identifier for each row in `ref`, and should be comparable to `mat_id`.
-     * If any duplicate IDs are present, only the first occurrence is used.
-     * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample.
-     * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
-     * @param built The built reference created by running `BasicBuilder::run()` on all preceding arguments.
-     */
-    template<typename Id>
-    void add(size_t mat_nrow,
-        const Id* mat_id,
-        const tatami::Matrix<double, int>* ref, 
-        const Id* ref_id,
-        const int* labels, 
-        const BasicBuilder::PrebuiltIntersection& built) 
-    {
-        auto intersection = intersect_features(mat_nrow, mat_id, ref->nrow(), ref_id);
-        add(intersection, ref, labels, built);
-        return;
-    }
-
-    /**
-     * Add a reference dataset to this object for later use in `finish()`.
-     * This overload assumes that the reference and test datasets have the same features,
-     * and that the reference dataset has already been processed through `BasicBuilder::run()`.
-     * `ref` and `labels` are expected to remain valid until `finish()` is called.
-     *
-     * @param intersection Vector defining the intersection of features betweent the test and reference datasets.
-     * Each entry is a pair where the first element is the row index in the test matrix,
-     * and the second element is the row index for the corresponding feature in the reference matrix.
-     * Each row index for either matrix should occur no more than once in `intersection`.
-     * @param ref An expression matrix for the reference expression profiles, where rows are genes and columns are cells.
-     * This should have non-zero columns.
-     * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample.
-     * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
-     * @param built The built reference created by running `BasicBuilder::run()` on `ref` and `labels`.
-     */
-    void add(const std::vector<std::pair<int, int> >& intersection,
-        const tatami::Matrix<double, int>* ref, 
-        const int* labels, 
-        const BasicBuilder::Prebuilt& built) 
-    {
-        add_internal_intersect(intersection, ref, labels, built.markers, built.subset);
-    }
-
-    /**
-     * Add a reference dataset to this object for later use in `finish()`.
-     * This overload automatically identifies the intersection of features between the test and reference datasets,
-     * and assumes that the reference dataset has already been processed through `BasicBuilder::run()`.
-     * `ref` and `labels` are expected to remain valid until `finish()` is called.
-     * `mat_id` and `mat_nrow` should also be constant for all invocations to `add()`.
-     *
-     * @tparam Id Type of the gene identifier for each row.
-     *
-     * @param mat_nrow Number of rows (genes) in the test dataset.
-     * @param[in] mat_id Pointer to an array of identifiers of length equal to `mat_nrow`.
-     * This should contain a unique identifier for each row of `mat` (typically a gene name or index).
-     * If any duplicate IDs are present, only the first occurrence is used.
-     * @param ref An expression matrix for the reference expression profiles, where rows are genes and columns are cells.
-     * This should have non-zero columns.
-     * @param[in] ref_id Pointer to an array of identifiers of length equal to the number of rows of any `ref`.
-     * This should contain a unique identifier for each row in `ref`, and should be comparable to `mat_id`.
-     * If any duplicate IDs are present, only the first occurrence is used.
-     * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample.
-     * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
-     * @param built The built reference created by running `BasicBuilder::run()` on `ref` and `labels`.
-     */
-    template<typename Id>
-    void add(size_t mat_nrow,
-        const Id* mat_id,
-        const tatami::Matrix<double, int>* ref, 
-        const Id* ref_id,
-        const int* labels, 
-        const BasicBuilder::Prebuilt& built) 
-    {
-        auto intersection = intersect_features(mat_nrow, mat_id, ref->nrow(), ref_id);
-        add(intersection, ref, labels, built);
-    }
-
-private:
-    /* Here, we've split out some of the functions for easier reading.
-     * Otherwise everything would land in a single mega-function.
-     */
-
-    static void fill_ranks(
-        const tatami::Matrix<double, int>* curmat, 
-        const int* curlab, 
-        const std::vector<int>& in_use, 
-        const std::vector<int>& positions, 
-        std::vector<std::vector<RankedVector<int, int> > >& cur_ranked,
-        int nthreads) 
-    {
-        // If we don't need to check availability, this implies that 
-        // the reference has 1:1 feature mapping to the test dataset.
-        // In that case, we can proceed quite simply.
-        tatami::parallelize([&](int, int start, int len) -> void {
-            RankedVector<double, int> tmp_ranked;
-            tmp_ranked.reserve(in_use.size());
-
-            // 'in_use' is guaranteed to be sorted and unique, see its derivation in finish().
-            // This means we can directly use it for indexed extraction.
-            auto wrk = tatami::consecutive_extractor<false>(curmat, false, start, len, in_use); 
-            std::vector<double> buffer(in_use.size());
-
-            for (int c = start, end = start + len; c < end; ++c) {
-                auto ptr = wrk->fetch(c, buffer.data());
-
-                tmp_ranked.clear();
-                for (int i = 0, end = in_use.size(); i < end; ++i, ++ptr) {
-                    tmp_ranked.emplace_back(*ptr, i);
-                }
-                std::sort(tmp_ranked.begin(), tmp_ranked.end());
-
-                auto& final_ranked = cur_ranked[curlab[c]][positions[c]];
-                simplify_ranks(tmp_ranked, final_ranked);
-            }
-        }, curmat->ncol(), nthreads);
-    }
-
-    static void fill_ranks(
-        const tatami::Matrix<double, int>* curmat, 
-        const int* curlab, 
-        const std::vector<int>& in_use, 
-        const std::vector<int>& positions,
-        const std::unordered_map<int, int>& cur_mapping,
-        std::unordered_set<int>& cur_available,
-        std::vector<std::vector<RankedVector<int, int> > >& cur_ranked,
-        int nthreads)
-    {
-        // If we need to check availability, then we need to check
-        // the mapping of test genes to row indices of the reference.
-        std::vector<std::pair<int, int> > remapping; 
-        remapping.reserve(in_use.size());
-
-        for (int i = 0, end = in_use.size(); i < end; ++i) {
-            auto it = cur_mapping.find(in_use[i]);
-            if (it != cur_mapping.end()) {
-                remapping.emplace_back(it->second, i); // using 'i' instead of 'in_use[i]', as we want to work with indices into 'in_use', not the values of 'in_use' themselves.
-                cur_available.insert(i);
-            }
-        }
-
-        std::sort(remapping.begin(), remapping.end());
-
-        // This section is just to enable indexed extraction by tatami.
-        // There's no need to consider duplicates among the
-        // 'remapping[i].first', 'cur_mapping->second' is guaranteed to be
-        // unique as a consequence of how intersect_features works.
-        std::vector<int> remapped_in_use; 
-        remapped_in_use.reserve(remapping.size());
-        for (const auto& p : remapping) {
-            remapped_in_use.push_back(p.first);
-        }
-
-        tatami::parallelize([&](int, int start, int len) -> void {
-            RankedVector<double, int> tmp_ranked;
-            tmp_ranked.reserve(remapped_in_use.size());
-            std::vector<double> buffer(remapped_in_use.size());
-            auto wrk = tatami::consecutive_extractor<false>(curmat, false, start, len, remapped_in_use);
-
-            for (size_t c = start, end = start + len; c < end; ++c) {
-                auto ptr = wrk->fetch(c, buffer.data());
-
-                tmp_ranked.clear();
-                for (const auto& p : remapping) {
-                    tmp_ranked.emplace_back(*ptr, p.second); // remember, 'p.second' corresponds to indices into 'in_use'.
-                    ++ptr;
-                }
-                std::sort(tmp_ranked.begin(), tmp_ranked.end());
-
-                auto& final_ranked = cur_ranked[curlab[c]][positions[c]];
-                simplify_ranks(tmp_ranked, final_ranked);
-            }
-        }, curmat->ncol(), nthreads);
-    }
-
-public:
-    /**
-     * @return The set of integrated references, for use in `IntegratedScorer`.
-     *
-     * This function should only be called once, after all reference datasets have been registered with `add()`.
-     * Any further invocations of this function will not be valid.
-     */
-    IntegratedReferences finish() {
-        // Identify the union of all marker genes.
-        auto& in_use = references.universe;
-        std::unordered_map<int, int> remap_to_universe;
-        {
-            std::unordered_set<int> in_use_tmp;
-            for (const auto& refmarkers : references.markers) {
-                for (const auto& mrk : refmarkers) {
-                    in_use_tmp.insert(mrk.begin(), mrk.end());
-                }
-            }
-
-            in_use.insert(in_use.end(), in_use_tmp.begin(), in_use_tmp.end());
-            std::sort(in_use.begin(), in_use.end());
-
-            for (int i = 0, end = in_use.size(); i < end; ++i) {
-                remap_to_universe[in_use[i]] = i;
-            }
-        }
-
-        for (size_t r = 0, end = references.num_references(); r < end; ++r) {
-            auto curlab = stored_labels[r];
-            auto curmat = stored_matrices[r];
-
-            // Reindexing the markers to point to the universe.
-            auto& curmarkers = references.markers[r];
-            for (auto& outer : curmarkers) {
-                for (auto& x : outer) {
-                    x = remap_to_universe.find(x)->second;
-                }
-            }
-
-            auto& cur_ranked = references.ranked[r];
-            std::vector<int> positions;
-            {
-                size_t nlabels = curmarkers.size();
-                size_t NC = curmat->ncol();
-                positions.reserve(NC);                
-
-                std::vector<int> samples_per_label(nlabels);
-                for (size_t c = 0; c < NC; ++c) {
-                    auto& pos = samples_per_label[curlab[c]];
-                    positions.push_back(pos);
-                    ++pos;
-                }
-
-                cur_ranked.resize(nlabels);
-                for (size_t l = 0; l < nlabels; ++l) {
-                    cur_ranked[l].resize(samples_per_label[l]);
-                }
-            }
-
-            // Finally filling the rankings.
-            if (!references.check_availability[r]) {
-                fill_ranks(curmat, curlab, in_use, positions, references.ranked[r], nthreads);
-            } else {
-                fill_ranks(curmat, curlab, in_use, positions, gene_mapping[r], references.available[r], references.ranked[r], nthreads);
-            }
-        }
-
-        return std::move(references);
-    }
-};
-
-}
-
-#endif
diff --git a/inst/include/singlepp/IntegratedScorer.hpp b/inst/include/singlepp/IntegratedScorer.hpp
deleted file mode 100644
index e57b804..0000000
--- a/inst/include/singlepp/IntegratedScorer.hpp
+++ /dev/null
@@ -1,344 +0,0 @@
-#ifndef SINGLEPP_RECOMPUTE_SCORES_HPP
-#define SINGLEPP_RECOMPUTE_SCORES_HPP
-
-#include "macros.hpp"
-
-#include "tatami/tatami.hpp"
-
-#include "compute_scores.hpp"
-#include "scaled_ranks.hpp"
-#include "Classifier.hpp"
-#include "IntegratedBuilder.hpp"
-
-#include <vector>
-#include <unordered_map>
-#include <unordered_set>
-
-/**
- * @file IntegratedScorer.hpp
- *
- * @brief Integrate classifications from multiple references.
- */
-
-namespace singlepp {
-
-/**
- * @brief Integrate classifications from multiple references.
- *
- * In situations where multiple reference datasets are available,
- * we would like to obtain a single prediction for each cell from all of those references.
- * This is somewhat tricky as the different references are likely to contain strong batch effects,
- * complicating the calculation of marker genes between labels from different references (and thus precluding direct use of the usual `Classifier::run()`).
- * The labels themselves also tend to be inconsistent, e.g., different vocabularies and resolutions, making it difficult to define sensible groups in a combined "super-reference".
- *
- * To avoid these issues, we first perform classification within each reference individually.
- * For each test cell, we identify its predicted label from a given reference, and we collect all the marker genes for that label (across all pairwise comparisons in that reference).
- * After doing this for each reference, we pool all of the collected markers to obtain a common set of interesting genes.
- * We then compute the correlation-based score between the test cell's expression profile and its predicted label from each reference, using that common set of genes.
- * The label with the highest score is considered the best representative across all references.
- *
- * This strategy is similar to using `Classifier::run()` without fine-tuning, 
- * except that we are choosing between the best labels from all references rather than between all labels from one reference.
- * The main idea is to create a common feature set so that the correlations can be reasonably compared across references.
- * Note that differences in the feature sets across references are tolerated by simply ignoring missing genes when computing the correlations.
- * This reduces the comparability of the scores as the effective feature set will vary a little (or a lot, depending) across references;
- * nonetheless, it is preferred to taking the intersection, which is liable to leave us with very few genes.
- *
- * Our approach avoids any direct comparison between the expression profiles of different references,
- * allowing us to side-step the question of how to deal with the batch effects.
- * Similarly, we defer responsibility on solving the issue of label heterogeneity,
- * by just passing along the existing labels and leaving it to the user's interpretation.
- */
-class IntegratedScorer {
-public:
-    /**
-     * @brief Default parameters.
-     */
-    struct Defaults {
-        /**
-         * See `set_quantile()` for details.
-         */
-        static constexpr double quantile = Classifier::Defaults::quantile;
-
-        /**
-         * See `set_num_threads()` for details.
-         */
-        static constexpr int num_threads = 1;
-    };
-
-    /**
-     * @param q Quantile to use to compute a per-label score from the correlations.
-     *
-     * @return A reference to this `IntegratedScorer` object.
-     *
-     * See `Classifier::set_quantile()` for more details.
-     */
-    IntegratedScorer& set_quantile(double q = Defaults::quantile) {
-        quantile = q;
-        return *this;
-    }
-
-    /**
-     * @param n Number of threads to use.
-     * By default, this is inherited from the parent `IntegratedBuilder` object. 
-     *
-     * @return A reference to this `IntegratedScorer` object.
-     */
-    IntegratedScorer& set_num_threads(int n = Defaults::num_threads) {
-        nthreads = n;
-        return *this;
-    }
-
-private:
-    double quantile = Defaults::quantile;
-    int nthreads = Defaults::num_threads;
-
-private:
-    /* Here, we've split out some of the functions for easier reading.
-     * Otherwise everything would land in a single mega-function.
-     */
-
-    static void build_miniverse(int cell,
-        const std::vector<const int*>& assigned,
-        const IntegratedReferences& built,
-        std::unordered_set<int>& uset, 
-        std::vector<int>& uvec) 
-    {
-        uset.clear();
-        size_t nref = built.num_references();
-
-        for (size_t r = 0; r < nref; ++r) {
-            auto best = assigned[r][cell];
-            const auto& markers = built.markers[r][best];
-            uset.insert(markers.begin(), markers.end());
-        }
-
-        uvec.clear();
-        uvec.insert(uvec.end(), uset.begin(), uset.end());
-        std::sort(uvec.begin(), uvec.end());
-        return;
-    }
-
-    template<class Extractor_>
-    static void fill_ranks(
-        Extractor_* wrk,
-        const std::vector<int>& miniverse,        
-        int cell,
-        std::vector<double>& buffer,
-        RankedVector<double, int>& data_ranked) 
-    {
-        data_ranked.clear();
-        if (miniverse.empty()) {
-            return;
-        }
-
-        auto ptr = wrk->fetch(cell, buffer.data());
-        for (auto u : miniverse) {
-            data_ranked.emplace_back(ptr[u], u);
-        }
-
-        std::sort(data_ranked.begin(), data_ranked.end());
-        return;
-    }
-
-    static void prepare_mapping(
-        const IntegratedReferences& built, 
-        size_t ref,
-        const std::vector<int>& miniverse,
-        std::unordered_map<int, int>& mapping)
-    {
-        mapping.clear();
-        if (built.check_availability[ref]) {
-            const auto& cur_available = built.available[ref];
-
-            // If we need to check availability, we reconstruct the mapping
-            // for the intersection of features available in this reference.
-            // We then calculate the scaled ranks for the data.
-            int counter = 0;
-            for (auto c : miniverse) {
-                if (cur_available.find(c) != cur_available.end()) {
-                    mapping[c] = counter;
-                    ++counter;
-                }
-            }
-        } else {
-            // If we don't need to check availability, the mapping is
-            // much simpler. Technically, we could do this in the outer
-            // loop if none of the references required checks, but this
-            // seems like a niche optimization in practical settings.
-            for (size_t s = 0; s < miniverse.size(); ++s) {
-                auto u = miniverse[s];
-                mapping[u] = s;
-            }
-        } 
-        return;
-    }
-
-public:
-    /**
-     * @param mat Expression matrix of the test dataset, where rows are genes and columns are cells.
-     * The identity of the rows should be consistent with the arguments used in `IntegratedBuilder::add()`.
-     * @param[in] assigned Vector of pointers of length equal to the number of references.
-     * Each pointer should point to an array of length equal to the number of columns in `mat`,
-     * containing the assigned label for each column in each reference.
-     * @param built Set of integrated references produced by `IntegratedBuilder::finish()`.
-     * @param[out] best Pointer to an array of length equal to the number of columns in `mat`.
-     * On output, this is filled with the index of the reference with the best label for each cell.
-     * @param[out] scores Vector of pointers of length equal to the number of references.
-     * Each pointer should point to an array of length equal to the number of columns in `mat`.
-     * On output, this is filled with the (non-fine-tuned) score for the best label of that reference for each cell.
-     * Any pointer may be `NULL` in which case the scores for that label will not be reported.
-     * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`.
-     * On output, this is filled with the difference between the highest and second-highest scores.
-     * This may also be `NULL` in which case the deltas are not reported.
-     */
-    void run(
-        const tatami::Matrix<double, int>* mat,
-        const std::vector<const int*>& assigned,
-        const IntegratedReferences& built,
-        int* best,
-        std::vector<double*>& scores,
-        double* delta)
-    const {
-        auto NR = mat->nrow();
-        auto nref = built.num_references();
-
-        tatami::parallelize([&](int, int start, int len) -> void {
-            // We perform an indexed extraction, so all subsequent indices
-            // will refer to indices into this subset (i.e., 'built.universe').
-            auto wrk = tatami::consecutive_extractor<false>(mat, false, start, len, built.universe); 
-            std::vector<double> buffer(built.universe.size());
-
-            RankedVector<double, int> data_ranked, data_ranked2;
-            data_ranked.reserve(NR);
-            data_ranked2.reserve(NR);
-            RankedVector<int, int> ref_ranked;
-            ref_ranked.reserve(NR);
-
-            std::vector<double> scaled_data(NR);
-            std::vector<double> scaled_ref(NR);
-            std::unordered_set<int> miniverse_tmp;
-            std::vector<int> miniverse;
-            std::unordered_map<int, int> mapping;
-            std::vector<double> all_correlations;
-
-            for (int i = start, end = start + len; i < end; ++i) {
-                build_miniverse(i, assigned, built, miniverse_tmp, miniverse);
-                fill_ranks(wrk.get(), miniverse, i, buffer, data_ranked);
-
-                // Scanning through each reference and computing the score for the best group.
-                double best_score = -1000, next_best = -1000;
-                int best_ref = 0;
-
-                for (size_t r = 0; r < nref; ++r) {
-                    prepare_mapping(built, r, miniverse, mapping);
-
-                    scaled_ref.resize(mapping.size());
-                    scaled_data.resize(mapping.size());
-
-                    data_ranked2.clear();
-                    subset_ranks(data_ranked, data_ranked2, mapping);
-                    scaled_ranks(data_ranked2, scaled_data.data());
-
-                    // Now actually calculating the score for the best group for
-                    // this cell in this reference. This assumes that 'ref.ranked'
-                    // already contains sorted pairs where the indices refer to the
-                    // rows of the original data matrix.
-                    auto best = assigned[r][i];
-                    const auto& best_ranked = built.ranked[r][best];
-                    all_correlations.clear();
-
-                    for (size_t s = 0; s < best_ranked.size(); ++s) {
-                        ref_ranked.clear();
-                        subset_ranks(best_ranked[s], ref_ranked, mapping);
-                        scaled_ranks(ref_ranked, scaled_ref.data());
-                        double cor = distance_to_correlation(scaled_ref.size(), scaled_data, scaled_ref);
-                        all_correlations.push_back(cor);
-                    }
-
-                    double score = correlations_to_scores(all_correlations, quantile);
-                    if (scores[r]) {
-                        scores[r][i] = score;
-                    }
-                    if (score > best_score) {
-                        next_best = best_score;
-                        best_score = score;
-                        best_ref = r;
-                    } else if (score > next_best) {
-                        next_best = score;
-                    }
-                }
-
-                if (best) {
-                    best[i] = best_ref;
-                }
-                if (delta && nref > 1) {
-                    delta[i] = best_score - next_best;
-                }
-            }
-
-        }, mat->ncol(), nthreads);
-    }
-
-public:
-    /**
-     * @brief Results of the integrated annotation.
-     */
-    struct Results {
-        /**
-         * @cond
-         */
-        Results(size_t ncells, size_t nrefs) : best(ncells), scores(nrefs, std::vector<double>(ncells)), delta(ncells) {}
-
-        std::vector<double*> scores_to_pointers() {
-            std::vector<double*> output(scores.size());
-            for (size_t s = 0; s < scores.size(); ++s) {
-                output[s] = scores[s].data();
-            }
-            return output;
-        };
-        /**
-         * @endcond
-         */
-
-        /** 
-         * Vector of length equal to the number of cells in the test dataset,
-         * containing the index of the reference with the top-scoring label for each cell.
-         */
-        std::vector<int> best;
-
-        /**
-         * Vector of length equal to the number of references,
-         * containing vectors of length equal to the number of cells in the test dataset.
-         * Each vector corresponds to a reference and contains the score for the best label in that reference for each cell.
-         */
-        std::vector<std::vector<double> > scores;
-
-        /** 
-         * Vector of length equal to the number of cells in the test dataset.
-         * This contains the difference between the highest and second-highest scores for each cell.
-         */
-        std::vector<double> delta;
-    };
-
-    /**
-     * @param mat Expression matrix of the test dataset, where rows are genes and columns are cells.
-     * The identity of the rows should be consistent with the arguments used in `IntegratedBuilder::add()`.
-     * @param[in] assigned Vector of pointers of length equal to the number of references.
-     * Each pointer should point to an array of length equal to the number of columns in `mat`,
-     * containing the assigned label for each column in each reference.
-     * @param built Set of integrated references produced by `IntegratedBuilder::finish()`.
-     *
-     * @return A `Results` object containing the assigned labels and scores.
-     */
-    Results run(const tatami::Matrix<double, int>* mat, const std::vector<const int*>& assigned, const IntegratedReferences& built) const {
-        Results output(mat->ncol(), built.num_references());
-        auto scores = output.scores_to_pointers();
-        run(mat, assigned, built, output.best.data(), scores, output.delta.data());
-        return output;
-    }
-};
-
-}
-
-#endif
diff --git a/inst/include/singlepp/Markers.hpp b/inst/include/singlepp/Markers.hpp
deleted file mode 100644
index 9228ec6..0000000
--- a/inst/include/singlepp/Markers.hpp
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef SINGLEPP_MARKERS_HPP
-#define SINGLEPP_MARKERS_HPP
-
-#include "macros.hpp"
-
-#include <vector>
-
-/**
- * @file Markers.hpp
- *
- * @brief Define the `Markers` typedef.
- */
-
-namespace singlepp {
-
-/**
- * A vector of vectors of ranked marker lists, used to determine which features should be used to compute correlations in `Classifier`.
- *
- * For a `Markers` object `markers`, let us consider the vector at `markers[0][1]`.
- * This vector is expected to contain the ranked indices of the marker genes for label 0 compared to label 1.
- * Most typically, this is generated by identifying the genes that are upregulated in label 0 compared to 1 and sorting by decreasing effect size.
- * Indices should refer to the rows of the reference expression matrices (i.e., `ref` in the various `Classifier::run()` or `BasicBuilder::run()` methods).
- * So, for example, `markers[0][1][0]` should contain the row index of the most upregulated gene in label 0 compared to 1.
- *
- * For a given reference dataset, the corresponding `Markers` object should have length equal to the number of labels in that reference.
- * Each middle vector (i.e., `markers[i]` for non-negative `i` less than the number of labels) should also have length equal to the number of labels.
- * The innermost vectors that are not on the diagonal (i.e., `markers[i][j]` for `i != j`) may be of any positive length and should contain unique row indices.
- * Any innermost vector along the "diagonal" (i.e., `markers[i][i]`) is typically of zero length.
- *
- * Ideally, the non-diagonal innermost vectors should be at least as long as the setting of `Classifier::set_top()`.
- * The cell type annotation will still work with shorter (non-empty) vectors but it will not be possible to achieve the user-specified `set_top()`.
- * In cases involving feature intersections in `run()`, the vectors should be long enough to achieve the specified `set_top()` after removing non-shared genes.
- * For longer vectors, the annotation will safely ignore the indices after the `set_top()` specification.
- *
- * As mentioned previously, the diagonal innermost vectors are typically empty, given that it makes little sense to identify upregulated markers in a label compared to itself.
- * That said, any genes stored on the diagonal will be respected and used in all feature subsets for the corresponding label.
- * This can be exploited by advanced users to efficiently store "universal" markers for a label, i.e., markers that are applicable in all comparisons to other labels.
- */
-typedef std::vector<std::vector<std::vector<int> > > Markers;
-
-}
-
-#endif
diff --git a/inst/include/singlepp/annotate_cells.hpp b/inst/include/singlepp/annotate_cells.hpp
deleted file mode 100644
index 1ae2b9d..0000000
--- a/inst/include/singlepp/annotate_cells.hpp
+++ /dev/null
@@ -1,116 +0,0 @@
-#ifndef SINGLEPP_ANNOTATE_CELLS_HPP
-#define SINGLEPP_ANNOTATE_CELLS_HPP
-
-#include "macros.hpp"
-
-#include "tatami/tatami.hpp"
-
-#include "scaled_ranks.hpp"
-#include "process_features.hpp"
-#include "build_indices.hpp"
-#include "fine_tune.hpp"
-
-#include <vector>
-#include <algorithm>
-#include <cmath>
-
-namespace singlepp {
-
-inline void annotate_cells_simple(
-    const tatami::Matrix<double, int>* mat,
-    size_t num_subset,
-    const int* subset,
-    const std::vector<Reference>& ref,
-    const Markers& markers,
-    double quantile,
-    bool fine_tune,
-    double threshold,
-    int* best, 
-    std::vector<double*>& scores,
-    double* delta,
-    int nthreads)
-{
-    // Figuring out how many neighbors to keep and how to compute the quantiles.
-    const size_t NL = ref.size();
-    std::vector<int> search_k(NL);
-    std::vector<std::pair<double, double> > coeffs(NL);
-    for (size_t r = 0; r < NL; ++r) {
-        double denom = ref[r].index->nobs() - 1;
-        double prod = denom * (1 - quantile);
-        auto k = std::ceil(prod) + 1;
-        search_k[r] = k;
-
-        // `(1 - quantile) - (k - 2) / denom` represents the gap to the smaller quantile,
-        // while `(k - 1) / denom - (1 - quantile)` represents the gap from the larger quantile.
-        // The size of the gap is used as the weight for the _other_ quantile, i.e., 
-        // the closer you are to a quantile, the higher the weight.
-        // We convert these into proportions by dividing by their sum, i.e., `1/denom`.
-        coeffs[r].first = static_cast<double>(k - 1) - prod;
-        coeffs[r].second = prod - static_cast<double>(k - 2);
-    }
-
-    std::vector<int> subcopy(subset, subset + num_subset);
-    SubsetSorter subsorted(subcopy);
-
-    tatami::parallelize([&](int, int start, int length) -> void {
-        auto wrk = tatami::consecutive_extractor<false>(mat, false, start, length, subsorted.extraction_subset());
-        RankedVector<double, int> vec(num_subset);
-        std::vector<double> buffer(num_subset);
-
-        FineTuner ft;
-        std::vector<double> curscores(NL);
-
-        for (int c = start, end = start + length; c < end; ++c) {
-            auto ptr = wrk->fetch(c, buffer.data());
-            subsorted.fill_ranks(ptr, vec);
-            scaled_ranks(vec, buffer.data()); // 'buffer' can be written to, as all data is extracted to 'vec'.
-
-            curscores.resize(NL);
-            for (size_t r = 0; r < NL; ++r) {
-                size_t k = search_k[r];
-                auto current = ref[r].index->find_nearest_neighbors(buffer.data(), k);
-
-                double last = current[k - 1].second;
-                last = 1 - 2 * last * last;
-                if (k == 1) {
-                    curscores[r] = last;
-                } else {
-                    double next = current[k - 2].second;
-                    next = 1 - 2 * next * next;
-                    curscores[r] = coeffs[r].first * next + coeffs[r].second * last;
-                }
-
-                if (scores[r]) {
-                    scores[r][c] = curscores[r];
-                }
-            }
-
-            if (!fine_tune) {
-                auto top = std::max_element(curscores.begin(), curscores.end());
-                best[c] = top - curscores.begin();
-                if (delta) {
-                    if (curscores.size() > 1) {
-                        double topscore = *top;
-                        *top = -100;
-                        delta[c] = topscore - *std::max_element(curscores.begin(), curscores.end());
-                    } else {
-                        delta[c] = std::numeric_limits<double>::quiet_NaN();
-                    }
-                }
-            } else {
-                auto tuned = ft.run(vec, ref, markers, curscores, quantile, threshold);
-                best[c] = tuned.first;
-                if (delta) {
-                    delta[c] = tuned.second;
-                }
-            }
-        }
-
-    }, mat->ncol(), nthreads);
-
-    return;
-}
-
-}
-
-#endif
diff --git a/inst/include/singlepp/build_indices.hpp b/inst/include/singlepp/build_indices.hpp
deleted file mode 100644
index 0dbe22b..0000000
--- a/inst/include/singlepp/build_indices.hpp
+++ /dev/null
@@ -1,110 +0,0 @@
-#ifndef SINGLEPP_BUILD_INDICES_HPP
-#define SINGLEPP_BUILD_INDICES_HPP
-
-#include "macros.hpp"
-
-#include "knncolle/knncolle.hpp"
-#include "tatami/tatami.hpp"
-
-#include "process_features.hpp"
-#include "scaled_ranks.hpp"
-
-#include <vector>
-#include <memory>
-#include <algorithm>
-
-namespace singlepp {
-
-inline size_t get_nlabels(size_t n, const int* labels) { 
-    if (n == 0) {
-        throw std::runtime_error("reference dataset must have at least one column");
-    }
-    return *std::max_element(labels, labels + n) + 1;
-}
-
-struct Reference {
-    std::vector<RankedVector<int, int> > ranked;
-    std::shared_ptr<knncolle::Base<int, double> > index;
-};
-
-template<class Builder>
-std::vector<Reference> build_indices(const tatami::Matrix<double, int>* ref, const int* labels, const std::vector<int>& subset, const Builder& build, int nthreads) {
-    size_t NC = ref->ncol();
-    size_t nlabels = get_nlabels(NC, labels);
-    std::vector<int> label_count(nlabels);
-    for (size_t i = 0; i < NC; ++i) {
-        ++label_count[labels[i]];
-    }
-
-    size_t NR = subset.size();
-    std::vector<Reference> nnrefs(nlabels);
-    std::vector<std::vector<double> > nndata(nlabels);
-    for (size_t l = 0; l < nlabels; ++l) {
-        if (label_count[l] == 0) {
-            throw std::runtime_error(std::string("no entries for label ") + std::to_string(l));
-        }
-        nnrefs[l].ranked.resize(label_count[l]);
-        nndata[l].resize(label_count[l] * NR);
-    }
-
-    std::vector<size_t> offsets(NC);
-    {
-        std::vector<size_t> counter(nlabels);
-        for (size_t c = 0; c < NC; ++c) {
-            auto& o = counter[labels[c]];
-            offsets[c] = o;
-            ++o;
-        }
-    }
-
-    SubsetSorter subsorter(subset);
-
-    tatami::parallelize([&](int, int start, int len) -> void {
-        RankedVector<double, int> ranked(NR);
-        std::vector<double> buffer(ref->nrow());
-        auto wrk = tatami::consecutive_extractor<false>(ref, false, start, len, subsorter.extraction_subset());
-
-        for (int c = start, end = start + len; c < end; ++c) {
-            auto ptr = wrk->fetch(c, buffer.data());
-            subsorter.fill_ranks(ptr, ranked); 
-
-            auto curlab = labels[c];
-            auto curoff = offsets[c];
-            auto scaled = nndata[curlab].data() + curoff * NR;
-            scaled_ranks(ranked, scaled); 
-
-            // Storing as a pair of ints to save space; as long
-            // as we respect ties, everything should be fine.
-            auto& stored_ranks = nnrefs[curlab].ranked[curoff];
-            stored_ranks.reserve(ranked.size());
-            simplify_ranks(ranked, stored_ranks);
-        }
-    }, ref->ncol(), nthreads);
-
-#ifndef SINGLEPP_CUSTOM_PARALLEL
-    #pragma omp parallel for num_threads(nthreads)
-    for (size_t l = 0; l < nlabels; ++l) {
-#else
-    SINGLEPP_CUSTOM_PARALLEL([&](int, size_t start, size_t len) -> void {
-    for (size_t l = start, end = start + len; l < end; ++l) {
-#endif
-        nnrefs[l].index = build(NR, label_count[l], nndata[l].data());
-
-        // Trying to free the memory as we go along, to offset the copying
-        // of nndata into the memory store owned by the knncolle index.
-        nndata[l].clear();
-        nndata[l].shrink_to_fit();
-
-#ifndef SINGLEPP_CUSTOM_PARALLEL
-    }
-#else
-    }
-    }, nlabels, nthreads);
-#endif
-
-    return nnrefs;
-}
-
-}
-
-#endif
diff --git a/inst/include/singlepp/compute_scores.hpp b/inst/include/singlepp/compute_scores.hpp
deleted file mode 100644
index 8ad93fe..0000000
--- a/inst/include/singlepp/compute_scores.hpp
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef SINGLEPP_COMPUTE_SCORES_HPP
-#define SINGLEPP_COMPUTE_SCORES_HPP
-
-#include "macros.hpp"
-
-#include <vector>
-#include <algorithm>
-#include <limits>
-#include <cmath>
-
-namespace singlepp {
-
-inline double correlations_to_scores (std::vector<double>& correlations, double quantile) {
-    const size_t ncells=correlations.size();
-    if (ncells==0) {
-        return std::numeric_limits<double>::quiet_NaN();
-    } else if (quantile==1 || ncells==1) {
-        return *std::max_element(correlations.begin(), correlations.end());
-    } else {
-        const double denom = ncells - 1; 
-        const double prod = denom * quantile;
-        const size_t left = std::floor(prod);
-        const size_t right = std::ceil(prod);
-
-        std::nth_element(correlations.begin(), correlations.begin() + right, correlations.end());
-        const double rightval=correlations[right];
-        if (right == left) {
-            return rightval;
-        }
-
-        // Do NOT be tempted to do the second nth_element with the end at begin()+right;
-        // this does not handle ties properly.
-        std::nth_element(correlations.begin(), correlations.begin() + left, correlations.end());
-        const double leftval=correlations[left];
-
-        // `quantile - left / denom` represents the gap to the smaller quantile,
-        // while `right / denom - quantile` represents the gap from the larger quantile.
-        // The size of the gap is used as the weight for the _other_ quantile, i.e., 
-        // the closer you are to a quantile, the higher the weight.
-        // We convert these into proportions by dividing by their sum, i.e., `1/denom`.
-        const double leftweight = right - prod;
-        const double rightweight = prod - left;
-
-        return rightval * rightweight + leftval * leftweight;
-    }
-}
-
-template<class Vector>
-double distance_to_correlation(size_t n, const Vector& p1, const Vector& p2) {
-    double d2 = 0;
-    for (size_t i = 0; i < n; ++i) {
-        double tmp = p1[i] - p2[i];
-        d2 += tmp * tmp;
-    }
-    return 1 - 2 * d2;
-}
-
-}
-
-#endif
diff --git a/inst/include/singlepp/fine_tune.hpp b/inst/include/singlepp/fine_tune.hpp
deleted file mode 100644
index b7675c0..0000000
--- a/inst/include/singlepp/fine_tune.hpp
+++ /dev/null
@@ -1,170 +0,0 @@
-#ifndef TATAMI_FINE_TUNE_HPP
-#define TATAMI_FINE_TUNE_HPP
-
-#include "macros.hpp"
-
-#include "scaled_ranks.hpp"
-#include "process_features.hpp"
-#include "compute_scores.hpp"
-#include "build_indices.hpp"
-
-#include <vector>
-#include <algorithm>
-#include <unordered_map>
-
-namespace singlepp {
-
-inline std::pair<int, double> fill_labels_in_use(const std::vector<double>& scores, double threshold, std::vector<int>& in_use) {
-    auto it = std::max_element(scores.begin(), scores.end());
-    int best_label = it - scores.begin();
-    double max_score = *it;
-
-    in_use.clear();
-    constexpr double DUMMY = -1000;
-    double next_score = DUMMY;
-    const double bound = max_score - threshold;
-
-    for (size_t i = 0; i < scores.size(); ++i) {
-        const auto& val = scores[i];
-        if (val >= bound) {
-            in_use.push_back(i);
-        }
-        if (i != best_label && next_score < val) {
-            next_score = val;
-        }
-    }
-
-    return std::make_pair(best_label, max_score - next_score); 
-}
-
-inline std::pair<int, double> replace_labels_in_use(const std::vector<double>& scores, double threshold, std::vector<int>& in_use) {
-    auto it = std::max_element(scores.begin(), scores.end());
-    int best_index = it - scores.begin();
-    double max_score = *it;
-
-    int best_label = in_use[best_index];
-    size_t counter = 0;
-
-    constexpr double DUMMY = -1000;
-    double next_score = DUMMY;
-    const double bound = max_score - threshold;
-
-    for (size_t i = 0; i < scores.size(); ++i) {
-        const auto& val = scores[i];
-        if (val >= bound) {
-            in_use[counter] = in_use[i];
-            ++counter;
-        }
-        if (i != best_index && next_score < val) {
-            next_score = val;
-        }
-    }
-
-    in_use.resize(counter);
-    return std::make_pair(best_label, max_score - next_score); 
-}
-
-class FineTuner {
-    std::vector<int> labels_in_use;
-
-    std::unordered_map<int, int> gene_subset;
-
-    std::vector<double> scaled_left, scaled_right;
-
-    std::vector<double> all_correlations;
-
-    RankedVector<double, int> input_sub;
-
-    RankedVector<int, int> ref_sub;
-
-public:
-    template<bool test = false>
-    std::pair<int, double> run(
-        const RankedVector<double, int>& input, 
-        const std::vector<Reference>& ref,
-        const Markers& markers,
-        std::vector<double>& scores,
-        double quantile,
-        double threshold)
-    {
-        if (scores.size() <= 1) {
-            return std::make_pair(0, std::numeric_limits<double>::quiet_NaN());
-        } 
-
-        auto candidate = fill_labels_in_use(scores, threshold, labels_in_use);
-
-        // If there's only one top label, we don't need to do anything else.
-        if (labels_in_use.size() == 1) {
-            return candidate;
-        } 
-
-        // We also give up if every label is in range, because any subsequent
-        // calculations would use all markers and just give the same result.
-        // The 'test' parameter allows us to skip this bypass for testing.
-        if constexpr(!test) {
-            if (labels_in_use.size() == ref.size()) {
-                return candidate;
-            }
-        }
-
-        while (labels_in_use.size() > 1) {
-            gene_subset.clear();
-            gene_subset.reserve(input.size()); // known maximum.
-            for (auto l : labels_in_use) {
-                for (auto l2 : labels_in_use){ 
-                    const auto& current = markers[l][l2];
-                    for (auto c : current) {
-                        if (gene_subset.find(c) == gene_subset.end()) {
-                            const int counter = gene_subset.size();
-                            gene_subset[c] = counter;
-                        }
-                    }
-                }
-            }
-
-            input_sub.clear();
-            subset_ranks(input, input_sub, gene_subset);
-            scaled_left.resize(gene_subset.size());
-            scaled_ranks(input_sub, scaled_left.data());
-
-            scaled_right.resize(gene_subset.size());
-            scores.clear();
-
-            for (size_t i = 0; i < labels_in_use.size(); ++i) {
-                auto curlab = labels_in_use[i];
-
-                all_correlations.clear();
-                const auto& curref = ref[curlab];
-                size_t NR = curref.index->ndim();
-                size_t NC = curref.index->nobs();
-
-                for (size_t c = 0; c < NC; ++c) {
-                    // Technically we could be faster if we remembered the
-                    // subset from the previous fine-tuning iteration, but this
-                    // requires us to (possibly) make a copy of the entire
-                    // reference set; we can't afford to do this in each thread.
-                    ref_sub.clear();
-                    subset_ranks(curref.ranked[c], ref_sub, gene_subset);
-                    scaled_ranks(ref_sub, scaled_right.data());
-
-                    double cor = distance_to_correlation(scaled_left.size(), scaled_left, scaled_right);
-                    all_correlations.push_back(cor);
-                }
-
-                double score = correlations_to_scores(all_correlations, quantile);
-                scores.push_back(score);
-            }
-
-            candidate = replace_labels_in_use(scores, threshold, labels_in_use); 
-            if (labels_in_use.size() == scores.size()) { // i.e., unchanged.
-                break;
-            }
-        }
-
-        return candidate;
-    }
-};
-
-}
-
-#endif
diff --git a/inst/include/singlepp/load_references.hpp b/inst/include/singlepp/load_references.hpp
deleted file mode 100644
index 412e329..0000000
--- a/inst/include/singlepp/load_references.hpp
+++ /dev/null
@@ -1,650 +0,0 @@
-#ifndef SINGLEPP_LOADERS_HPP
-#define SINGLEPP_LOADERS_HPP
-
-#include "macros.hpp"
-
-#include "byteme/PerByte.hpp"
-#include "byteme/RawFileReader.hpp"
-#include "tatami/tatami.hpp"
-
-#ifdef SINGLEPP_USE_ZLIB
-#include "byteme/GzipFileReader.hpp"
-#include "byteme/ZlibBufferReader.hpp"
-#endif
-
-#include "Markers.hpp"
-
-#include <string>
-#include <vector>
-#include <cctype>
-#include <type_traits>
-
-/**
- * @file load_references.hpp
- *
- * @brief Load reference datasets from a few expected formats.
- */
-
-namespace singlepp {
-
-/** 
- * @cond
- */
-template<bool parallel_>
-using SuperPerByte = typename std::conditional<parallel_, byteme::PerByte<char>, byteme::PerByteParallel<char> >::type;
-
-template<bool parallel_, class Reader_>
-std::vector<int> load_labels_internal(Reader_& reader) {
-    bool non_empty = false;
-    int current = 0;
-    std::vector<int> labels;
-    bool remaining = true;
-
-    SuperPerByte<parallel_> pb(&reader);
-    bool okay = pb.valid();
-    while (okay) {
-        char x = pb.get();
-        okay = pb.advance();
-
-        if (x == '\n') {
-            if (!non_empty) {
-                throw std::runtime_error("label index must be an integer");
-            }
-            labels.push_back(current);
-            current = 0;
-            non_empty = false;
-        } else if (std::isdigit(x)) {
-            non_empty = true;
-            current *= 10;
-            current += (x - '0');
-        } else {
-            throw std::runtime_error("label index must be an integer");
-        }
-    } 
-
-    if (non_empty) {
-        labels.push_back(current);
-    }
-
-    return labels;
-}
-/** 
- * @endcond
- */
-
-/**
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- *
- * @param path Path to a text file containing the labels.
- * @param buffer_size Size of the buffer to use when reading the file.
- *
- * @return Vector containing the label index for each reference profile.
- *
- * The file should contain one line per profile, containing an integer label index for that profile.
- * Label indices refer to another array containing the actual names of the labels.
- * The total number of lines should be equal to the number of profiles in the dataset.
- * The file should not contain any header.
- */
-template<bool parallel_ = false>
-std::vector<int> load_labels_from_text_file(const char* path, size_t buffer_size = 65536) {
-    byteme::RawFileReader reader(path, buffer_size);
-    return load_labels_internal<parallel_>(reader);
-}
-
-#ifdef SINGLEPP_USE_ZLIB
-
-/**
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- *
- * @param path Path to a Gzip-compressed file containing the labels.
- * @param buffer_size Size of the buffer to use when reading the file.
- *
- * @return Vector containing the label index for each reference profile.
- *
- * See `load_labels_from_text_file()` for details about the format.
- */
-template<bool parallel_ = false>
-std::vector<int> load_labels_from_gzip_file(const char* path, size_t buffer_size = 65536) {
-    byteme::GzipFileReader reader(path, buffer_size);
-    return load_labels_internal<parallel_>(reader);
-}
-
-/**
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- *
- * @param[in] buffer Pointer to an array containing a Zlib/Gzip-compressed string of labels.
- * @param len Length of the array for `buffer`.
- * @param buffer_size Size of the buffer to use when decompressing the buffer.
- *
- * @return Vector containing the label index for each reference profile.
- *
- * See `load_labels_from_text_file()` for details about the format.
- */
-template<bool parallel_ = false>
-inline std::vector<int> load_labels_from_zlib_buffer(const unsigned char* buffer, size_t len, size_t buffer_size = 65536) {
-    byteme::ZlibBufferReader reader(buffer, len, 3, buffer_size);
-    return load_labels_internal<parallel_>(reader);
-}
-
-#endif
-
-/**
- * @cond
- */
-template<bool parallel_, class Reader_>
-std::vector<std::string> load_names_internal(Reader_& reader) {
-    std::string current;
-    std::vector<std::string> names;
-
-    SuperPerByte<parallel_> pb(&reader);
-    bool okay = pb.valid();
-    while (okay) {
-        char x = pb.get();
-        okay = pb.advance();
-
-        if (x == '\n') {
-            names.emplace_back(std::move(current));
-            current.clear();
-        } else {
-            current += x;
-        }
-    }
-
-    if (!current.empty()) { // absence of trailing newline is okay.
-        names.emplace_back(std::move(current));
-    }
-
-    return names;
-};
-/** 
- * @endcond
- */
-
-/**
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- *
- * @param path Path to a text file containing the label names.
- * @param buffer_size Size of the buffer to use when reading the file.
- *
- * @return Vector of strings containing the name for each reference profile.
- *
- * The file should contain one line per label, containing a (non-quoted) string with the full name for that label.
- * The total number of lines should be equal to the number of unique labels in the dataset.
- * The file should not contain any header.
- */
-template<bool parallel_ = false>
-std::vector<std::string> load_label_names_from_text_file(const char* path, size_t buffer_size = 65536) {
-    byteme::RawFileReader reader(path, buffer_size);
-    return load_names_internal<parallel_>(reader);
-}
-
-#ifdef SINGLEPP_USE_ZLIB
-
-/**
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- *
- * @param path Path to a Gzip-compressed file containing the label names.
- * @param buffer_size Size of the buffer to use when reading the file.
- *
- * @return Vector of strings containing the name for each label.
- *
- * See `load_label_names_from_text_file()` for details about the format.
- */
-template<bool parallel_ = false>
-std::vector<std::string> load_label_names_from_gzip_file(const char* path, size_t buffer_size = 65536) {
-    byteme::GzipFileReader reader(path, buffer_size);
-    return load_names_internal<parallel_>(reader);
-}
-
-/**
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- *
- * @param[in] buffer Pointer to an array containing a Zlib/Gzip-compressed string of label names.
- * @param len Length of the array for `buffer`.
- * @param buffer_size Size of the buffer to use when decompressing the buffer.
- *
- * @return Vector of strings containing the name for each label.
- *
- * See `load_label_names_from_text_file()` for details about the format.
- */
-template<bool parallel_ = false>
-std::vector<std::string> load_label_names_from_zlib_buffer(const unsigned char* buffer, size_t len, size_t buffer_size = 65536) {
-    byteme::ZlibBufferReader reader(buffer, len, 3, buffer_size);
-    return load_names_internal<parallel_>(reader);
-}
-
-#endif
-
-/** 
- * @cond
- */
-template<bool parallel_, class Reader_>
-std::pair<std::vector<std::string>, std::vector<std::string> > load_features_internal(Reader_& reader) {
-    std::string current;
-    std::vector<std::string> ensembl, symbols;
-
-    SuperPerByte<parallel_> pb(&reader);
-    bool okay = pb.valid();
-    while (okay) {
-        current.clear();
-
-        // Pulling out the Ensembl ID.
-        do {
-            char x = pb.get();
-            if (x == ',') { // don't advance yet, so that okay check below doesn't trigger if the symbol is empty and file is not newline terminated.
-                break;
-            } else if (x == '\n') {
-                okay = false; // hit the error below.
-                break;
-            }
-            current += x;
-            okay = pb.advance();
-        } while (okay);
-
-        if (!okay) {
-            throw std::runtime_error("two comma-separated fields (Ensembl ID and symbol) expected on each line");
-        }
-        okay = pb.advance();
-
-        ensembl.push_back(std::move(current));
-        current.clear();
-
-        // Now pulling out the gene symbol.
-        while (okay) {
-            char x = pb.get();
-            okay = pb.advance(); 
-            if (x == '\n') {
-                break;
-            } 
-            current += x;
-        }
-   
-        symbols.push_back(std::move(current)); // still gets added if file is not newline terminated.
-        current.clear();
-    }
-
-    return std::make_pair(std::move(ensembl), std::move(symbols));
-}
-/** 
- * @endcond
- */
-
-/**
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- *
- * @param path Path to a text file containing the feature annotation.
- * @param buffer_size Size of the buffer to use when reading the file.
- *
- * @return Pair of vectors, each of length equal to the number of features.
- * The first contains Ensembl IDs while the second contains gene symbols.
- *
- * The file should contain one line per feature, with total number of lines equal to the number of features in the dataset.
- * Each line should contain two strings separated by a comma.
- * The first string should be the Ensembl ID while the second string should be the gene symbol; either string may be empty.
- * The file should not contain any header.
- */
-template<bool parallel_ = false>
-std::pair<std::vector<std::string>, std::vector<std::string> > load_features_from_text_file(const char* path, size_t buffer_size = 65536) {
-    byteme::RawFileReader reader(path, buffer_size);
-    return load_features_internal<parallel_>(reader);
-}
-
-#ifdef SINGLEPP_USE_ZLIB
-
-/**
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- *
- * @param path Path to a Gzip-compressed file containing the feature annotation.
- * @param buffer_size Size of the buffer to use when reading the file.
- *
- * @return Pair of vectors, each of length equal to the number of features.
- * The first contains Ensembl IDs while the second contains gene symbols.
- *
- * See `load_features_from_text_file()` for details about the format.
- */
-template<bool parallel_ = false>
-std::pair<std::vector<std::string>, std::vector<std::string> > load_features_from_gzip_file(const char* path, size_t buffer_size = 65536) {
-    byteme::GzipFileReader reader(path, buffer_size);
-    return load_features_internal<parallel_>(reader);
-}
-
-/**
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- *
- * @param[in] buffer Pointer to an array containing a Zlib/Gzip-compressed string containing the feature annotation.
- * @param len Length of the array for `buffer`.
- * @param buffer_size Size of the buffer to use when decompressing the buffer.
- *
- * @return Pair of vectors, each of length equal to the number of features.
- * The first contains Ensembl IDs while the second contains gene symbols.
- *
- * See `load_features_from_text_file()` for details about the format.
- */
-template<bool parallel_ = false>
-std::pair<std::vector<std::string>, std::vector<std::string> > load_features_from_zlib_buffer(const unsigned char* buffer, size_t len, size_t buffer_size = 65536) {
-    byteme::ZlibBufferReader reader(buffer, len, 3, buffer_size);
-    return load_features_internal<parallel_>(reader);
-}
-
-#endif
-
-/**
- * Matrix of ranks as a dense column-major matrix.
- * Each column corresponds to a sample while each row corresponds to a feature.
- * Each column contains the ranked expression values for all features.
- *
- * @tparam Data Numeric type for data in the matrix interface.
- * @tparam Index Integer type for indices in the matrix interface.
- */
-template<typename Data = double, typename Index = int>
-using RankMatrix = tatami::DenseColumnMatrix<Data, Index, std::vector<int> >;
-
-/** 
- * @cond
- */
-template<typename Data, typename Index, bool parallel_, class Reader>
-RankMatrix<Data, Index> load_rankings_internal(Reader& reader) {
-    size_t nfeatures = 0;
-    size_t line = 0;
-    std::vector<int> values;
-
-    int field = 0;
-    bool non_empty = false;
-    int current = 0;
-
-    bool has_nfeatures = false;
-    auto check_nfeatures = [&]() -> void {
-        if (!has_nfeatures) {
-            has_nfeatures = true;
-            nfeatures = field + 1;
-        } else if (field + 1 != nfeatures) {
-            throw std::runtime_error("number of fields on each line should be equal to the number of features");
-        }
-    };
-
-    SuperPerByte<parallel_> pb(&reader);
-    bool okay = pb.valid();
-    while (okay) {
-        char x = pb.get();
-        okay = pb.advance();
-
-        if (x == '\n') {
-            check_nfeatures();
-            if (!non_empty) {
-                throw std::runtime_error("fields should not be empty");
-            }
-            values.push_back(current);
-            current = 0;
-            field = 0;
-            non_empty = false;
-            ++line;
-
-        } else if (x == ',') {
-            if (!non_empty) {
-                throw std::runtime_error("fields should not be empty");
-            }
-            values.push_back(current);
-            current = 0;
-            ++field;
-            non_empty = false;
-
-        } else if (std::isdigit(x)) {
-            non_empty = true;
-            current *= 10;
-            current += (x - '0');
-
-        } else {
-            throw std::runtime_error("fields should only contain integer ranks");
-        }
-    }
-
-    if (field || non_empty) { // aka no terminating newline.
-        check_nfeatures();
-        if (!non_empty) {
-            throw std::runtime_error("fields should not be empty");
-        }
-        values.push_back(current);
-        ++line;
-    }
-
-    return RankMatrix<Data, Index>(nfeatures, line, std::move(values));
-};
-/** 
- * @endcond
- */
-
-/**
- * @tparam Data Numeric type for data in the matrix interface.
- * @tparam Index Integer type for indices in the matrix interface.
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- * 
- * @param path Path to a text file containing the ranking matrix.
- * @param buffer_size Size of the buffer to use when reading the file.
- *
- * @return A `RankMatrix` containing the feature rankings for each reference profile.
- * Each column corresponds to a reference profile while each row corresponds to a feature.
- *
- * The file should contain one line per reference profile, with the total number of lines equal to the number of profiles in the dataset.
- * Each line should contain the rank of each feature's expression within that profile, separated by commas.
- * The number of comma-separated fields on each line should be equal to the number of features.
- * Ranks should be strictly integer - tied ranks should default to the minimum rank among the index set of ties.
- */
-template<typename Data = double, typename Index = int, bool parallel_ = false>
-RankMatrix<Data, Index> load_rankings_from_text_file(const char* path, size_t buffer_size = 65536) {
-    byteme::RawFileReader reader(path, buffer_size);
-    return load_rankings_internal<Data, Index, parallel_>(reader);
-}
-
-#ifdef SINGLEPP_USE_ZLIB
-
-/**
- * @tparam Data Numeric type for data in the matrix interface.
- * @tparam Index Integer type for indices in the matrix interface.
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- *
- * @param path Path to a Gzip-compressed file containing the ranking matrix.
- * @param buffer_size Size of the buffer to use when reading the file.
- *
- * @return A `RankMatrix` containing the feature rankings for each reference profile.
- * Each column corresponds to a reference profile while each row corresponds to a feature.
- *
- * See `load_rankings_from_text_file()` for details about the format.
- */
-template<typename Data = double, typename Index = int, bool parallel_ = false>
-RankMatrix<Data, Index> load_rankings_from_gzip_file(const char* path, size_t buffer_size = 65536) {
-    byteme::GzipFileReader reader(path, buffer_size);
-    return load_rankings_internal<Data, Index, parallel_>(reader);
-}
-
-/**
- * @tparam Data Numeric type for data in the matrix interface.
- * @tparam Index Integer type for indices in the matrix interface.
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- *
- * @param[in] buffer Pointer to an array containing a Zlib/Gzip-compressed string containing the ranking matrix.
- * @param len Length of the array for `buffer`.
- * @param buffer_size Size of the buffer to use when reading the file.
- *
- * @return A `RankMatrix` containing the feature rankings for each reference profile.
- * Each column corresponds to a reference profile while each row corresponds to a feature.
- *
- * See `load_rankings_from_text_file()` for details about the format.
- */
-template<typename Data = double, typename Index = int, bool parallel_ = false>
-RankMatrix<Data, Index> load_rankings_from_zlib_buffer(const unsigned char* buffer, size_t len, size_t buffer_size = 65536) {
-    byteme::ZlibBufferReader reader(buffer, len, 3, buffer_size);
-    return load_rankings_internal<Data, Index, parallel_>(reader);
-}
-
-#endif
-
-/** 
- * @cond
- */
-template<bool parallel_, class Reader>
-Markers load_markers_internal(size_t nfeatures, size_t nlabels, Reader& reader) {
-    Markers markers(nlabels);
-    for (auto& m : markers) {
-        m.resize(nlabels);
-    }
-
-    std::vector<int> values;
-    SuperPerByte<parallel_> pb(&reader);
-    bool okay = pb.valid();
-    while (okay) {
-
-        // Processing the label IDs.
-        size_t first = 0, second = 0;
-        for (int l = 0; l < 2; ++l) {
-            auto& current = (l == 0 ? first : second);
-            bool non_empty = false;
-
-            do {
-                char x = pb.get();
-                okay = pb.advance();
-
-                if (x == '\t') {
-                    if (!non_empty) {
-                        throw std::runtime_error("empty field detected in the label indices");
-                    }
-                    break;
-                } else if (x == '\n') {
-                    okay = false; // hit the error below.
-                    break;
-                } else if (!std::isdigit(x)) {
-                    throw std::runtime_error("label indices should be integers");
-                }
-
-                non_empty = true;
-                current *= 10;
-                current += (x - '0');
-            } while (okay);
-
-            if (!okay) {
-                throw std::runtime_error("expected at least three tab-separated fields on each line");
-            }
-            if (current >= markers.size()) {
-                throw std::runtime_error("label index out of range");
-            }
-        }
-
-        // Processing the actual gene indices.
-        bool non_empty = false;
-        int current = 0;
-        while (okay) {
-            char x = pb.get();
-            okay = pb.advance();
-
-            if (std::isdigit(x)) {
-                non_empty = true;
-                current *= 10;
-                current += (x - '0');
-
-            } else if (x == '\t') {
-                if (!non_empty) {
-                    throw std::runtime_error("gene index fields should not be empty");
-                }
-                values.push_back(current);
-                current = 0;
-                non_empty = false;
-
-            } else if (x == '\n') {
-                break;
-
-            } else {
-                throw std::runtime_error("gene index fields should be integers");
-            }
-        }
-
-        // Adding the last element. We don't do this inside the newline check,
-        // as we need to account for cases where the file is not newline-terminated.
-        if (!non_empty) {
-            throw std::runtime_error("gene index fields should not be empty");
-        }
-        values.push_back(current);
-
-        for (auto v : values) {
-            if (static_cast<size_t>(v) >= nfeatures) {
-                throw std::runtime_error("gene index out of range");
-            }
-        }
-
-        auto& store = markers[first][second];
-        if (!store.empty()) {
-            throw std::runtime_error("multiple marker sets listed for a single pairwise comparison");
-        }
-        store.swap(values); // implicit clear of 'values', as store is empty.
-    }
-
-    return markers;
-}
-/** 
- * @endcond
- */
-
-/**
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- *
- * @param path Path to a text file containing the marker lists.
- * @param nfeatures Total number of features in the dataset.
- * @param nlabels Number of labels in the dataset.
- * @param buffer_size Size of the buffer to use when reading the file.
- *
- * @return A `Markers` object containing the markers from each pairwise comparison between labels.
- *
- * The file should contain one line per pairwise comparison between labels.
- * Each line should at least 3 tab-delimited fields - the index of the first label, the index of the second label, 
- * and then the indices of the features selected as marker genes for the first label relative to the second.
- * Any (non-zero) number of marker indices may be reported provided they are ordered by marker strength.
- * The total number of lines in this file should be equal to the total number of pairwise comparisons between different labels, including permutations.
- */
-template<bool parallel_ = false>
-Markers load_markers_from_text_file(const char* path, size_t nfeatures, size_t nlabels, size_t buffer_size = 65536) {
-    byteme::RawFileReader reader(path, buffer_size);
-    return load_markers_internal<parallel_>(nfeatures, nlabels, reader);
-}
-
-#ifdef SINGLEPP_USE_ZLIB
-
-/**
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- *
- * @param path Path to a Gzip-compressed file containing the marker lists.
- * @param nfeatures Total number of features in the dataset.
- * @param nlabels Number of labels in the dataset.
- * @param buffer_size Size of the buffer to use when reading the file.
- *
- * @return A `Markers` object containing the markers from each pairwise comparison between labels.
- *
- * See `load_markers_from_text_file()` for details about the format.
- */
-template<bool parallel_ = false>
-Markers load_markers_from_gzip_file(const char* path, size_t nfeatures, size_t nlabels, size_t buffer_size = 65536) {
-    byteme::GzipFileReader reader(path, buffer_size);
-    return load_markers_internal<parallel_>(nfeatures, nlabels, reader);
-}
-
-/**
- * @tparam parallel_ Whether file loading and parsing should be parallelized.
- *
- * @param[in] buffer Pointer to an array containing a Zlib/Gzip-compressed string containing the marker lists.
- * @param len Length of the array for `buffer`.
- * @param nfeatures Total number of features in the dataset.
- * @param nlabels Number of labels in the dataset.
- * @param buffer_size Size of the buffer to use when reading the file.
- *
- * @return A `Markers` object containing the markers from each pairwise comparison between labels.
- *
- * See `load_markers_from_text_file()` for details about the format.
- */
-template<bool parallel_ = false>
-Markers load_markers_from_zlib_buffer(const unsigned char* buffer, size_t len, size_t nfeatures, size_t nlabels, size_t buffer_size = 65536) {
-    byteme::ZlibBufferReader reader(buffer, len, 3, buffer_size);
-    return load_markers_internal<parallel_>(nfeatures, nlabels, reader);
-}
-
-#endif
-
-}
-
-#endif
diff --git a/inst/include/singlepp/macros.hpp b/inst/include/singlepp/macros.hpp
deleted file mode 100644
index 8ac1ddd..0000000
--- a/inst/include/singlepp/macros.hpp
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef SINGLEPP_MACROS_HPP
-#define SINGLEPP_MACROS_HPP
-
-/**
- * @file macros.hpp
- *
- * @brief Set common macros used throughout **singlepp**.
- *
- * @details
- * The `SINGLEPP_CUSTOM_PARALLEL` macro can be set to a function that specifies a custom parallelization scheme.
- * This function should be a template that accept three arguments:
- *
- * - `fun`, a lambda that accepts three arguments: 
- *   - `thread`, the thread number.
- *   - `start`, the index of the first job for this thread.
- *   - `length`, the number of jobs for this thread.
- * - `njobs`, an integer specifying the number of jobs.
- * - `nthreads`, an integer specifying the number of threads to use.
- *
- * The function should split `[0, njobs)` into any number of contiguous, non-overlapping intervals, and call `fun` on each interval, possibly in different threads.
- * The details of the splitting and evaluation are left to the discretion of the developer defining the macro. 
- * The function should only return once all evaluations of `fun` are complete.
- *
- * If `SINGLEPP_CUSTOM_PARALLEL` is set, the following macros are also set (if they are not already defined):
- *
- * - `TATAMI_CUSTOM_PARALLEL`, from the [**tatami**](https://ltla.github.io/tatami) library.
- *
- * This ensures that any custom parallelization scheme is propagated to all of **singlepp**'s dependencies.
- * If these libraries are used outside of **singlepp**, some care is required to ensure that the macros are consistently defined through the client library/application;
- * otherwise, developers may observe ODR compilation errors. 
- */
-
-// Synchronizing all parallelization schemes.
-#ifdef SINGLEPP_CUSTOM_PARALLEL
-
-#ifndef TATAMI_CUSTOM_PARALLEL
-#define TATAMI_CUSTOM_PARALLEL SINGLEPP_CUSTOM_PARALLEL
-#endif
-
-#endif
-
-#endif
diff --git a/inst/include/singlepp/process_features.hpp b/inst/include/singlepp/process_features.hpp
deleted file mode 100644
index a8e533e..0000000
--- a/inst/include/singlepp/process_features.hpp
+++ /dev/null
@@ -1,153 +0,0 @@
-#ifndef SINGLEPP_PROCESS_FEATURES_HPP
-#define SINGLEPP_PROCESS_FEATURES_HPP
-
-#include "macros.hpp"
-
-#include "Markers.hpp"
-
-#include <vector>
-#include <algorithm>
-#include <unordered_map>
-#include <unordered_set>
-
-namespace singlepp {
-
-typedef std::vector<std::pair<int, int> > Intersection;
-
-template<typename Id>
-Intersection intersect_features(size_t mat_n, const Id* mat_id, size_t ref_n, const Id* ref_id) {
-    std::unordered_map<Id, std::pair<int, int> > intersection;
-    intersection.reserve(mat_n);
-    for (size_t i = 0; i < mat_n; ++i) {
-        if (intersection.find(mat_id[i]) == intersection.end()) { // only using the first occurrence of each ID in mat_id.
-            intersection[mat_id[i]] = std::make_pair<int, int>(i, -1);
-        }
-    }
-
-    for (size_t i = 0; i < ref_n; ++i) {
-        auto it = intersection.find(ref_id[i]);
-        if (it != intersection.end()) { // only using the first occurrence of each ID in ref_id.
-            auto& target = (it->second).second;
-            if (target < 0) {
-                target = i;
-            }
-        }
-    }
-
-    Intersection pairings;
-    pairings.reserve(intersection.size());
-    for (const auto& x : intersection) {
-        if (x.second.second >= 0) {
-            pairings.push_back(x.second);
-        }
-    }
-
-    return pairings;
-}
-
-inline void subset_markers(Intersection& intersection, Markers& markers, int top) {
-    std::unordered_set<int> available;
-    available.reserve(intersection.size());
-    for (const auto& in : intersection) {
-        available.insert(in.second);
-    }
-
-    // Figuring out the top markers to retain, that are _also_ in the intersection.
-    std::unordered_set<int> all_markers;
-    all_markers.reserve(intersection.size());
-
-    for (size_t i = 0; i < markers.size(); ++i) {
-        for (size_t j = 0; j < markers[i].size(); ++j) {
-            auto& current = markers[i][j];
-
-            std::vector<int> replacement;
-            size_t upper_bound = static_cast<size_t>(top >= 0 ? top : -1); // in effect, no upper bound if top = -1.
-            replacement.reserve(top >= 0 ? static_cast<size_t>(top) : current.size());
-
-            for (size_t k = 0; k < current.size() && replacement.size() < upper_bound; ++k) {
-                if (available.find(current[k]) != available.end()) {
-                    all_markers.insert(current[k]);
-                    replacement.push_back(current[k]);
-                }
-            }
-            current.swap(replacement);
-        }
-    }
-
-    // Subsetting the intersection down to the chosen set of markers.
-    std::unordered_map<int, int> mapping;
-    mapping.reserve(intersection.size());
-    {
-        size_t counter = 0;
-        for (size_t i = 0; i < intersection.size(); ++i) {
-            if (all_markers.find(intersection[i].second) != all_markers.end()) {
-                intersection[counter] = intersection[i];
-                mapping[intersection[i].second] = counter;
-                ++counter;
-            }
-        }
-        intersection.resize(counter);
-    }
-
-    // Reindexing the markers.
-    for (size_t i = 0; i < markers.size(); ++i) {
-        for (size_t j = 0; j < markers[i].size(); ++j) {
-            auto& current = markers[i][j];
-            for (size_t k = 0; k < current.size(); ++k) {
-                auto it = mapping.find(current[k]);
-                current[k] = it->second;
-            }
-        }
-    }
-
-    return;
-}
-
-// Use this method when the feature spaces are already identical.
-inline std::vector<int> subset_markers(Markers& markers, int top) {
-    std::unordered_set<int> available;
-    for (size_t i = 0; i < markers.size(); ++i) {
-        for (size_t j = 0; j < markers[i].size(); ++j) {
-            auto& current = markers[i][j];
-            if (top >= 0) {
-                current.resize(std::min(current.size(), static_cast<size_t>(top)));
-            }
-            available.insert(current.begin(), current.end());
-        }
-    }
-
-    std::vector<int> subset(available.begin(), available.end());
-    std::sort(subset.begin(), subset.end());
-
-    std::unordered_map<int, int> mapping;
-    mapping.reserve(subset.size());
-    for (size_t i = 0; i < subset.size(); ++i) {
-        mapping[subset[i]] = i;
-    }
-
-    // Reindexing the markers.
-    for (size_t i = 0; i < markers.size(); ++i) {
-        for (size_t j = 0; j < markers[i].size(); ++j) {
-            auto& current = markers[i][j];
-            for (size_t k = 0; k < current.size(); ++k) {
-                auto it = mapping.find(current[k]);
-                current[k] = it->second;
-            }
-        }
-    }
-
-    return subset;
-}
-
-inline std::pair<std::vector<int>, std::vector<int> > unzip(const Intersection& intersection) {
-    std::vector<int> left(intersection.size()), right(intersection.size());
-    for (size_t i = 0; i < intersection.size(); ++i) {
-        left[i] = intersection[i].first;
-        right[i] = intersection[i].second;
-    }
-    return std::make_pair(std::move(left), std::move(right));
-}
-
-}
-
-#endif
diff --git a/inst/include/singlepp/scaled_ranks.hpp b/inst/include/singlepp/scaled_ranks.hpp
deleted file mode 100644
index f5eb381..0000000
--- a/inst/include/singlepp/scaled_ranks.hpp
+++ /dev/null
@@ -1,155 +0,0 @@
-#ifndef SINGLEPP_SCALED_RANKS_HPP
-#define SINGLEPP_SCALED_RANKS_HPP
-
-#include "macros.hpp"
-
-#include <algorithm>
-#include <vector>
-#include <cmath>
-#include <unordered_map>
-
-namespace singlepp {
-
-template<typename Stat, typename Index>
-using RankedVector = std::vector<std::pair<Stat, Index> >;
-
-// This class sanitizes any user-provided subsets so that we can provide a
-// sorted and unique subset to the tatami extractor. We then undo the sorting
-// to use the original indices in the rank filler. This entire thing is
-// necessary as the behavior of the subsets isn't something that the user can
-// easily control (e.g., if the reference/test datasets do not use the same
-// feature ordering, in which case the subset is necessarily unsorted).
-struct SubsetSorter {
-    bool use_sorted_subset = false;
-    const std::vector<int>* original_subset;
-    std::vector<int> sorted_subset, original_indices;
-
-    SubsetSorter(const std::vector<int>& sub) : original_subset(&sub) {
-        size_t num_subset = sub.size();
-        for (size_t i = 1; i < num_subset; ++i) {
-            if (sub[i] <= sub[i-1]) {
-                use_sorted_subset = true;
-                break;
-            }
-        }
-
-        if (use_sorted_subset) {
-            std::vector<std::pair<int, int> > store;
-            store.reserve(num_subset);
-            for (size_t i = 0; i < num_subset; ++i) {
-                store.emplace_back(sub[i], i);
-            }
-            
-            std::sort(store.begin(), store.end());
-            sorted_subset.reserve(num_subset);
-            original_indices.resize(num_subset);
-            for (const auto& s : store) {
-                if (sorted_subset.empty() || sorted_subset.back() != s.first) {
-                    sorted_subset.push_back(s.first);
-                }
-                original_indices[s.second] = sorted_subset.size() - 1;
-            }
-        }
-    }
-
-    const std::vector<int>& extraction_subset() const {
-        if (use_sorted_subset) {
-            return sorted_subset;
-        } else {
-            return *original_subset;
-        }
-    }
-
-    void fill_ranks(const double* ptr, RankedVector<double, int>& vec) const {
-        if (use_sorted_subset) {
-            size_t num = original_indices.size();
-            for (size_t s = 0; s < num; ++s) {
-                vec[s].first = ptr[original_indices[s]];
-                vec[s].second = s;
-            }
-        } else {
-            size_t num = original_subset->size();
-            for (size_t s = 0; s < num; ++s, ++ptr) {
-                vec[s].first = *ptr;
-                vec[s].second = s;
-            }
-        }
-        std::sort(vec.begin(), vec.end());
-    }
-};
-
-template<typename Stat, typename Index>
-void scaled_ranks(const RankedVector<Stat, Index>& collected, double* outgoing) { 
-    // Computing tied ranks. 
-    size_t cur_rank = 0;
-    auto cIt = collected.begin();
-
-    while (cIt != collected.end()) {
-        auto copy = cIt;
-        ++copy;
-        double accumulated_rank = cur_rank;
-        ++cur_rank;
-
-        while (copy != collected.end() && copy->first == cIt->first) {
-            accumulated_rank += cur_rank;
-            ++cur_rank;
-            ++copy;
-        }
-
-        double mean_rank= accumulated_rank / (copy - cIt);
-        while (cIt!=copy) {
-            outgoing[cIt->second] = mean_rank;
-            ++cIt;
-        }
-    }
-
-    // Mean-adjusting and converting to cosine values.
-    double sum_squares = 0;
-    size_t N = collected.size();
-    const double center_rank = static_cast<double>(N - 1)/2; 
-    for (size_t i = 0 ; i < N; ++i) {
-        auto& o = outgoing[i];
-        o -= center_rank;
-        sum_squares += o*o;
-    }
-
-    // Special behaviour for no-variance cells; these are left as all-zero scaled ranks.
-    sum_squares = std::max(sum_squares, 0.00000001);
-    sum_squares = std::sqrt(sum_squares)*2;
-    for (size_t i = 0; i < N; ++i) {
-        outgoing[i] /= sum_squares;
-    }
-
-    return;
-}
-
-template<typename Stat, typename Index>
-void subset_ranks(const RankedVector<Stat, Index>& x, RankedVector<Stat, Index>& output, const std::unordered_map<int, int>& subset) {
-    for (size_t i = 0; i < x.size(); ++i) {
-        auto it = subset.find(x[i].second);
-        if (it != subset.end()) {
-            output.emplace_back(x[i].first, it->second);
-        }
-    }
-    return;
-}
-
-template<typename Stat, typename Index, typename Simple>
-void simplify_ranks(const RankedVector<Stat, Index>& x, RankedVector<Simple, Index>& output) {
-    if (x.size()) {
-        Index counter = 0;
-        auto last = x[0].first;
-        for (const auto& r : x) {
-            if (r.first != last) {
-                ++counter;
-                last = r.first;
-            }
-            output.emplace_back(counter, r.second);
-        }
-    }
-    return;
-}
-
-}
-
-#endif
diff --git a/inst/include/singlepp/singlepp.hpp b/inst/include/singlepp/singlepp.hpp
deleted file mode 100644
index 21e35fa..0000000
--- a/inst/include/singlepp/singlepp.hpp
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef SINGLEPP_UMBRELLA_HPP
-#define SINGLEPP_UMBRELLA_HPP
-
-/**
- * @file singlepp.hpp
- *
- * @brief Umbrella header for all includes.
- */
-
-#include "Classifier.hpp"
-#include "Markers.hpp"
-#include "IntegratedScorer.hpp"
-#include "IntegratedBuilder.hpp"
-#include "ChooseClassicMarkers.hpp"
-#include "load_references.hpp"
-
-/**
- * @namespace singlepp
- * Cell type classification using the SingleR algorithm in C++.
- */
-
-#endif
diff --git a/inst/include/source.sh b/inst/include/source.sh
deleted file mode 100755
index b36c255..0000000
--- a/inst/include/source.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-# Fetches all of the header files.
-
-if [ ! -e source-singlepp ]
-then 
-    git clone https://github.com/LTLA/singlepp source-singlepp
-    cd source-singlepp
-else 
-    cd source-singlepp
-    git pull
-fi
-
-git checkout 53bc74819c367db9d26c785de66206fe6c4f3890
-rm -rf ../singlepp
-cp -r include/singlepp/ ../singlepp
-git checkout master
-cd -
diff --git a/man/SingleR.Rd b/man/SingleR.Rd
index ddc9c18..d706e0f 100644
--- a/man/SingleR.Rd
+++ b/man/SingleR.Rd
@@ -45,7 +45,7 @@ Row names may be different across entries but only the intersection will be used
 \item{labels}{A character vector or factor of known labels for all samples in \code{ref}.
 
 Alternatively, if \code{ref} is a list, \code{labels} should be a list of the same length.
-Each element should contain a character vector or factor specifying the label for the corresponding entry of \code{ref}.}
+Each element should contain a character vector or factor specifying the labels for the columns of the corresponding element of \code{ref}.}
 
 \item{method}{Deprecated.}
 
diff --git a/man/classifySingleR.Rd b/man/classifySingleR.Rd
index 51a71f8..13b8b9e 100644
--- a/man/classifySingleR.Rd
+++ b/man/classifySingleR.Rd
@@ -13,17 +13,20 @@ classifySingleR(
   sd.thresh = NULL,
   prune = TRUE,
   assay.type = "logcounts",
-  check.missing = TRUE,
+  check.missing = FALSE,
   num.threads = bpnworkers(BPPARAM),
   BPPARAM = SerialParam()
 )
 }
 \arguments{
 \item{test}{A numeric matrix of single-cell expression values where rows are genes and columns are cells.
+Each row should be named with the gene name.
 
 Alternatively, a \linkS4class{SummarizedExperiment} object containing such a matrix.}
 
 \item{trained}{A \linkS4class{List} containing the output of the \code{\link{trainSingleR}} function.
+If the row names of \code{test} are not exactly the same as the reference dataset, the call to \code{trainSingleR} should set \code{test.genes=rownames(test)}.
+
 Alternatively, a List of Lists produced by \code{\link{trainSingleR}} for multiple references.}
 
 \item{quantile}{A numeric scalar specifying the quantile of the correlation distribution to use to compute the score for each label.}
@@ -38,8 +41,7 @@ Alternatively, a List of Lists produced by \code{\link{trainSingleR}} for multip
 
 \item{assay.type}{Integer scalar or string specifying the matrix of expression values to use if \code{test} is a \linkS4class{SummarizedExperiment}.}
 
-\item{check.missing}{Logical scalar indicating whether rows should be checked for missing values.
-If true and any missing values are found, the rows containing these values are silently removed.}
+\item{check.missing}{Deprecated and ignored, as any row filtering will cause mismatches with the \code{test.genes=} used in \code{\link{trainSingleR}}.}
 
 \item{num.threads}{Integer scalar specifying the number of threads to use for classification.}
 
diff --git a/man/combineRecomputedResults.Rd b/man/combineRecomputedResults.Rd
index 19920e9..356b9da 100644
--- a/man/combineRecomputedResults.Rd
+++ b/man/combineRecomputedResults.Rd
@@ -10,9 +10,9 @@ combineRecomputedResults(
   trained,
   quantile = 0.8,
   assay.type.test = "logcounts",
-  check.missing = TRUE,
-  allow.lost = FALSE,
+  check.missing = FALSE,
   warn.lost = TRUE,
+  allow.lost = FALSE,
   num.threads = bpnworkers(BPPARAM),
   BPPARAM = SerialParam()
 )
@@ -32,11 +32,11 @@ or (ii) running \code{trainSingleR} on each reference separately and manually ma
 \item{assay.type.test}{An integer scalar or string specifying the assay of \code{test} containing the relevant expression matrix,
 if \code{test} is a \linkS4class{SummarizedExperiment} object.}
 
-\item{check.missing}{Logical scalar indicating whether rows should be checked for missing values (and if found, removed).}
+\item{check.missing}{Deprecated and ignored, as any row filtering will cause mismatches with the \code{test.genes=} used in \code{\link{trainSingleR}}.}
 
-\item{allow.lost}{Deprecated.}
+\item{warn.lost}{Logical scalar indicating whether to emit a warning if markers from one reference in \code{trained} are absent in other references.}
 
-\item{warn.lost}{Logical scalar indicating whether to emit a warning if markers from one reference in \code{trained} are \dQuote{lost} in other references.}
+\item{allow.lost}{Deprecated.}
 
 \item{num.threads}{Integer scalar specifying the number of threads to use for index building and classification.}
 
@@ -51,7 +51,7 @@ This mimics the output of \code{\link{classifySingleR}} and contains the followi
 For any given cell, entries of this matrix are only non-\code{NA} for the assigned label in each reference;
 scores are not recomputed for the other labels.
 \item \code{labels}, a character vector containing the per-cell combined label across references.
-\item \code{references}, an integer vector specifying the reference from which the combined label was derived.
+\item \code{reference}, an integer vector specifying the reference from which the combined label was derived.
 \item \code{orig.results}, a DataFrame containing \code{results}.
 }
 It may also contain \code{pruned.labels} if these were also present in \code{results}.
diff --git a/man/getClassicMarkers.Rd b/man/getClassicMarkers.Rd
index a04662a..54a7927 100644
--- a/man/getClassicMarkers.Rd
+++ b/man/getClassicMarkers.Rd
@@ -21,13 +21,12 @@ In general, the expression values are expected to be log-transformed, see Detail
 
 Alternatively, a \linkS4class{SummarizedExperiment} object containing such a matrix.
 
-Alternatively, a list or \linkS4class{List} of SummarizedExperiment objects or numeric matrices containing multiple references,
-in which case the row names are expected to be the same across all objects.}
+Alternatively, a list or \linkS4class{List} of SummarizedExperiment objects or numeric matrices containing multiple references.}
 
 \item{labels}{A character vector or factor of known labels for all samples in \code{ref}.
 
 Alternatively, if \code{ref} is a list, \code{labels} should be a list of the same length.
-Each element should contain a character vector or factor specifying the label for the corresponding entry of \code{ref}.}
+Each element should contain a character vector or factor specifying the labels for the columns of the corresponding element of \code{ref}.}
 
 \item{assay.type}{An integer scalar or string specifying the assay of \code{ref} containing the relevant expression matrix,
 if \code{ref} is a \linkS4class{SummarizedExperiment} object (or is a list that contains one or more such objects).}
diff --git a/man/trainSingleR.Rd b/man/trainSingleR.Rd
index af34530..ca17f58 100644
--- a/man/trainSingleR.Rd
+++ b/man/trainSingleR.Rd
@@ -7,6 +7,7 @@
 trainSingleR(
   ref,
   labels,
+  test.genes = NULL,
   genes = "de",
   sd.thresh = NULL,
   de.method = c("classic", "wilcox", "t"),
@@ -31,13 +32,15 @@ In general, the expression values are expected to be log-transformed, see Detail
 
 Alternatively, a \linkS4class{SummarizedExperiment} object containing such a matrix.
 
-Alternatively, a list or \linkS4class{List} of SummarizedExperiment objects or numeric matrices containing multiple references,
-in which case the row names are expected to be the same across all objects.}
+Alternatively, a list or \linkS4class{List} of SummarizedExperiment objects or numeric matrices containing multiple references.}
 
 \item{labels}{A character vector or factor of known labels for all samples in \code{ref}.
 
 Alternatively, if \code{ref} is a list, \code{labels} should be a list of the same length.
-Each element should contain a character vector or factor specifying the label for the corresponding entry of \code{ref}.}
+Each element should contain a character vector or factor specifying the labels for the columns of the corresponding element of \code{ref}.}
+
+\item{test.genes}{Character vector of the names of the genes in the test dataset, i.e., the row names of \code{test} in \code{\link{classifySingleR}}.
+If \code{NULL}, it is assumed that the test dataset and \code{ref} have the same genes in the same row order.}
 
 \item{genes}{A string containing \code{"de"}, indicating that markers should be calculated from \code{ref}.
 For back compatibility, other string values are allowed but will be ignored with a deprecation warning.
@@ -83,11 +86,11 @@ if \code{ref} is a \linkS4class{SummarizedExperiment} object (or is a list that
 \item{check.missing}{Logical scalar indicating whether rows should be checked for missing values.
 If true and any missing values are found, the rows containing these values are silently removed.}
 
-\item{approximate}{Logical scalar indicating whether a faster approximate method should be used to compute the quantile.}
+\item{approximate}{Deprecated, use \code{BNPARAM} instead.}
 
 \item{num.threads}{Integer scalar specifying the number of threads to use for index building.}
 
-\item{BNPARAM}{Deprecated and ignored.}
+\item{BNPARAM}{A \linkS4class{BiocNeighborParam} object specifying how the neighbor search index should be constructed.}
 
 \item{BPPARAM}{A \linkS4class{BiocParallelParam} object specifying how parallelization should be performed.
 Relevant for marker detection if \code{genes = NULL}, aggregation if \code{aggr.ref = TRUE}, and \code{NA} checking if \code{check.missing = TRUE}.}
@@ -120,9 +123,15 @@ This is done by identifying the median expression within each label, and computi
 For each label, the top \code{de.n} genes with the largest differences compared to another label are chosen as markers to distinguish the two labels.
 The expression values are expected to be log-transformed and normalized.
 
-If \code{restrict} is specified, \code{ref} is subsetted to only include the rows with names that are in \code{restrict}.
-Marker selection and all subsequent classification will be performed using this restrictive subset of genes.
-This can be convenient for ensuring that only appropriate genes are used (e.g., not pseudogenes or predicted genes).
+Classification with \code{classifySingleR} assumes that the test dataset contains all marker genes that were detected from the reference.
+If the test and reference datasets do not have the same genes in the same order, we can set \code{test.genes} to the row names of the test dataset.
+This will instruct \code{trainSingleR} to only consider markers that are present in the test dataset.
+Any subsequent call to \code{classifySingleR} will also check that \code{test.genes} is consistent with \code{rownames(test)}.
+
+On a similar note, if \code{restrict} is specified, marker selection will only be performed using the specified subset of genes.
+This can be convenient for ignoring inappropriate genes like pseudogenes or predicted genes.
+It has the same effect as filtering out undesirable rows from \code{ref} prior to calling \code{trainSingleR}.
+Unlike \code{test.genes}, setting \code{restrict} does not introduce further checks on \code{rownames(test)} in \code{classifySingleR}.
 }
 \section{Custom feature specification}{
 
diff --git a/src/Makevars b/src/Makevars
deleted file mode 100644
index 4f42038..0000000
--- a/src/Makevars
+++ /dev/null
@@ -1 +0,0 @@
-PKG_CPPFLAGS=-I../inst/include/ 
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 6b96b6d..f6bdc17 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -10,6 +10,35 @@ Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
 Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
 #endif
 
+// classify_integrated
+SEXP classify_integrated(Rcpp::RObject test, Rcpp::List results, SEXP integrated_build, double quantile, int nthreads);
+RcppExport SEXP _SingleR_classify_integrated(SEXP testSEXP, SEXP resultsSEXP, SEXP integrated_buildSEXP, SEXP quantileSEXP, SEXP nthreadsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::traits::input_parameter< Rcpp::RObject >::type test(testSEXP);
+    Rcpp::traits::input_parameter< Rcpp::List >::type results(resultsSEXP);
+    Rcpp::traits::input_parameter< SEXP >::type integrated_build(integrated_buildSEXP);
+    Rcpp::traits::input_parameter< double >::type quantile(quantileSEXP);
+    Rcpp::traits::input_parameter< int >::type nthreads(nthreadsSEXP);
+    rcpp_result_gen = Rcpp::wrap(classify_integrated(test, results, integrated_build, quantile, nthreads));
+    return rcpp_result_gen;
+END_RCPP
+}
+// classify_single
+SEXP classify_single(Rcpp::RObject test, SEXP prebuilt, double quantile, bool use_fine_tune, double fine_tune_threshold, int nthreads);
+RcppExport SEXP _SingleR_classify_single(SEXP testSEXP, SEXP prebuiltSEXP, SEXP quantileSEXP, SEXP use_fine_tuneSEXP, SEXP fine_tune_thresholdSEXP, SEXP nthreadsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::traits::input_parameter< Rcpp::RObject >::type test(testSEXP);
+    Rcpp::traits::input_parameter< SEXP >::type prebuilt(prebuiltSEXP);
+    Rcpp::traits::input_parameter< double >::type quantile(quantileSEXP);
+    Rcpp::traits::input_parameter< bool >::type use_fine_tune(use_fine_tuneSEXP);
+    Rcpp::traits::input_parameter< double >::type fine_tune_threshold(fine_tune_thresholdSEXP);
+    Rcpp::traits::input_parameter< int >::type nthreads(nthreadsSEXP);
+    rcpp_result_gen = Rcpp::wrap(classify_single(test, prebuilt, quantile, use_fine_tune, fine_tune_threshold, nthreads));
+    return rcpp_result_gen;
+END_RCPP
+}
 // find_classic_markers
 Rcpp::List find_classic_markers(int nlabels, int ngenes, Rcpp::List labels, Rcpp::List ref, int de_n, int nthreads);
 RcppExport SEXP _SingleR_find_classic_markers(SEXP nlabelsSEXP, SEXP ngenesSEXP, SEXP labelsSEXP, SEXP refSEXP, SEXP de_nSEXP, SEXP nthreadsSEXP) {
@@ -38,9 +67,9 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
-// integrate_build
-SEXP integrate_build(Rcpp::IntegerVector test_features, Rcpp::List references, Rcpp::List ref_ids, Rcpp::List labels, Rcpp::List prebuilt, int nthreads);
-RcppExport SEXP _SingleR_integrate_build(SEXP test_featuresSEXP, SEXP referencesSEXP, SEXP ref_idsSEXP, SEXP labelsSEXP, SEXP prebuiltSEXP, SEXP nthreadsSEXP) {
+// train_integrated
+SEXP train_integrated(Rcpp::IntegerVector test_features, Rcpp::List references, Rcpp::List ref_ids, Rcpp::List labels, Rcpp::List prebuilt, int nthreads);
+RcppExport SEXP _SingleR_train_integrated(SEXP test_featuresSEXP, SEXP referencesSEXP, SEXP ref_idsSEXP, SEXP labelsSEXP, SEXP prebuiltSEXP, SEXP nthreadsSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type test_features(test_featuresSEXP);
@@ -49,45 +78,33 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< Rcpp::List >::type labels(labelsSEXP);
     Rcpp::traits::input_parameter< Rcpp::List >::type prebuilt(prebuiltSEXP);
     Rcpp::traits::input_parameter< int >::type nthreads(nthreadsSEXP);
-    rcpp_result_gen = Rcpp::wrap(integrate_build(test_features, references, ref_ids, labels, prebuilt, nthreads));
+    rcpp_result_gen = Rcpp::wrap(train_integrated(test_features, references, ref_ids, labels, prebuilt, nthreads));
     return rcpp_result_gen;
 END_RCPP
 }
-// integrate_run
-SEXP integrate_run(Rcpp::RObject test, Rcpp::List results, SEXP integrated_build, double quantile, int nthreads);
-RcppExport SEXP _SingleR_integrate_run(SEXP testSEXP, SEXP resultsSEXP, SEXP integrated_buildSEXP, SEXP quantileSEXP, SEXP nthreadsSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::traits::input_parameter< Rcpp::RObject >::type test(testSEXP);
-    Rcpp::traits::input_parameter< Rcpp::List >::type results(resultsSEXP);
-    Rcpp::traits::input_parameter< SEXP >::type integrated_build(integrated_buildSEXP);
-    Rcpp::traits::input_parameter< double >::type quantile(quantileSEXP);
-    Rcpp::traits::input_parameter< int >::type nthreads(nthreadsSEXP);
-    rcpp_result_gen = Rcpp::wrap(integrate_run(test, results, integrated_build, quantile, nthreads));
-    return rcpp_result_gen;
-END_RCPP
-}
-// prebuild
-SEXP prebuild(Rcpp::RObject ref, Rcpp::IntegerVector labels, Rcpp::List markers, bool approximate, int nthreads);
-RcppExport SEXP _SingleR_prebuild(SEXP refSEXP, SEXP labelsSEXP, SEXP markersSEXP, SEXP approximateSEXP, SEXP nthreadsSEXP) {
+// train_single
+SEXP train_single(Rcpp::IntegerVector test_features, Rcpp::RObject ref, Rcpp::IntegerVector ref_features, Rcpp::IntegerVector labels, Rcpp::List markers, Rcpp::RObject builder, int nthreads);
+RcppExport SEXP _SingleR_train_single(SEXP test_featuresSEXP, SEXP refSEXP, SEXP ref_featuresSEXP, SEXP labelsSEXP, SEXP markersSEXP, SEXP builderSEXP, SEXP nthreadsSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
+    Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type test_features(test_featuresSEXP);
     Rcpp::traits::input_parameter< Rcpp::RObject >::type ref(refSEXP);
+    Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ref_features(ref_featuresSEXP);
     Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type labels(labelsSEXP);
     Rcpp::traits::input_parameter< Rcpp::List >::type markers(markersSEXP);
-    Rcpp::traits::input_parameter< bool >::type approximate(approximateSEXP);
+    Rcpp::traits::input_parameter< Rcpp::RObject >::type builder(builderSEXP);
     Rcpp::traits::input_parameter< int >::type nthreads(nthreadsSEXP);
-    rcpp_result_gen = Rcpp::wrap(prebuild(ref, labels, markers, approximate, nthreads));
+    rcpp_result_gen = Rcpp::wrap(train_single(test_features, ref, ref_features, labels, markers, builder, nthreads));
     return rcpp_result_gen;
 END_RCPP
 }
-// get_subset
-Rcpp::IntegerVector get_subset(SEXP built);
-RcppExport SEXP _SingleR_get_subset(SEXP builtSEXP) {
+// get_ref_subset
+Rcpp::IntegerVector get_ref_subset(SEXP built);
+RcppExport SEXP _SingleR_get_ref_subset(SEXP builtSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::traits::input_parameter< SEXP >::type built(builtSEXP);
-    rcpp_result_gen = Rcpp::wrap(get_subset(built));
+    rcpp_result_gen = Rcpp::wrap(get_ref_subset(built));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -101,32 +118,16 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
-// run
-SEXP run(Rcpp::RObject test, Rcpp::IntegerVector subset, SEXP prebuilt, double quantile, bool use_fine_tune, double fine_tune_threshold, int nthreads);
-RcppExport SEXP _SingleR_run(SEXP testSEXP, SEXP subsetSEXP, SEXP prebuiltSEXP, SEXP quantileSEXP, SEXP use_fine_tuneSEXP, SEXP fine_tune_thresholdSEXP, SEXP nthreadsSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::traits::input_parameter< Rcpp::RObject >::type test(testSEXP);
-    Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type subset(subsetSEXP);
-    Rcpp::traits::input_parameter< SEXP >::type prebuilt(prebuiltSEXP);
-    Rcpp::traits::input_parameter< double >::type quantile(quantileSEXP);
-    Rcpp::traits::input_parameter< bool >::type use_fine_tune(use_fine_tuneSEXP);
-    Rcpp::traits::input_parameter< double >::type fine_tune_threshold(fine_tune_thresholdSEXP);
-    Rcpp::traits::input_parameter< int >::type nthreads(nthreadsSEXP);
-    rcpp_result_gen = Rcpp::wrap(run(test, subset, prebuilt, quantile, use_fine_tune, fine_tune_threshold, nthreads));
-    return rcpp_result_gen;
-END_RCPP
-}
 
 static const R_CallMethodDef CallEntries[] = {
+    {"_SingleR_classify_integrated", (DL_FUNC) &_SingleR_classify_integrated, 5},
+    {"_SingleR_classify_single", (DL_FUNC) &_SingleR_classify_single, 6},
     {"_SingleR_find_classic_markers", (DL_FUNC) &_SingleR_find_classic_markers, 6},
     {"_SingleR_grouped_medians", (DL_FUNC) &_SingleR_grouped_medians, 4},
-    {"_SingleR_integrate_build", (DL_FUNC) &_SingleR_integrate_build, 6},
-    {"_SingleR_integrate_run", (DL_FUNC) &_SingleR_integrate_run, 5},
-    {"_SingleR_prebuild", (DL_FUNC) &_SingleR_prebuild, 5},
-    {"_SingleR_get_subset", (DL_FUNC) &_SingleR_get_subset, 1},
+    {"_SingleR_train_integrated", (DL_FUNC) &_SingleR_train_integrated, 6},
+    {"_SingleR_train_single", (DL_FUNC) &_SingleR_train_single, 7},
+    {"_SingleR_get_ref_subset", (DL_FUNC) &_SingleR_get_ref_subset, 1},
     {"_SingleR_is_valid_built", (DL_FUNC) &_SingleR_is_valid_built, 1},
-    {"_SingleR_run", (DL_FUNC) &_SingleR_run, 7},
     {NULL, NULL, 0}
 };
 
diff --git a/src/integrate_run.cpp b/src/classify_integrated.cpp
similarity index 61%
rename from src/integrate_run.cpp
rename to src/classify_integrated.cpp
index b787468..9fbc20d 100644
--- a/src/integrate_run.cpp
+++ b/src/classify_integrated.cpp
@@ -3,9 +3,9 @@
 #include <vector>
 
 //[[Rcpp::export(rng=false)]]
-SEXP integrate_run(Rcpp::RObject test, Rcpp::List results, SEXP integrated_build, double quantile, int nthreads) {
+SEXP classify_integrated(Rcpp::RObject test, Rcpp::List results, SEXP integrated_build, double quantile, int nthreads) {
     Rtatami::BoundNumericPointer curtest(test);
-    IntegratedXPtr iptr(integrated_build);
+    TrainedIntegratedPointer iptr(integrated_build);
 
     // Setting up the previous results.
     std::vector<Rcpp::IntegerVector> previous_results_vec;
@@ -25,27 +25,25 @@ SEXP integrate_run(Rcpp::RObject test, Rcpp::List results, SEXP integrated_build
     Rcpp::IntegerVector best(ncells);
     Rcpp::NumericVector delta(ncells);
 
+    singlepp::ClassifyIntegratedBuffers<int, double> buffers;
+    buffers.best = static_cast<int*>(best.begin());
+    buffers.delta = static_cast<double*>(delta.begin());
+
     size_t nrefs = iptr->num_references();
     Rcpp::NumericMatrix scores(ncells, nrefs);
-    std::vector<double*> scores_ptr(nrefs);
     if (nrefs) {
-        scores_ptr[0] = static_cast<double*>(scores.begin());
+        buffers.scores.resize(nrefs);
+        buffers.scores[0] = static_cast<double*>(scores.begin());
         for (size_t l = 1; l < nrefs; ++l) {
-            scores_ptr[l] = scores_ptr[l-1] + ncells;
+            buffers.scores[l] = buffers.scores[l-1] + ncells;
         }
     }
 
     // Running the integrated scoring.
-    singlepp::IntegratedScorer scorer;
-    scorer.set_num_threads(nthreads).set_quantile(quantile);
-    scorer.run(
-        curtest->ptr.get(), 
-        previous_results, 
-        *iptr, 
-        static_cast<int*>(best.begin()),
-        scores_ptr,
-        static_cast<double*>(delta.begin())
-    );
+    singlepp::ClassifyIntegratedOptions<double> opts;
+    opts.num_threads = nthreads;
+    opts.quantile = quantile;
+    singlepp::classify_integrated(*(curtest->ptr), previous_results, *iptr, buffers, opts);
 
     return Rcpp::List::create(
         Rcpp::Named("best") = best,
diff --git a/src/classify_single.cpp b/src/classify_single.cpp
new file mode 100644
index 0000000..56275c1
--- /dev/null
+++ b/src/classify_single.cpp
@@ -0,0 +1,44 @@
+#include "utils.h" // must be before raticate, singlepp includes.
+
+#include <vector>
+
+//' @importFrom Rcpp sourceCpp
+//' @useDynLib SingleR
+//[[Rcpp::export(rng=false)]]
+SEXP classify_single(Rcpp::RObject test, SEXP prebuilt, double quantile, bool use_fine_tune, double fine_tune_threshold, int nthreads) {
+    Rtatami::BoundNumericPointer parsed(test);
+    TrainedSingleIntersectPointer built(prebuilt);
+
+    // Setting up outputs.
+    size_t ncells = parsed->ptr->ncol();
+    Rcpp::IntegerVector best(ncells);
+    Rcpp::NumericVector delta(ncells);
+
+    singlepp::ClassifySingleBuffers buffers;
+    buffers.best = static_cast<int*>(best.begin());
+    buffers.delta = static_cast<double*>(delta.begin());
+
+    size_t nlabels = built->num_labels();
+    Rcpp::NumericMatrix scores(ncells, nlabels);
+    if (nlabels) {
+        buffers.scores.resize(nlabels);
+        buffers.scores[0] = static_cast<double*>(scores.begin());
+        for (size_t l = 1; l < nlabels; ++l) {
+            buffers.scores[l] = buffers.scores[l-1] + ncells;
+        }
+    }
+
+    // Running the analysis.
+    singlepp::ClassifySingleOptions opts;
+    opts.num_threads = nthreads;
+    opts.quantile = quantile;
+    opts.fine_tune = use_fine_tune;
+    opts.fine_tune_threshold = fine_tune_threshold;
+    singlepp::classify_single_intersect(*(parsed->ptr), *built, buffers, opts);
+
+    return Rcpp::List::create(
+        Rcpp::Named("best") = best,
+        Rcpp::Named("scores") = scores,
+        Rcpp::Named("delta") = delta
+    );
+}
diff --git a/src/find_classic_markers.cpp b/src/find_classic_markers.cpp
index 87f8835..51e75f1 100644
--- a/src/find_classic_markers.cpp
+++ b/src/find_classic_markers.cpp
@@ -34,9 +34,10 @@ Rcpp::List find_classic_markers(int nlabels, int ngenes, Rcpp::List labels, Rcpp
         lab_ptrs.push_back(static_cast<const int*>(lab_vec.back().begin()));
     }
     
-    singlepp::ChooseClassicMarkers mrk;
-    mrk.set_number(de_n).set_num_threads(nthreads);
-    auto store = mrk.run(ref_ptrs, lab_ptrs);
+    singlepp::ChooseClassicMarkersOptions opts;
+    opts.number = de_n;
+    opts.num_threads = nthreads;
+    auto store = singlepp::choose_classic_markers(ref_ptrs, lab_ptrs, opts);
 
     // Returning everything in R space.
     Rcpp::List output(nlabels);
diff --git a/src/integrate_build.cpp b/src/integrate_build.cpp
deleted file mode 100644
index 7f05490..0000000
--- a/src/integrate_build.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "utils.h" // must be before all other includes.
-
-#include <vector>
-#include <memory>
-
-//[[Rcpp::export(rng=false)]]
-SEXP integrate_build(Rcpp::IntegerVector test_features, Rcpp::List references, Rcpp::List ref_ids, Rcpp::List labels, Rcpp::List prebuilt, int nthreads) {
-    singlepp::IntegratedBuilder builder;
-    builder.set_num_threads(nthreads);
-
-    size_t nrefs = references.size();
-    std::vector<Rcpp::IntegerVector> holding_labs;
-    holding_labs.reserve(nrefs);
-
-    for (size_t r = 0; r < nrefs; ++r) {
-        Rcpp::RObject curref(references[r]);
-        auto parsed = Rtatami::BoundNumericPointer(curref);
-
-        Rcpp::IntegerVector curids(ref_ids[r]);
-        holding_labs.emplace_back(labels[r]);
-        Rcpp::RObject built = prebuilt[r];
-        PrebuiltXPtr curbuilt(built);
-
-        builder.add(
-            test_features.size(),
-            static_cast<const int*>(test_features.begin()),
-            parsed->ptr.get(),
-            static_cast<const int*>(curids.begin()),
-            static_cast<const int*>(holding_labs.back().begin()), 
-            *curbuilt
-        );
-    }
-
-    auto finished = builder.finish();
-    return IntegratedXPtr(new singlepp::IntegratedReferences(std::move(finished)), true);
-}
diff --git a/src/prebuild.cpp b/src/prebuild.cpp
deleted file mode 100644
index 444d4a3..0000000
--- a/src/prebuild.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-#include "utils.h" 
-
-#include <vector>
-#include <memory>
-
-//' @importFrom Rcpp sourceCpp
-//' @useDynLib SingleR
-//[[Rcpp::export(rng=false)]]
-SEXP prebuild(Rcpp::RObject ref, Rcpp::IntegerVector labels, Rcpp::List markers, bool approximate, int nthreads) {
-    singlepp::BasicBuilder builder;
-    builder.set_num_threads(nthreads);
-
-    // Use all available markers; assume subsetting was applied on the R side.
-    builder.set_top(-1).set_approximate(approximate);
-    
-    // Setting up the markers.
-    singlepp::Markers markers2(markers.size());
-    for (size_t m = 0; m < markers.size(); ++m) {
-        Rcpp::List curmarkers(markers[m]);
-        auto& curmarkers2 = markers2[m];
-        curmarkers2.resize(curmarkers.size());
-
-        for (size_t n = 0; n < curmarkers.size(); ++n) {
-            Rcpp::IntegerVector seq(curmarkers[n]);
-            auto& seq2 =  curmarkers2[n];
-            seq2.insert(seq2.end(), seq.begin(), seq.end());
-        }
-    }
-
-    // Building the indices.
-    auto parsed = Rtatami::BoundNumericPointer(ref);
-    auto built = builder.run(parsed->ptr.get(), static_cast<const int*>(labels.begin()), std::move(markers2));
-
-    // Moving it into the external pointer.
-    return PrebuiltXPtr(new singlepp::BasicBuilder::Prebuilt(std::move(built)), true);
-}
-
-//[[Rcpp::export(rng=false)]]
-Rcpp::IntegerVector get_subset(SEXP built) {
-    PrebuiltXPtr ptr(built);
-    return Rcpp::IntegerVector(ptr->subset.begin(), ptr->subset.end());
-}
-
-//[[Rcpp::export(rng=false)]]
-Rcpp::LogicalVector is_valid_built(SEXP built) {
-    return Rf_ScalarLogical(!!R_ExternalPtrAddr(built));
-}
diff --git a/src/run.cpp b/src/run.cpp
deleted file mode 100644
index b490897..0000000
--- a/src/run.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-#include "utils.h" // must be before raticate, singlepp includes.
-
-#include <vector>
-
-//' @importFrom Rcpp sourceCpp
-//' @useDynLib SingleR
-//[[Rcpp::export(rng=false)]]
-SEXP run(Rcpp::RObject test, Rcpp::IntegerVector subset, SEXP prebuilt, double quantile, bool use_fine_tune, double fine_tune_threshold, int nthreads) {
-    auto parsed = Rtatami::BoundNumericPointer(test);
-    PrebuiltXPtr built(prebuilt);
-
-    // Setting up outputs.
-    size_t ncells = parsed->ptr->ncol();
-    Rcpp::IntegerVector best(ncells);
-    Rcpp::NumericVector delta(ncells);
-
-    size_t nlabels = built->num_labels();
-    Rcpp::NumericMatrix scores(ncells, nlabels);
-    std::vector<double*> scores_ptr(nlabels);
-    if (nlabels) {
-        scores_ptr[0] = static_cast<double*>(scores.begin());
-        for (size_t l = 1; l < nlabels; ++l) {
-            scores_ptr[l] = scores_ptr[l-1] + ncells;
-        }
-    }
-
-    // Running the analysis.
-    singlepp::BasicScorer runner;
-    runner.set_num_threads(nthreads);
-    runner.set_quantile(quantile).set_fine_tune(use_fine_tune).set_fine_tune_threshold(fine_tune_threshold);
-    runner.run(
-        parsed->ptr.get(), 
-        *built, 
-        static_cast<const int*>(subset.begin()),
-        static_cast<int*>(best.begin()),
-        scores_ptr,
-        static_cast<double*>(delta.begin())
-    );
-
-    return Rcpp::List::create(
-        Rcpp::Named("best") = best,
-        Rcpp::Named("scores") = scores,
-        Rcpp::Named("delta") = delta
-    );
-}
diff --git a/src/train_integrated.cpp b/src/train_integrated.cpp
new file mode 100644
index 0000000..0705b33
--- /dev/null
+++ b/src/train_integrated.cpp
@@ -0,0 +1,39 @@
+#include "utils.h" // must be before all other includes.
+
+#include <vector>
+#include <memory>
+
+//[[Rcpp::export(rng=false)]]
+SEXP train_integrated(Rcpp::IntegerVector test_features, Rcpp::List references, Rcpp::List ref_ids, Rcpp::List labels, Rcpp::List prebuilt, int nthreads) {
+    size_t nrefs = references.size();
+
+    std::vector<singlepp::TrainIntegratedInput<double, int, int> > inputs;
+    inputs.reserve(nrefs);
+    std::vector<Rcpp::IntegerVector> holding_labs;
+    holding_labs.reserve(nrefs);
+
+    for (size_t r = 0; r < nrefs; ++r) {
+        Rcpp::RObject curref(references[r]);
+        Rtatami::BoundNumericPointer parsed(curref);
+
+        Rcpp::IntegerVector curids(ref_ids[r]);
+        holding_labs.emplace_back(labels[r]);
+        Rcpp::RObject built = prebuilt[r];
+        TrainedSingleIntersectPointer curbuilt(built);
+
+        inputs.push_back(singlepp::prepare_integrated_input_intersect(
+            static_cast<int>(test_features.size()),
+            static_cast<const int*>(test_features.begin()),
+            *(parsed->ptr),
+            static_cast<const int*>(curids.begin()),
+            static_cast<const int*>(holding_labs.back().begin()), 
+            *curbuilt
+        ));
+    }
+
+    singlepp::TrainIntegratedOptions opts;
+    opts.num_threads = nthreads;
+    auto finished = singlepp::train_integrated(std::move(inputs), opts);
+
+    return TrainedIntegratedPointer(new TrainedIntegrated(std::move(finished)), true);
+}
diff --git a/src/train_single.cpp b/src/train_single.cpp
new file mode 100644
index 0000000..77aa41a
--- /dev/null
+++ b/src/train_single.cpp
@@ -0,0 +1,57 @@
+#include "utils.h" 
+#include "BiocNeighbors.h"
+
+#include <vector>
+#include <memory>
+
+//' @importFrom Rcpp sourceCpp
+//' @useDynLib SingleR
+//[[Rcpp::export(rng=false)]]
+SEXP train_single(Rcpp::IntegerVector test_features, Rcpp::RObject ref, Rcpp::IntegerVector ref_features, Rcpp::IntegerVector labels, Rcpp::List markers, Rcpp::RObject builder, int nthreads) {
+    singlepp::TrainSingleOptions opts;
+    opts.num_threads = nthreads;
+    opts.top = -1; // Use all available markers; assume subsetting was applied on the R side.
+
+    BiocNeighbors::BuilderPointer bptr(builder);
+    opts.trainer = std::shared_ptr<BiocNeighbors::Builder>(std::shared_ptr<BiocNeighbors::Builder>{}, bptr.get()); // make a no-op shared pointer.
+
+    // Setting up the markers. We assume that these are already 0-indexed on the R side.
+    singlepp::Markers<int> markers2(markers.size());
+    for (size_t m = 0; m < markers.size(); ++m) {
+        Rcpp::List curmarkers(markers[m]);
+        auto& curmarkers2 = markers2[m];
+        curmarkers2.resize(curmarkers.size());
+
+        for (size_t n = 0; n < curmarkers.size(); ++n) {
+            Rcpp::IntegerVector seq(curmarkers[n]);
+            auto& seq2 =  curmarkers2[n];
+            seq2.insert(seq2.end(), seq.begin(), seq.end());
+        }
+    }
+
+    // Building the indices.
+    Rtatami::BoundNumericPointer parsed(ref);
+    auto built = singlepp::train_single_intersect(
+        static_cast<int>(test_features.size()),
+        static_cast<const int*>(test_features.begin()),
+        *(parsed->ptr),
+        static_cast<const int*>(ref_features.begin()),
+        static_cast<const int*>(labels.begin()),
+        std::move(markers2),
+        opts
+    );
+
+    return TrainedSingleIntersectPointer(new TrainedSingleIntersect(std::move(built)), true);
+}
+
+//[[Rcpp::export(rng=false)]]
+Rcpp::IntegerVector get_ref_subset(SEXP built) {
+    TrainedSingleIntersectPointer ptr(built);
+    const auto& rsub = ptr->get_ref_subset();
+    return Rcpp::IntegerVector(rsub.begin(), rsub.end());
+}
+
+//[[Rcpp::export(rng=false)]]
+Rcpp::LogicalVector is_valid_built(SEXP built) {
+    return Rf_ScalarLogical(!!R_ExternalPtrAddr(built));
+}
diff --git a/src/utils.h b/src/utils.h
index 4cd1173..3e9f97e 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -2,16 +2,15 @@
 #define UTILS_H 
 
 #include "Rcpp.h"
-#include "Rtatami.h"
+#include "Rtatami.h" // before singlepp includes to ensure the tatami_r::parallelize() override is set.
+#include "singlepp/singlepp.hpp"
 
-// must be before singlepp includes.
-#define SINGLEPP_CUSTOM_PARALLEL tatami_r::parallelize 
-#define __ERROR_PRINTER_OVERRIDE__  REprintf // avoid R CMD check warnings about stderr in Annoy.
+typedef singlepp::TrainedSingleIntersect<int, double> TrainedSingleIntersect;
 
-#include "singlepp/singlepp.hpp"
+typedef Rcpp::XPtr<TrainedSingleIntersect> TrainedSingleIntersectPointer;
 
-typedef Rcpp::XPtr<singlepp::BasicBuilder::Prebuilt> PrebuiltXPtr;
+typedef singlepp::TrainedIntegrated<int> TrainedIntegrated;
 
-typedef Rcpp::XPtr<singlepp::IntegratedReferences> IntegratedXPtr;
+typedef Rcpp::XPtr<TrainedIntegrated> TrainedIntegratedPointer;
 
 #endif
diff --git a/tests/testthat/test-SingleR.R b/tests/testthat/test-SingleR.R
index 5b0b06a..37f6e43 100644
--- a/tests/testthat/test-SingleR.R
+++ b/tests/testthat/test-SingleR.R
@@ -70,10 +70,11 @@ test_that("SingleR handles DelayedArray inputs", {
 })
 
 test_that("SingleR works with multiple references", {
-    # Handles mismatching row names.
-    chosen0 <- sample(rownames(training), 900)
-    chosen1 <- sample(rownames(training), 900)
-    chosen2 <- sample(rownames(training), 900)
+    # Handles mismatching row names. Note that the sorting is necessary
+    # to ensure that tied genes are handled in a consistent way.
+    chosen0 <- sort(sample(rownames(training), 900))
+    chosen1 <- sort(sample(rownames(training), 900))
+    chosen2 <- sort(sample(rownames(training), 900))
 
     # Works with recomputation.
     out <- SingleR(test[chosen0,], list(training[chosen1,], training[chosen2,]), 
diff --git a/tests/testthat/test-classify.R b/tests/testthat/test-classify.R
index 4ad16e0..2537ef5 100644
--- a/tests/testthat/test-classify.R
+++ b/tests/testthat/test-classify.R
@@ -79,18 +79,6 @@ test_that("classifySingleR behaves with no-variance cells", {
     expect_identical(out$labels[-(1:10)], ref$labels[-(1:10)])
 })
 
-test_that("classifySingleR behaves with missing values", {
-    # Can't just set the first entry to NA, as we need to ensure 
-    # that the test set contains a superset of genes in the training set.
-    sce <- BiocGenerics::rbind(test[1,], test)
-    logcounts(sce)[1,1] <- NA
-
-    Q <- 0.8
-    out <- classifySingleR(sce, trained, fine.tune=FALSE, quantile=Q)
-    ref <- classifySingleR(test, trained, fine.tune=FALSE, quantile=Q)
-    expect_identical(out, ref)
-})
-
 test_that("classifySingleR works with multiple references", {
     training1 <- training2 <- training
     training1 <- training1[sample(nrow(training1)),]
@@ -98,15 +86,23 @@ test_that("classifySingleR works with multiple references", {
 
     mtrain <- trainSingleR(list(training1, training2), list(training1$label, training2$label))
     out <- classifySingleR(test, mtrain)
+    expect_identical(names(out$orig.results), c("ref1", "ref2"))
+    expect_true(all(out$reference %in% 1:2))
 
     ref1 <- classifySingleR(test, mtrain[[1]])
     ref2 <- classifySingleR(test, mtrain[[2]])
     expect_identical(out, combineRecomputedResults(list(ref1, ref2), test, mtrain))
+
+    # Preserves names of the references themselves.
+    mtrain <- trainSingleR(list(foo=training1, bar=training2), list(training1$label, training2$label))
+    out <- classifySingleR(test, mtrain)
+    expect_identical(names(out$orig.results), c("foo", "bar"))
+    expect_true(all(out$reference %in% 1:2))
 })
 
 test_that("classifySingleR behaves with silly inputs", {
     out <- classifySingleR(test[,0], trained, fine.tune=FALSE)
     expect_identical(nrow(out$scores), 0L)
     expect_identical(length(out$labels), 0L)
-    expect_error(classifySingleR(test[0,], trained, fine.tune=FALSE), "does not contain")
+    expect_error(classifySingleR(test[0,], trained, fine.tune=FALSE), "expected 'rownames(test)' to be the same", fixed=TRUE)
 })
diff --git a/tests/testthat/test-recomputed.R b/tests/testthat/test-recomputed.R
index 4c75c3f..be75d4c 100644
--- a/tests/testthat/test-recomputed.R
+++ b/tests/testthat/test-recomputed.R
@@ -19,11 +19,11 @@ test <- .mockTestData(ref)
 test <- scuttle::logNormCounts(test)
 
 ref1 <- scuttle::logNormCounts(ref1)
-train1 <- trainSingleR(ref1, labels=ref1$label)
+train1 <- trainSingleR(ref1, labels=ref1$label, test.genes=rownames(test))
 pred1 <- classifySingleR(test, train1)
 
 ref2 <- scuttle::logNormCounts(ref2)
-train2 <- trainSingleR(ref2, labels=ref2$label)
+train2 <- trainSingleR(ref2, labels=ref2$label, test.genes=rownames(test))
 pred2 <- classifySingleR(test, train2)
 
 test_that("combineRecomputedResults works as expected (light check)", {
@@ -100,47 +100,28 @@ test_that("combineRecomputedResults handles mismatches to rows and cells", {
         trained=list(train1, train2)), "not identical")
     colnames(test) <- NULL
 
-    # Correctly reorders the gene universes.
-    ref <- combineRecomputedResults(
-        results=list(pred1, pred2),
-        test=test,
-        trained=list(train1, train2))
-
+    # Responds to mismatches in the genes.
     s <- sample(nrow(test))
-    out <- combineRecomputedResults(
+    expect_error(combineRecomputedResults(
         results=list(pred1, pred2),
         test=test[s,],
-        trained=list(train1, train2))
-    expect_equal(ref, out)
+        trained=list(train1, train2)), "test.genes")
 })
 
 test_that("combineRecomputedResults emits warnings when missing genes are present", {
+    half <- nrow(test) / 2
+
     # Spiking in some missing genes.
-    ref1b <- ref1[c(1, seq_len(nrow(ref1))),]
-    rownames(ref1b)[1] <- "BLAH"
-    markers1 <- train1$markers$full
-    markers1$A$B <- c(markers1$A$B, "BLAH")
-    train1b <- trainSingleR(ref1b, labels=ref1$label, genes=markers1)
-
-    ref2b <- ref2[c(1, seq_len(nrow(ref2))),]
-    rownames(ref2b)[1] <- "WHEE"
-    markers2 <- train2$markers$full
-    markers2$A$B <- c(markers2$a$b, "WHEE")
-    train2b <- trainSingleR(ref2b, labels=ref2$label, genes=markers2)
-
-    expect_error(out <- combineRecomputedResults(
-        results=list(pred1, pred2), 
-        test=test,
-        trained=list(train1b, train2b)), "should be present")
+    ref1b <- ref1[seq_len(half),,drop=FALSE]
+    train1b <- trainSingleR(ref1b, labels=ref1$label, test.genes=rownames(test))
 
-    test2 <- test[c(1,seq_len(nrow(test)),1),]
-    rownames(test2)[1] <- "WHEE"
-    rownames(test2)[length(rownames(test2))] <- "BLAH"
+    ref2b <- ref2[half + seq_len(half),]
+    train2b <- trainSingleR(ref2b, labels=ref2$label, test.genes=rownames(test))
 
     expect_warning(out <- combineRecomputedResults(
         results=list(pred1, pred2), 
-        test=test2,
-        trained=list(train1b, train2b)), "differ in the universe")
+        test=test,
+        trained=list(train1b, train2b)), "available in each reference")
 })
 
 test_that("combineRecomputedResults is invariant to ordering", {
diff --git a/tests/testthat/test-train.R b/tests/testthat/test-train.R
index 03ead0f..f216fdd 100644
--- a/tests/testthat/test-train.R
+++ b/tests/testthat/test-train.R
@@ -128,7 +128,8 @@ test_that("trainSingleR strips out NAs", {
     out <- trainSingleR(sce, sce$label)
     ref <- trainSingleR(sce[-1,], sce$label)
 
-    expect_identical(out$ref, ref$ref)
+    expect_identical(as.matrix(out$ref), ref$ref)
+    expect_identical(out$markers, ref$markers)
 })
 
 test_that("trainSingleR behaves with multiple references, plus recomputation", {
@@ -136,16 +137,17 @@ test_that("trainSingleR behaves with multiple references, plus recomputation", {
     training1 <- training1[sample(nrow(training1)),]
     rownames(training1) <- rownames(training)
     
-    set.seed(1000)
     ref1 <- trainSingleR(training1, training1$label)
     ref2 <- trainSingleR(training2, training2$label)
-
-    set.seed(1000)
-    out <- trainSingleR(list(training1, training2), list(training1$label, training2$label), recompute=TRUE)
+    out <- trainSingleR(list(training1, training2), list(training1$label, training2$label))
 
     except.built <- setdiff(names(ref1), "built")
     expect_identical(ref1[except.built], out[[1]][except.built])
     expect_identical(ref2[except.built], out[[2]][except.built])
+
+    # Same result with names.
+    out <- trainSingleR(list(foo=training1, bar=training2), list(training1$label, training2$label))
+    expect_identical(names(out), c("foo", "bar"))
 })
 
 test_that("trainSingleR behaves with aggregation turned on", {