From 84a6e55d43f96794c38b9f28bbcc3b93d7db46e3 Mon Sep 17 00:00:00 2001 From: Aaron Lun Date: Wed, 22 May 2024 15:33:08 -0700 Subject: [PATCH] Updates for the latest version of the tatami library. (#271) Require the minimum version for the latest beachmat. --- DESCRIPTION | 6 +- R/getClassicMarkers.R | 1 - inst/include/singlepp/BasicBuilder.hpp | 17 +++- inst/include/singlepp/BasicScorer.hpp | 30 +++----- .../include/singlepp/ChooseClassicMarkers.hpp | 6 +- inst/include/singlepp/Classifier.hpp | 20 ++--- inst/include/singlepp/IntegratedBuilder.hpp | 77 ++++++++++--------- inst/include/singlepp/IntegratedScorer.hpp | 14 ++-- inst/include/singlepp/annotate_cells.hpp | 2 +- inst/include/singlepp/build_indices.hpp | 3 +- inst/include/source.sh | 2 +- src/grouped_medians.cpp | 20 ++++- tests/testthat/test-markers.R | 1 - 13 files changed, 105 insertions(+), 94 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7b645b6..7e96163 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: SingleR Title: Reference-Based Single-Cell RNA-Seq Annotation -Version: 2.7.0 -Date: 2024-02-13 +Version: 2.7.1 +Date: 2024-05-22 Authors@R: c(person("Dvir", "Aran", email="dvir.aran@ucsf.edu", role=c("aut", "cph")), person("Aaron", "Lun", email="infinite.monkeys.with.keyboards@gmail.com", role=c("ctb", "cre")), person("Daniel", "Bunis", role="ctb"), @@ -23,7 +23,7 @@ Imports: stats, utils, Rcpp, - beachmat, + beachmat (>= 2.21.1), parallel LinkingTo: Rcpp, diff --git a/R/getClassicMarkers.R b/R/getClassicMarkers.R index 32202cc..5ab6c93 100644 --- a/R/getClassicMarkers.R +++ b/R/getClassicMarkers.R @@ -78,7 +78,6 @@ getClassicMarkers <- function(ref, labels, assay.type="logcounts", check.missing flabels <- factor(labels[[i]]) gm <- grouped_medians(curptr, as.integer(flabels) - 1L, nlevels(flabels), nthreads = num.threads) - gm <- t(gm) colnames(gm) <- levels(flabels) ref[[i]] <- gm } diff --git a/inst/include/singlepp/BasicBuilder.hpp 
b/inst/include/singlepp/BasicBuilder.hpp index cc040cc..111f9bf 100644 --- a/inst/include/singlepp/BasicBuilder.hpp +++ b/inst/include/singlepp/BasicBuilder.hpp @@ -36,7 +36,7 @@ class BasicBuilder { /** * See `set_top()` for details. */ - static constexpr int top = 20; + static constexpr int top = -1; /** * See `set_approximate()` for details. @@ -204,13 +204,13 @@ class BasicBuilder { /** * Row indices of test dataset, specifying the features in the intersection. - * This has the same length as `ref_indices`. + * This has the same length as `ref_subset`, where corresponding entries refer to the same features in the respective datasets. */ std::vector mat_subset; /** * Row indices of reference dataset, specifying the features in the intersection. - * This has the same length as `mat_indices`. + * This has the same length as `mat_subset`, where corresponding entries refer to the same features in the respective datasets. */ std::vector ref_subset; @@ -221,6 +221,17 @@ class BasicBuilder { return references.size(); } + /** + * @return Number of profiles in this reference. + */ + size_t num_profiles() const { + size_t n = 0; + for (const auto& ref : references) { + n += ref.ranked.size(); + } + return n; + } + /** * @cond */ diff --git a/inst/include/singlepp/BasicScorer.hpp b/inst/include/singlepp/BasicScorer.hpp index d4f7960..af4db05 100644 --- a/inst/include/singlepp/BasicScorer.hpp +++ b/inst/include/singlepp/BasicScorer.hpp @@ -116,15 +116,13 @@ class BasicScorer { * containing the index of the row of `mat` corresponding to each gene in `built.subset`. * That is, row `mat_subset[i]` in `mat` should be the same gene as row `built.subset[i]` in the reference matrix. * @param[out] best Pointer to an array of length equal to the number of columns in `mat`. - * This is filled with the index of the assigned label for each cell. + * On output, this is filled with the index of the assigned label for each cell. 
* @param[out] scores Vector of pointers to arrays of length equal to the number of columns in `mat`. - * This is filled with the (non-fine-tuned) score for each label for each cell. - * Any pointer may be `NULL` in which case the scores for that label will not be saved. + * On output, this is filled with the (non-fine-tuned) score for each label for each cell. + * Any pointer may be `NULL` in which case the scores for that label will not be reported. * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`. - * This is filled with the difference between the highest and second-highest scores, possibly after fine-tuning. + * On output, this is filled with the difference between the highest and second-highest scores, possibly after fine-tuning. * This may also be `NULL` in which case the deltas are not reported. - * - * @return `best`, `scores` and `delta` are filled with their output values. */ void run(const tatami::Matrix* mat, const BasicBuilder::Prebuilt& built, const int* mat_subset, int* best, std::vector& scores, double* delta) const { annotate_cells_simple( @@ -149,15 +147,13 @@ class BasicScorer { * This should have the same order and identity of genes as the reference matrix used to create `built`. * @param built An object produced by `BasicBuilder::build()`. * @param[out] best Pointer to an array of length equal to the number of columns in `mat`. - * This is filled with the index of the assigned label for each cell. + * On output, this is filled with the index of the assigned label for each cell. * @param[out] scores Vector of pointers to arrays of length equal to the number of columns in `mat`. - * This is filled with the (non-fine-tuned) score for each label for each cell. - * Any pointer may be `NULL` in which case the scores for that label will not be saved. + * On output, this is filled with the (non-fine-tuned) score for each label for each cell. 
+ * Any pointer may be `NULL` in which case the scores for that label will not be reported. * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`. - * This is filled with the difference between the highest and second-highest scores, possibly after fine-tuning. + * On output, this is filled with the difference between the highest and second-highest scores, possibly after fine-tuning. * This may also be `NULL` in which case the deltas are not reported. - * - * @return `best`, `scores` and `delta` are filled with their output values. */ void run(const tatami::Matrix* mat, const BasicBuilder::Prebuilt& built, int* best, std::vector& scores, double* delta) const { run(mat, built, built.subset.data(), best, scores, delta); @@ -243,15 +239,13 @@ class BasicScorer { * @param mat Expression matrix of the test dataset, where rows are genes and columns are cells. * @param built An object produced by `build()` with intersections. * @param[out] best Pointer to an array of length equal to the number of columns in `mat`. - * This is filled with the index of the assigned label for each cell. + * On output, this is filled with the index of the assigned label for each cell. * @param[out] scores Vector of pointers to arrays of length equal to the number of columns in `mat`. - * This is filled with the (non-fine-tuned) score for each label for each cell. - * Any pointer may be `NULL` in which case the scores for that label will not be saved. + * On output, this is filled with the (non-fine-tuned) score for each label for each cell. + * Any pointer may be `NULL` in which case the scores for that label will not be reported. * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`. - * This is filled with the difference between the highest and second-highest scores, possibly after fine-tuning. + * On output, this is filled with the difference between the highest and second-highest scores, possibly after fine-tuning. 
* This may also be `NULL` in which case the deltas are not reported. - * - * @return `best`, `scores` and `delta` are filled with their output values. */ void run( const tatami::Matrix* mat, diff --git a/inst/include/singlepp/ChooseClassicMarkers.hpp b/inst/include/singlepp/ChooseClassicMarkers.hpp index 0fbe535..c84feea 100644 --- a/inst/include/singlepp/ChooseClassicMarkers.hpp +++ b/inst/include/singlepp/ChooseClassicMarkers.hpp @@ -69,7 +69,7 @@ class ChooseClassicMarkers { * * @return An appropriate number of markers for each pairwise comparison. * - * The exact expression is defined as $500 (\frac{2}{3})^{\log_2{N}}$ for $N$ labels, + * The exact expression is defined as \f$500 (\frac{2}{3})^{\log_2{N}}\f$ for \f$N\f$ labels, * which steadily decreases the markers per comparison as the number of labels increases. * This aims to avoid an excessive number of features when dealing with references with many labels. */ @@ -165,7 +165,7 @@ class ChooseClassicMarkers { int actual_number = number; if (number < 0) { - actual_number = std::round(500.0 * std::pow(2.0/3.0, std::log(static_cast(nlabels)) / std::log(2.0))); + actual_number = number_of_markers(nlabels); } if (actual_number > static_cast(ngenes)) { actual_number = ngenes; @@ -182,7 +182,7 @@ class ChooseClassicMarkers { typedef typename Matrix::value_type Value_; typedef typename Matrix::index_type Index_; std::vector rbuffer(ngenes), lbuffer(ngenes); - std::vector > > rworks(nrefs), lworks(nrefs); + std::vector > > rworks(nrefs), lworks(nrefs); #ifndef SINGLEPP_CUSTOM_PARALLEL #pragma omp for diff --git a/inst/include/singlepp/Classifier.hpp b/inst/include/singlepp/Classifier.hpp index 6cbc29c..8d784fc 100644 --- a/inst/include/singlepp/Classifier.hpp +++ b/inst/include/singlepp/Classifier.hpp @@ -209,16 +209,14 @@ class Classifier { * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1. 
* @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels, see `Markers` for more details. * @param[out] best Pointer to an array of length equal to the number of columns in `mat`. - * This is filled with the index of the assigned label for each cell. + * On output, this is filled with the index of the assigned label for each cell. * @param[out] scores Vector of pointers of length equal to the number of labels. * Each pointer should point to an array of length equal to the number of columns in `mat`. - * This is filled with the (non-fine-tuned) score for that label for each cell. - * Any pointer may be `NULL` in which case the scores for that label will not be saved. + * On output, this is filled with the (non-fine-tuned) score for that label for each cell. + * Any pointer may be `NULL` in which case the scores for that label will not be reported. * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`. - * This is filled with the difference between the highest and second-highest scores, possibly after fine-tuning. + * On output, this is filled with the difference between the highest and second-highest scores, possibly after fine-tuning. * This may also be `NULL` in which case the deltas are not reported. - * - * @return `best`, `scores` and `delta` are filled with their output values. */ void run(const tatami::Matrix* mat, const tatami::Matrix* ref, const int* labels, Markers markers, int* best, std::vector& scores, double* delta) const { auto prebuilt = build_reference(ref, labels, std::move(markers)); @@ -256,17 +254,15 @@ class Classifier { * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1. * @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels, see `Markers` for more details. * @param[out] best Pointer to an array of length equal to the number of columns in `mat`. 
- * This is filled with the index of the assigned label for each cell. + * On output, this is filled with the index of the assigned label for each cell. * @param[out] scores Vector of pointers of length equal to the number of labels. * Each pointer should point to an array of length equal to the number of columns in `mat`. - * This is filled with the (non-fine-tuned) score for that label for each cell. - * Any pointer may be `NULL` in which case the scores for that label will not be saved. + * On output, this is filled with the (non-fine-tuned) score for that label for each cell. + * Any pointer may be `NULL` in which case the scores for that label will not be reported. * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`. - * This is filled with the difference between the highest and second-highest scores, possibly after fine-tuning. + * On output, this is filled with the difference between the highest and second-highest scores, possibly after fine-tuning. * This may also be `NULL` in which case the deltas are not reported. * - * @return `best`, `scores` and `delta` are filled with their output values. - * * This version of `run()` applies an intersection to find the common genes between `mat` and `ref`, based on their shared values in `mat_id` and `ref_id`. * The annotation is then performed using only the subset of common genes. * The aim is to easily accommodate differences in feature annotation between the test and reference profiles. diff --git a/inst/include/singlepp/IntegratedBuilder.hpp b/inst/include/singlepp/IntegratedBuilder.hpp index fcf6641..1ba3509 100644 --- a/inst/include/singlepp/IntegratedBuilder.hpp +++ b/inst/include/singlepp/IntegratedBuilder.hpp @@ -213,22 +213,27 @@ class IntegratedBuilder { public: /** + * Add a reference dataset to this object for later use in `finish()`. + * This overload assumes that the reference and test datasets have the same features. 
+ * `ref` and `labels` are expected to remain valid until `finish()` is called. + * * @param ref Matrix containing the reference expression values. * Rows are features and columns are reference samples. * The number and identity of features should be identical to the test dataset to be classified in `IntegratedScorer`. * @param[in] labels Pointer to an array of label assignments. * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1. * @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels in `ref`, see `Markers` for more details. - * - * @return The reference dataset is registered for later use in `finish()`. - * - * `ref` and `labels` are expected to remain valid until `finish()` is called. */ void add(const tatami::Matrix* ref, const int* labels, const Markers& markers) { add_internal_direct(ref, labels, markers, false); } /** + * Add a reference dataset to this object for later use in `finish()`. + * This overload automatically identifies the intersection of features between the test and reference datasets. + * `ref` and `labels` are expected to remain valid until `finish()` is called. + * `mat_id` and `mat_nrow` should also be constant for all invocations to `add()`. + * * @tparam Id Type of the gene identifier for each row. * * @param mat_nrow Number of rows (genes) in the test dataset. @@ -243,11 +248,6 @@ class IntegratedBuilder { * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample. * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1. * @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels in `ref`, see `Markers` for more details. - * - * @return The reference dataset is registered for later use in `finish()`. 
- * - * `ref` and `labels` are expected to remain valid until `finish()` is called. - * `mat_id` and `mat_nrow` should also be constant for all invocations to `add()`. */ template void add(size_t mat_nrow, @@ -263,16 +263,17 @@ class IntegratedBuilder { public: /** + * Add a reference dataset to this object for later use in `finish()`. + * This overload assumes that the reference and test datasets have the same features, + * and that the reference dataset has already been processed through `BasicBuilder::run()`. + * `ref` and `labels` are expected to remain valid until `finish()` is called. + * * @param ref Matrix containing the reference expression values. * Rows are features and columns are reference samples. * The number and identity of features should be identical to the test dataset to be classified in `IntegratedScorer`. * @param[in] labels Pointer to an array of label assignments. * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1. * @param built The built reference created by running `BasicBuilder::run()` on `ref` and `labels`. - * - * @return The reference dataset is registered for later use in `finish()`. - * - * `ref` and `labels` are expected to remain valid until `finish()` is called. */ void add(const tatami::Matrix* ref, const int* labels, const BasicBuilder::Prebuilt& built) { add_internal_direct(ref, labels, built.markers, built.subset); @@ -280,7 +281,12 @@ class IntegratedBuilder { } /** - * @param intersection Vector defining the intersection of features betweent the test and reference datasets. + * Add a reference dataset to this object for later use in `finish()`. + * This overload requires an existing intersection between the test and reference datasets, + * and assumes that the reference dataset has already been processed through `BasicBuilder::run()`. + * `ref` and `labels` are expected to remain valid until `finish()` is called. 
+ * + * @param intersection Vector defining the intersection of features between the test and reference datasets. * Each entry is a pair where the first element is the row index in the test matrix, * and the second element is the row index for the corresponding feature in the reference matrix. * Each row index for either matrix should occur no more than once in `intersection`. @@ -289,11 +295,6 @@ class IntegratedBuilder { * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample. * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1. * @param built The built reference created by running `BasicBuilder::run()` on all preceding arguments. - * - * @return The reference dataset is registered for later use in `finish()`. - * - * `ref` and `labels` are expected to remain valid until `finish()` is called. - * `mat_id` and `mat_nrow` should also be constant for all invocations to `add()`. */ void add(const std::vector >& intersection, const tatami::Matrix* ref, @@ -312,6 +313,12 @@ class IntegratedBuilder { } /** + * Add a reference dataset to this object for later use in `finish()`. + * This overload automatically identifies the intersection of features between the test and reference datasets, + * and assumes that the reference dataset has already been processed through `BasicBuilder::run()`. + * `ref` and `labels` are expected to remain valid until `finish()` is called. + * `mat_id` and `mat_nrow` should also be constant for all invocations to `add()`. + * * @tparam Id Type of the gene identifier for each row. * * @param mat_nrow Number of rows (genes) in the test dataset. @@ -326,11 +333,6 @@ class IntegratedBuilder { * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample. * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1. 
* @param built The built reference created by running `BasicBuilder::run()` on all preceding arguments. - * - * @return The reference dataset is registered for later use in `finish()`. - * - * `ref` and `labels` are expected to remain valid until `finish()` is called. - * `mat_id` and `mat_nrow` should also be constant for all invocations to `add()`. */ template void add(size_t mat_nrow, @@ -346,6 +348,11 @@ class IntegratedBuilder { } /** + * Add a reference dataset to this object for later use in `finish()`. + * This overload requires an existing intersection between the test and reference datasets, + * and assumes that the reference dataset has already been processed through `BasicBuilder::run()`. + * `ref` and `labels` are expected to remain valid until `finish()` is called. + * * @param intersection Vector defining the intersection of features betweent the test and reference datasets. * Each entry is a pair where the first element is the row index in the test matrix, * and the second element is the row index for the corresponding feature in the reference matrix. @@ -355,11 +362,6 @@ class IntegratedBuilder { * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample. * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1. * @param built The built reference created by running `BasicBuilder::run()` on `ref` and `labels`. - * - * @return The reference dataset is registered for later use in `finish()`. - * - * `ref` and `labels` are expected to remain valid until `finish()` is called. - * `mat_id` and `mat_nrow` should also be constant for all invocations to `add()`. */ void add(const std::vector >& intersection, const tatami::Matrix* ref, @@ -370,6 +372,12 @@ class IntegratedBuilder { } /** + * Add a reference dataset to this object for later use in `finish()`. 
+ * This overload automatically identifies the intersection of features between the test and reference datasets, + * and assumes that the reference dataset has already been processed through `BasicBuilder::run()`. + * `ref` and `labels` are expected to remain valid until `finish()` is called. + * `mat_id` and `mat_nrow` should also be constant for all invocations to `add()`. + * * @tparam Id Type of the gene identifier for each row. * * @param mat_nrow Number of rows (genes) in the test dataset. @@ -384,11 +392,6 @@ class IntegratedBuilder { * @param[in] labels An array of length equal to the number of columns of `ref`, containing the label for each sample. * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1. * @param built The built reference created by running `BasicBuilder::run()` on `ref` and `labels`. - * - * @return The reference dataset is registered for later use in `finish()`. - * - * `ref` and `labels` are expected to remain valid until `finish()` is called. - * `mat_id` and `mat_nrow` should also be constant for all invocations to `add()`. */ template void add(size_t mat_nrow, @@ -424,8 +427,8 @@ class IntegratedBuilder { // 'in_use' is guaranteed to be sorted and unique, see its derivation in finish(). // This means we can directly use it for indexed extraction. 
- auto wrk = tatami::consecutive_extractor(curmat, start, len, in_use); - std::vector buffer(wrk->index_length); + auto wrk = tatami::consecutive_extractor(curmat, false, start, len, in_use); + std::vector buffer(in_use.size()); for (int c = start, end = start + len; c < end; ++c) { auto ptr = wrk->fetch(c, buffer.data()); @@ -481,7 +484,7 @@ class IntegratedBuilder { RankedVector tmp_ranked; tmp_ranked.reserve(remapped_in_use.size()); std::vector buffer(remapped_in_use.size()); - auto wrk = tatami::consecutive_extractor(curmat, start, len, remapped_in_use); + auto wrk = tatami::consecutive_extractor(curmat, false, start, len, remapped_in_use); for (size_t c = start, end = start + len; c < end; ++c) { auto ptr = wrk->fetch(c, buffer.data()); diff --git a/inst/include/singlepp/IntegratedScorer.hpp b/inst/include/singlepp/IntegratedScorer.hpp index 8994624..e57b804 100644 --- a/inst/include/singlepp/IntegratedScorer.hpp +++ b/inst/include/singlepp/IntegratedScorer.hpp @@ -183,16 +183,14 @@ class IntegratedScorer { * containing the assigned label for each column in each reference. * @param built Set of integrated references produced by `IntegratedBuilder::finish()`. * @param[out] best Pointer to an array of length equal to the number of columns in `mat`. - * This is filled with the index of the reference with the best label for each cell. + * On output, this is filled with the index of the reference with the best label for each cell. * @param[out] scores Vector of pointers of length equal to the number of references. * Each pointer should point to an array of length equal to the number of columns in `mat`. - * This is filled with the (non-fine-tuned) score for the best label of that reference for each cell. - * Any pointer may be `NULL` in which case the scores for that label will not be saved. + * On output, this is filled with the (non-fine-tuned) score for the best label of that reference for each cell. 
+ * Any pointer may be `NULL` in which case the scores for that label will not be reported. * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`. - * This is filled with the difference between the highest and second-highest scores. + * On output, this is filled with the difference between the highest and second-highest scores. * This may also be `NULL` in which case the deltas are not reported. - * - * @return `best`, `scores` and `delta` are filled with their output values. */ void run( const tatami::Matrix* mat, @@ -208,8 +206,8 @@ class IntegratedScorer { tatami::parallelize([&](int, int start, int len) -> void { // We perform an indexed extraction, so all subsequent indices // will refer to indices into this subset (i.e., 'built.universe'). - auto wrk = tatami::consecutive_extractor(mat, start, len, built.universe); - std::vector buffer(wrk->index_length); + auto wrk = tatami::consecutive_extractor(mat, false, start, len, built.universe); + std::vector buffer(built.universe.size()); RankedVector data_ranked, data_ranked2; data_ranked.reserve(NR); diff --git a/inst/include/singlepp/annotate_cells.hpp b/inst/include/singlepp/annotate_cells.hpp index 317bcc5..1ae2b9d 100644 --- a/inst/include/singlepp/annotate_cells.hpp +++ b/inst/include/singlepp/annotate_cells.hpp @@ -53,7 +53,7 @@ inline void annotate_cells_simple( SubsetSorter subsorted(subcopy); tatami::parallelize([&](int, int start, int length) -> void { - auto wrk = tatami::consecutive_extractor(mat, start, length, subsorted.extraction_subset()); + auto wrk = tatami::consecutive_extractor(mat, false, start, length, subsorted.extraction_subset()); RankedVector vec(num_subset); std::vector buffer(num_subset); diff --git a/inst/include/singlepp/build_indices.hpp b/inst/include/singlepp/build_indices.hpp index c026b5e..0dbe22b 100644 --- a/inst/include/singlepp/build_indices.hpp +++ b/inst/include/singlepp/build_indices.hpp @@ -62,12 +62,11 @@ std::vector 
build_indices(const tatami::Matrix* ref, con tatami::parallelize([&](int, int start, int len) -> void { RankedVector ranked(NR); std::vector buffer(ref->nrow()); - auto wrk = tatami::consecutive_extractor(ref, start, len, subsorter.extraction_subset()); + auto wrk = tatami::consecutive_extractor(ref, false, start, len, subsorter.extraction_subset()); for (int c = start, end = start + len; c < end; ++c) { auto ptr = wrk->fetch(c, buffer.data()); subsorter.fill_ranks(ptr, ranked); - scaled_ranks(ranked, buffer.data()); // 'buffer' can be written to, as all data is extracted to 'vec'. auto curlab = labels[c]; auto curoff = offsets[c]; diff --git a/inst/include/source.sh b/inst/include/source.sh index eb8112c..b36c255 100755 --- a/inst/include/source.sh +++ b/inst/include/source.sh @@ -9,7 +9,7 @@ else git pull fi -git checkout 1d9869c3f050521a12b3151c89bc41906bf093e0 +git checkout 53bc74819c367db9d26c785de66206fe6c4f3890 rm -rf ../singlepp cp -r include/singlepp/ ../singlepp git checkout master diff --git a/src/grouped_medians.cpp b/src/grouped_medians.cpp index 35745b3..dc31947 100644 --- a/src/grouped_medians.cpp +++ b/src/grouped_medians.cpp @@ -1,23 +1,35 @@ #include "utils.h" // must be before all other includes. 
+#include "tatami_stats/tatami_stats.hpp" #include //[[Rcpp::export(rng=false)]] Rcpp::NumericMatrix grouped_medians(Rcpp::RObject ref, Rcpp::IntegerVector groups, int ngroups, int nthreads) { Rtatami::BoundNumericPointer parsed(ref); - Rcpp::NumericMatrix output(ngroups, parsed->ptr->nrow()); + size_t NR = parsed->ptr->nrow(); + + Rcpp::NumericMatrix output(NR, ngroups); + double* optr = static_cast(output.begin()); + std::vector optrs; + optrs.reserve(ngroups); + for (int x = 0; x < ngroups; ++x, optr += NR) { + optrs.emplace_back(optr); + } std::vector group_sizes(ngroups); for (auto g : groups) { ++(group_sizes[g]); } - tatami::row_medians_by_group( + tatami_stats::grouped_medians::Options opt; + opt.num_threads = nthreads; + tatami_stats::grouped_medians::apply( + /* byrow = */ true, parsed->ptr.get(), static_cast(groups.begin()), group_sizes, - static_cast(output.begin()), - nthreads + optrs.data(), + opt ); return output; } diff --git a/tests/testthat/test-markers.R b/tests/testthat/test-markers.R index d810858..c7d4bdc 100644 --- a/tests/testthat/test-markers.R +++ b/tests/testthat/test-markers.R @@ -24,7 +24,6 @@ test_that("grouped_medians works as expected", { f <- factor(y) ptr <- beachmat::initializeCpp(x) obs <- SingleR:::grouped_medians(ptr, as.integer(f) - 1L, nlevels(f), 1) - obs <- t(obs) colnames(obs) <- levels(f) expect_equal(expected, obs) })