diff --git a/docs/clean__factor_8hpp_source.html b/docs/clean__factor_8hpp_source.html index 815bf1d..4e14df6 100644 --- a/docs/clean__factor_8hpp_source.html +++ b/docs/clean__factor_8hpp_source.html @@ -102,39 +102,44 @@
32template<typename Factor_, typename Output_>
33std::vector<Factor_> clean_factor(size_t n, const Factor_* factor, Output_* cleaned) {
-
34 std::unordered_map<Factor_, Output_> mapping;
-
35 for (size_t i = 0; i < n; ++i) {
-
36 auto current = factor[i];
-
37 mapping[current] = 0;
-
38 }
-
39
-
40 // Obtaining the sorted set of unique combinations.
-
41 std::vector<Factor_> output;
-
42 size_t nuniq = mapping.size();
-
43 output.reserve(nuniq);
-
44 for (const auto& mp : mapping) {
-
45 output.push_back(mp.first);
-
46 }
-
47 std::sort(output.begin(), output.end());
-
48
-
49 Output_ counter = 0;
-
50 for (auto key : output) {
-
51 mapping[key] = counter;
-
52 ++counter;
-
53 }
-
54
-
55 // Mapping each cell to its sorted factor.
-
56 for (size_t i = 0; i < n; ++i) {
-
57 cleaned[i] = mapping[factor[i]];
+
34 auto unique = [&]{ // scoping this in an IIFE to release map memory sooner.
+
35 std::unordered_map<Factor_, Output_> mapping;
+
36 for (size_t i = 0; i < n; ++i) {
+
37 auto current = factor[i];
+
38 auto mIt = mapping.find(current);
+
39 if (mIt != mapping.end()) {
+
40 cleaned[i] = mIt->second;
+
41 } else {
+
42 Output_ alt = mapping.size();
+
43 mapping[current] = alt;
+
44 cleaned[i] = alt;
+
45 }
+
46 }
+
47 return std::vector<std::pair<Factor_, Output_> >(mapping.begin(), mapping.end());
+
48 }();
+
49
+
50 // Remapping to a sorted set.
+
51 std::sort(unique.begin(), unique.end());
+
52 size_t nuniq = unique.size();
+
53 std::vector<Output_> remapping(nuniq);
+
54 std::vector<Factor_> output(nuniq);
+
55 for (size_t u = 0; u < nuniq; ++u) {
+
56 remapping[unique[u].second] = u;
+
57 output[u] = unique[u].first;
58 }
59
-
60 return output;
-
61}
-
-
62
-
63}
+
60 // Mapping each cell to its sorted factor.
+
61 for (size_t i = 0; i < n; ++i) {
+
62 cleaned[i] = remapping[cleaned[i]];
+
63 }
64
-
65#endif
+
65 return output;
+
66}
+ +
67
+
68}
+
69
+
70#endif
scran_aggregate
Aggregate single-cell expression values.
Definition aggregate_across_cells.hpp:13
scran_aggregate::clean_factor
std::vector< Factor_ > clean_factor(size_t n, const Factor_ *factor, Output_ *cleaned)
Definition clean_factor.hpp:33
diff --git a/docs/combine__factors_8hpp.html b/docs/combine__factors_8hpp.html index 69ed20a..5b35af3 100644 --- a/docs/combine__factors_8hpp.html +++ b/docs/combine__factors_8hpp.html @@ -95,26 +95,29 @@ #include <vector>
#include <map>
#include <unordered_map>
+#include <typeindex>
#include "clean_factor.hpp"
Include dependency graph for combine_factors.hpp:
- + - - - - - - - - - - - - + + + + + + + + + + + + + +
diff --git a/docs/combine__factors_8hpp__incl.map b/docs/combine__factors_8hpp__incl.map index f01fdc9..b976ec2 100644 --- a/docs/combine__factors_8hpp__incl.map +++ b/docs/combine__factors_8hpp__incl.map @@ -1,16 +1,18 @@ - + - - - - - - - - - - - - + + + + + + + + + + + + + + diff --git a/docs/combine__factors_8hpp__incl.md5 b/docs/combine__factors_8hpp__incl.md5 index 647b019..8ac4517 100644 --- a/docs/combine__factors_8hpp__incl.md5 +++ b/docs/combine__factors_8hpp__incl.md5 @@ -1 +1 @@ -92061e5af9d29830e53632cd93a57f88 \ No newline at end of file +6b3520600db07f4334048a9902f0cae4 \ No newline at end of file diff --git a/docs/combine__factors_8hpp__incl.png b/docs/combine__factors_8hpp__incl.png index 99b3d48..b250e8b 100644 Binary files a/docs/combine__factors_8hpp__incl.png and b/docs/combine__factors_8hpp__incl.png differ diff --git a/docs/combine__factors_8hpp_source.html b/docs/combine__factors_8hpp_source.html index e14ab4c..abede85 100644 --- a/docs/combine__factors_8hpp_source.html +++ b/docs/combine__factors_8hpp_source.html @@ -97,154 +97,168 @@
5#include <vector>
6#include <map>
7#include <unordered_map>
-
8
-
9#include "clean_factor.hpp"
-
10
-
16namespace scran_aggregate {
-
17
-
38template<typename Factor_, typename Combined_>
-
-
39std::vector<std::vector<Factor_> > combine_factors(size_t n, const std::vector<const Factor_*>& factors, Combined_* combined) {
-
40 size_t nfac = factors.size();
-
41 std::vector<std::vector<Factor_> > output(nfac);
-
42
-
43 // Handling the special cases.
-
44 if (nfac == 0) {
-
45 std::fill_n(combined, n, 0);
-
46 return output;
-
47 }
-
48 if (nfac == 1) {
-
49 output[0] = clean_factor(n, factors.front(), combined);
-
50 return output;
-
51 }
-
52
-
53 // Using a map with a custom comparator that uses the index
-
54 // of first occurrence of each factor as the key. Currently using a map
-
55 // to (i) avoid issues with collisions of combined hashes and (ii)
-
56 // avoid having to write more code for sorting a vector of arrays.
-
57 auto cmp = [&](size_t left, size_t right) -> bool {
-
58 for (auto curf : factors) {
-
59 if (curf[left] < curf[right]) {
-
60 return true;
-
61 } else if (curf[left] > curf[right]) {
-
62 return false;
-
63 }
-
64 }
-
65 return false;
-
66 };
-
67
-
68 auto eq = [&](size_t left, size_t right) -> bool {
-
69 for (auto curf : factors) {
-
70 if (curf[left] != curf[right]) {
-
71 return false;
+
8#include <typeindex>
+
9
+
10#include "clean_factor.hpp"
+
11
+
17namespace scran_aggregate {
+
18
+
39template<typename Factor_, typename Combined_>
+
+
40std::vector<std::vector<Factor_> > combine_factors(size_t n, const std::vector<const Factor_*>& factors, Combined_* combined) {
+
41 size_t nfac = factors.size();
+
42 std::vector<std::vector<Factor_> > output(nfac);
+
43
+
44 // Handling the special cases.
+
45 if (nfac == 0) {
+
46 std::fill_n(combined, n, 0);
+
47 return output;
+
48 }
+
49 if (nfac == 1) {
+
50 output[0] = clean_factor(n, factors.front(), combined);
+
51 return output;
+
52 }
+
53
+
54 // Creating a hashmap on the combinations of each factor.
+
55 struct Combination {
+
56 Combination(size_t i) : index(i) {}
+
57 size_t index;
+
58 };
+
59
+
60 auto unique = [&]{ // scoping this in an IIFE to release map memory sooner.
+
61 // Using a map with a custom comparator that uses the index
+
62 // of first occurrence of each factor as the key. Currently using a map
+
63 // to (i) avoid issues with collisions of combined hashes and (ii)
+
64 // avoid having to write more code for sorting a vector of arrays.
+
65 auto cmp = [&](const Combination& left, const Combination& right) -> bool {
+
66 for (auto curf : factors) {
+
67 if (curf[left.index] < curf[right.index]) {
+
68 return true;
+
69 } else if (curf[left.index] > curf[right.index]) {
+
70 return false;
+
71 }
72 }
-
73 }
-
74 return true;
-
75 };
-
76
-
77 std::map<size_t, Combined_, decltype(cmp)> mapping(cmp);
-
78 for (size_t i = 0; i < n; ++i) {
-
79 auto mIt = mapping.find(i);
-
80 if (mIt == mapping.end() || !eq(i, mIt->first)) {
-
81 mapping.insert(mIt, std::pair<size_t, Combined_>(i, 0));
-
82 }
-
83 }
+
73 return false;
+
74 };
+
75
+
76 auto eq = [&](const Combination& left, const Combination& right) -> bool {
+
77 for (auto curf : factors) {
+
78 if (curf[left.index] != curf[right.index]) {
+
79 return false;
+
80 }
+
81 }
+
82 return true;
+
83 };
84
-
85 // Obtaining the sorted set of unique combinations; easy to do for a
-
86 // map because it's already sorted!
-
87 size_t nuniq = mapping.size();
-
88 for (auto& ofac : output) {
-
89 ofac.reserve(nuniq);
-
90 }
-
91
-
92 auto mIt = mapping.begin();
-
93 for (size_t u = 0; u < nuniq; ++u, ++mIt) {
-
94 auto ix = mIt->first;
-
95 for (size_t f = 0; f < nfac; ++f) {
-
96 output[f].push_back(factors[f][ix]);
-
97 }
-
98 mIt->second = u;
-
99 }
+
85 std::map<Combination, Combined_, decltype(cmp)> mapping(std::move(cmp));
+
86 for (size_t i = 0; i < n; ++i) {
+
87 Combination current(i);
+
88 auto mIt = mapping.find(current);
+
89 if (mIt == mapping.end() || !eq(mIt->first, current)) {
+
90 Combined_ alt = mapping.size();
+
91 mapping.insert(mIt, std::make_pair(current, alt));
+
92 combined[i] = alt;
+
93 } else {
+
94 combined[i] = mIt->second;
+
95 }
+
96 }
+
97
+
98 return std::vector<std::pair<Combination, Combined_> >(mapping.begin(), mapping.end());
+
99 }();
100
-
101 // Mapping each cell to its unique combination.
-
102 for (size_t i = 0; i < n; ++i) {
-
103 combined[i] = mapping[i];
-
104 }
-
105
-
106 return output;
-
107}
+
101 // Remapping to a sorted set.
+
102 size_t nuniq = unique.size();
+
103 for (auto& ofac : output) {
+
104 ofac.reserve(nuniq);
+
105 }
+
106 std::vector<Combined_> remapping(nuniq);
+
107 for (size_t u = 0; u < nuniq; ++u) {
+
108 auto ix = unique[u].first.index;
+
109 for (size_t f = 0; f < nfac; ++f) {
+
110 output[f].push_back(factors[f][ix]);
+
111 }
+
112 remapping[unique[u].second] = u;
+
113 }
+
114
+
115 // Mapping each cell to its sorted combination.
+
116 for (size_t i = 0; i < n; ++i) {
+
117 combined[i] = remapping[combined[i]];
+
118 }
+
119
+
120 return output;
+
121}
-
108
-
131template<typename Factor_, typename Number_, typename Combined_>
-
-
132std::vector<std::vector<Factor_> > combine_factors_unused(size_t n, const std::vector<std::pair<const Factor_*, Number_> >& factors, Combined_* combined) {
-
133 size_t nfac = factors.size();
-
134 std::vector<std::vector<Factor_> > output(nfac);
-
135
-
136 // Handling the special cases.
-
137 if (nfac == 0) {
-
138 std::fill_n(combined, n, 0);
-
139 return output;
-
140 }
-
141 if (nfac == 1) {
-
142 output[0].resize(factors[0].second);
-
143 std::iota(output[0].begin(), output[0].end(), static_cast<Combined_>(0));
-
144 std::copy_n(factors[0].first, n, combined);
-
145 return output;
-
146 }
-
147
-
148 // We iterate from back to front, where the first factor is the slowest changing.
-
149 std::copy_n(factors[nfac - 1].first, n, combined);
-
150 Combined_ mult = factors[nfac - 1].second;
-
151 for (size_t f = nfac - 1; f > 0; --f) {
-
152 const auto& finfo = factors[f - 1];
-
153 auto ff = finfo.first;
-
154 for (size_t i = 0; i < n; ++i) {
-
155 combined[i] += mult * ff[i];
-
156 }
-
157 mult *= finfo.second;
-
158 }
-
159
-
160 auto ncombos = mult;
-
161 Combined_ outer_repeats = mult;
-
162 Combined_ inner_repeats = 1;
-
163 for (size_t f = nfac; f > 0; --f) {
-
164 auto& out = output[f - 1];
-
165 out.reserve(ncombos);
-
166
-
167 const auto& finfo = factors[f - 1];
-
168 size_t initial_size = inner_repeats * finfo.second;
-
169 out.resize(initial_size);
-
170
-
171 if (inner_repeats == 1) {
-
172 std::iota(out.begin(), out.end(), static_cast<Combined_>(0));
-
173 } else {
-
174 auto oIt = out.begin();
-
175 for (Number_ l = 0; l < finfo.second; ++l) {
-
176 std::fill_n(oIt, inner_repeats, l);
-
177 oIt += inner_repeats;
-
178 }
-
179 }
-
180 inner_repeats = initial_size;
-
181
-
182 outer_repeats /= finfo.second;
-
183 for (Combined_ r = 1; r < outer_repeats; ++r) {
-
184 out.insert(out.end(), out.begin(), out.begin() + initial_size);
-
185 }
-
186 }
-
187
-
188 return output;
-
189}
+
122
+
145template<typename Factor_, typename Number_, typename Combined_>
+
+
146std::vector<std::vector<Factor_> > combine_factors_unused(size_t n, const std::vector<std::pair<const Factor_*, Number_> >& factors, Combined_* combined) {
+
147 size_t nfac = factors.size();
+
148 std::vector<std::vector<Factor_> > output(nfac);
+
149
+
150 // Handling the special cases.
+
151 if (nfac == 0) {
+
152 std::fill_n(combined, n, 0);
+
153 return output;
+
154 }
+
155 if (nfac == 1) {
+
156 output[0].resize(factors[0].second);
+
157 std::iota(output[0].begin(), output[0].end(), static_cast<Combined_>(0));
+
158 std::copy_n(factors[0].first, n, combined);
+
159 return output;
+
160 }
+
161
+
162 // We iterate from back to front, where the first factor is the slowest changing.
+
163 std::copy_n(factors[nfac - 1].first, n, combined);
+
164 Combined_ mult = factors[nfac - 1].second;
+
165 for (size_t f = nfac - 1; f > 0; --f) {
+
166 const auto& finfo = factors[f - 1];
+
167 auto ff = finfo.first;
+
168 for (size_t i = 0; i < n; ++i) {
+
169 combined[i] += mult * ff[i];
+
170 }
+
171 mult *= finfo.second;
+
172 }
+
173
+
174 auto ncombos = mult;
+
175 Combined_ outer_repeats = mult;
+
176 Combined_ inner_repeats = 1;
+
177 for (size_t f = nfac; f > 0; --f) {
+
178 auto& out = output[f - 1];
+
179 out.reserve(ncombos);
+
180
+
181 const auto& finfo = factors[f - 1];
+
182 size_t initial_size = inner_repeats * finfo.second;
+
183 out.resize(initial_size);
+
184
+
185 if (inner_repeats == 1) {
+
186 std::iota(out.begin(), out.end(), static_cast<Combined_>(0));
+
187 } else {
+
188 auto oIt = out.begin();
+
189 for (Number_ l = 0; l < finfo.second; ++l) {
+
190 std::fill_n(oIt, inner_repeats, l);
+
191 oIt += inner_repeats;
+
192 }
+
193 }
+
194 inner_repeats = initial_size;
+
195
+
196 outer_repeats /= finfo.second;
+
197 for (Combined_ r = 1; r < outer_repeats; ++r) {
+
198 out.insert(out.end(), out.begin(), out.begin() + initial_size);
+
199 }
+
200 }
+
201
+
202 return output;
+
203}
-
190
-
191}
-
192
-
193#endif
+
204
+
205}
+
206
+
207#endif
Clean up a categorical factor.
Aggregate single-cell expression values.
Definition aggregate_across_cells.hpp:13
-
std::vector< std::vector< Factor_ > > combine_factors_unused(size_t n, const std::vector< std::pair< const Factor_ *, Number_ > > &factors, Combined_ *combined)
Definition combine_factors.hpp:132
+
std::vector< std::vector< Factor_ > > combine_factors_unused(size_t n, const std::vector< std::pair< const Factor_ *, Number_ > > &factors, Combined_ *combined)
Definition combine_factors.hpp:146
std::vector< Factor_ > clean_factor(size_t n, const Factor_ *factor, Output_ *cleaned)
Definition clean_factor.hpp:33
-
std::vector< std::vector< Factor_ > > combine_factors(size_t n, const std::vector< const Factor_ * > &factors, Combined_ *combined)
Definition combine_factors.hpp:39
+
std::vector< std::vector< Factor_ > > combine_factors(size_t n, const std::vector< const Factor_ * > &factors, Combined_ *combined)
Definition combine_factors.hpp:40

We can also use the aggregate_across_genes() function to sum expression values across gene sets, e.g., to compute the activity of a gene signature. This can be done with any number of gene sets, possibly with a different weight for each gene in each set.

std::vector<std::tuple<size_t, const int*, const double*> > gene_sets;
diff --git a/docs/scran__aggregate_8hpp.html b/docs/scran__aggregate_8hpp.html index e8bdf67..eddf2ac 100644 --- a/docs/scran__aggregate_8hpp.html +++ b/docs/scran__aggregate_8hpp.html @@ -99,36 +99,38 @@
- + - + - - - - - - - - - + + + + + + + + + - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + +
diff --git a/docs/scran__aggregate_8hpp__incl.map b/docs/scran__aggregate_8hpp__incl.map index b41278b..ed43d69 100644 --- a/docs/scran__aggregate_8hpp__incl.map +++ b/docs/scran__aggregate_8hpp__incl.map @@ -1,32 +1,34 @@ - + - + - - - - - - - - - + + + + + + + + + - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + diff --git a/docs/scran__aggregate_8hpp__incl.md5 b/docs/scran__aggregate_8hpp__incl.md5 index 8e5a06b..81a9c85 100644 --- a/docs/scran__aggregate_8hpp__incl.md5 +++ b/docs/scran__aggregate_8hpp__incl.md5 @@ -1 +1 @@ -7fc99d3670d91741d594f4a512d36139 \ No newline at end of file +2e4460a77fac0d8dce85f6300215acae \ No newline at end of file diff --git a/docs/scran__aggregate_8hpp__incl.png b/docs/scran__aggregate_8hpp__incl.png index 62b4cad..a55e844 100644 Binary files a/docs/scran__aggregate_8hpp__incl.png and b/docs/scran__aggregate_8hpp__incl.png differ