Skip to content

Commit

Permalink
Sprinkle in some OpenMP `simd` pragmas (guarded by `_OPENMP`) for SIMD parallelization.
Browse files Browse the repository at this point in the history
  • Loading branch information
LTLA committed Sep 1, 2024
1 parent d9b62af commit ed15765
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 0 deletions.
24 changes: 24 additions & 0 deletions include/scran_aggregate/aggregate_across_cells.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,16 @@ void compute_aggregate_by_row(
std::fill(tmp_sums.begin(), tmp_sums.end(), 0);

if constexpr(sparse_) {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ j = 0; j < row.number; ++j) {
tmp_sums[factor[row.index[j]]] += row.value[j];
}
} else {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ j = 0; j < NC; ++j) {
tmp_sums[factor[j]] += row[j];
}
Expand All @@ -146,10 +152,16 @@ void compute_aggregate_by_row(
std::fill(tmp_detected.begin(), tmp_detected.end(), 0);

if constexpr(sparse_) {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ j = 0; j < row.number; ++j) {
tmp_detected[factor[row.index[j]]] += (row.value[j] > 0);
}
} else {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ j = 0; j < NC; ++j) {
tmp_detected[factor[j]] += (row[j] > 0);
}
Expand Down Expand Up @@ -186,13 +198,19 @@ void compute_aggregate_by_column(
auto col = ext->fetch(vbuffer.data(), ibuffer.data());
if (buffers.sums.size()) {
auto& cursum = buffers.sums[current];
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < col.number; ++i) {
cursum[col.index[i]] += col.value[i];
}
}

if (buffers.detected.size()) {
auto& curdetected = buffers.detected[current];
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < col.number; ++i) {
curdetected[col.index[i]] += (col.value[i] > 0);
}
Expand All @@ -203,13 +221,19 @@ void compute_aggregate_by_column(

if (buffers.sums.size()) {
auto cursum = buffers.sums[current] + s;
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < l; ++i) {
cursum[i] += col[i];
}
}

if (buffers.detected.size()) {
auto curdetected = buffers.detected[current] + s;
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < l; ++i) {
curdetected[i] += (col[i] > 0);
}
Expand Down
3 changes: 3 additions & 0 deletions include/scran_aggregate/combine_factors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ std::vector<std::vector<Factor_> > combine_factors_unused(size_t n, const std::v
for (size_t f = nfac - 1; f > 0; --f) {
const auto& finfo = factors[f - 1];
auto ff = finfo.first;
#ifdef _OPENMP
#pragma omp simd
#endif
for (size_t i = 0; i < n; ++i) {
combined[i] += mult * ff[i];
}
Expand Down

0 comments on commit ed15765

Please sign in to comment.