Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving the iteration over the 2nd axis into the function submitted to the thread pool for 3D EDT speeds up execution by up to 3x #50

Merged
merged 1 commit into from
Feb 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 31 additions & 30 deletions cpp/edt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,45 +407,45 @@ float* _edt3dsq(
ThreadPool pool(parallel);

for (size_t z = 0; z < sz; z++) {
for (size_t y = 0; y < sy; y++) {
pool.enqueue([labels, y, z, sx, sxy, wx, workspace, black_border](){
pool.enqueue([labels, sy, z, sx, sxy, wx, workspace, black_border](){
for (size_t y = 0; y < sy; y++) {
squared_edt_1d_multi_seg<T>(
(labels + sx * y + sxy * z),
(workspace + sx * y + sxy * z),
sx, 1, wx, black_border
);
});
}
}
});
}

pool.join();
pool.start(parallel);

for (size_t z = 0; z < sz; z++) {
for (size_t x = 0; x < sx; x++) {
pool.enqueue([labels, x, sxy, z, workspace, sx, sy, wy, black_border](){
pool.enqueue([labels, sxy, z, workspace, sx, sy, wy, black_border](){
for (size_t x = 0; x < sx; x++) {
squared_edt_1d_parabolic_multi_seg<T>(
(labels + x + sxy * z),
(workspace + x + sxy * z),
sy, sx, wy, black_border
);
});
}
}
});
}

pool.join();
pool.start(parallel);

for (size_t y = 0; y < sy; y++) {
for (size_t x = 0; x < sx; x++) {
pool.enqueue([labels, x, sx, y, workspace, sz, sxy, wz, black_border](){
pool.enqueue([labels, sx, y, workspace, sz, sxy, wz, black_border](){
for (size_t x = 0; x < sx; x++) {
squared_edt_1d_parabolic_multi_seg<T>(
(labels + x + sx * y),
(workspace + x + sx * y),
sz, sxy, wz, black_border
);
});
}
}
});
}

pool.join();
Expand Down Expand Up @@ -475,47 +475,48 @@ float* _binary_edt3dsq(
ThreadPool pool(parallel);

for (z = 0; z < sz; z++) {
for (y = 0; y < sy; y++) {
pool.enqueue([binaryimg, sx, y, sxy, z, workspace, wx, black_border](){
pool.enqueue([binaryimg, sy, sx, sxy, z, workspace, wx, black_border](){
for (size_t y = 0; y < sy; y++) {
squared_edt_1d_multi_seg<T>(
(binaryimg + sx * y + sxy * z),
(workspace + sx * y + sxy * z),
sx, 1, wx, black_border
);
});
}
}
});
}

pool.join();
pool.start(parallel);

size_t offset;
for (z = 0; z < sz; z++) {
for (x = 0; x < sx; x++) {
offset = x + sxy * z;
for (y = 0; y < sy; y++) {
if (workspace[offset + sx*y]) {
break;
pool.enqueue([sx, sy, sxy, z, workspace, wy, black_border, offset](){
for (size_t x = 0; x < sx; x++) {
offset = x + sxy * z;
size_t y;
for (y = 0; y < sy; y++) {
if (workspace[offset + sx*y]) {
break;
}
}
}

pool.enqueue([sx, sy, y, workspace, wy, black_border, offset](){
_squared_edt_1d_parabolic(
(workspace + offset + sx * y),
sy - y, sx, wy,
black_border || (y > 0), black_border
);
});
}
}
});
}

pool.join();
pool.start(parallel);

for (y = 0; y < sy; y++) {
for (x = 0; x < sx; x++) {
offset = x + sx * y;
pool.enqueue([sz, sxy, workspace, wz, black_border, offset](){
pool.enqueue([y, sx, sz, sxy, workspace, wz, black_border, offset](){
for (size_t x = 0; x < sx; x++) {
offset = x + sx * y;
size_t z = 0;
for (z = 0; z < sz; z++) {
if (workspace[offset + sxy*z]) {
Expand All @@ -527,8 +528,8 @@ float* _binary_edt3dsq(
sz - z, sxy, wz,
black_border || (z > 0), black_border
);
});
}
}
});
}

pool.join();
Expand Down
47 changes: 16 additions & 31 deletions cpp/test.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "edt.hpp"

#include <chrono>
#include <thread>

using namespace pyedt;
using namespace edt;
Expand Down Expand Up @@ -66,7 +67,7 @@ void test2d(int n) {
delete [] input;
}

double test3d(int n) {
void test3d(int n) {
int N = n*n*n;
int* input = new int[N]();

Expand All @@ -80,33 +81,24 @@ double test3d(int n) {

input[N / 2] = 0;

auto begin = std::chrono::high_resolution_clock::now();
printf("Warm up");
float* dest = edtsq<int>(input, n,n,n, 1.,1.,1., true); // Warm up.
const auto processor_count = std::thread::hardware_concurrency();
for (int nw = 1; nw <= processor_count; ++nw) {
auto begin = std::chrono::high_resolution_clock::now();

float* dest = edtsq<int>(input, n,n,n, 1.,1.,1., true);
float* dest = edtsq<int>(input, n,n,n, 1.,1.,1., true, nw);

auto end = std::chrono::high_resolution_clock::now();
auto end = std::chrono::high_resolution_clock::now();

if (n < 20) {
for (int i = 0; i < n*n*n; i++) {
if (i % n == 0 && i > 0) {
printf("\n");
}
if (i % (n*n) == 0 && i > 0) {
printf("\n");
}
printf("%.2f, ", dest[i]);
}
delete []dest;

printf("\n\n\n");
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end - begin)
.count();
auto secs = static_cast<double>(duration) / 1000. / 1000.;
printf("Took %.3f sec. with nw=%d\n", secs, nw);
}

delete []dest;

auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end - begin)
.count();
auto secs = static_cast<double>(duration) / 1000. / 1000.;
return secs;
}

void print(int *in, float* f, float* ans, int n) {
Expand Down Expand Up @@ -223,12 +215,5 @@ void test_two_d_parabola () {
}

int main () {
// try {
// test_two_d_parabola();
// }
// catch (char const *c) {
// printf("%s", c);
// }
auto secs = test3d(512);
printf("Took %.3f sec.\n", secs);
test3d(512);
}
Loading