diff --git a/src/vt/collective/reduce/allreduce/rabenseifner.impl.h b/src/vt/collective/reduce/allreduce/rabenseifner.impl.h
index 39c732c62f..3024743a01 100644
--- a/src/vt/collective/reduce/allreduce/rabenseifner.impl.h
+++ b/src/vt/collective/reduce/allreduce/rabenseifner.impl.h
@@ -132,6 +132,7 @@ template <
 >
 void Rabenseifner::executeFinalHan() {
   // theCB()->makeSend(parent_proxy_[this_node_]).sendTuple(std::make_tuple(val_));
+  vt_debug_print(terse, allreduce, "Rabenseifner executing final handler\n");
   parent_proxy_[this_node_].template invoke(val_);
   completed_ = true;
 }
@@ -267,7 +268,7 @@ void Rabenseifner::scatterReduceIter() {
   auto dest = (vdest < nprocs_rem_) ? vdest * 2 : vdest + nprocs_rem_;
   vt_debug_print(
     terse, allreduce,
-    "Rabenseifner Part2 (step {}): Sending to Node {} starting with idx = {} and "
+    "Rabenseifner Part2 (Send step {}): To Node {} starting with idx = {} and "
     "count "
     "{} \n",
     scatter_step_, dest, s_index_[scatter_step_],
@@ -310,7 +311,7 @@ void Rabenseifner::scatterReduceIterHandler(
   vt_debug_print(
     terse, allreduce,
-    "Rabenseifner Part2 (step {}): scatter_mask_= {} nprocs_pof2_ = {}: "
+    "Rabenseifner Part2 (Recv step {}): scatter_mask_= {} nprocs_pof2_ = {}: "
     "idx = {} from {}\n",
     msg->step_, scatter_mask_, nprocs_pof2_, r_index_[msg->step_],
     theContext()->getFromNodeCurrentTask()
diff --git a/tests/perf/allreduce.cc b/tests/perf/allreduce.cc
index 2f89677cf4..1195ea86eb 100644
--- a/tests/perf/allreduce.cc
+++ b/tests/perf/allreduce.cc
@@ -58,8 +58,11 @@
 using namespace vt;
 using namespace vt::tests::perf::common;
 
-static constexpr std::array const payloadSizes = {
-  64, 128, 2048, 16384, 32768, 524288, 1048576, 2097152};
+// static constexpr std::array const payloadSizes = {
+//   64, 128, 2048, 16384, 32768, 524288, 1048576, 2097152};
+
+static constexpr std::array const payloadSizes = {
+2097152};
 
 struct MyTest : PerfTestHarness {
   MyTest() {
@@ -109,7 +112,6 @@ VT_PERF_TEST(MyTest, test_reduce) {
     data.resize(payload_size, theContext()->getNode() + 1);
 
     theCollective()->barrier();
-    StartTimer(grp_proxy[my_node_].get()->timer_names_.at(payload_size));
 
     grp_proxy.allreduce<&NodeObj::handlerVec, collective::PlusOp>(data);
   }
@@ -124,16 +126,12 @@ VT_PERF_TEST(MyTest, test_allreduce_rabenseifner) {
   using Reducer = collective::reduce::allreduce::Rabenseifner<
     DataT, collective::PlusOp, NodeObj, &NodeObj::handlerVec>;
 
-  auto grp_proxy = vt::theObjGroup()->makeCollective(
-    "allreduce_rabenseifner", proxy, num_nodes_, data);
-  grp_proxy[my_node_].get()->proxy_ = grp_proxy;
-
   for (auto payload_size : payloadSizes) {
     data.resize(payload_size, theContext()->getNode() + 1);
 
     theCollective()->barrier();
     StartTimer(proxy[my_node_].get()->timer_names_.at(payload_size));
-    grp_proxy[my_node_].template invoke<&Reducer::allreduce>();
+    proxy.allreduce_h<&NodeObj::handlerVec, collective::PlusOp>(data);
   }
 }
 
@@ -146,16 +144,12 @@ VT_PERF_TEST(MyTest, test_allreduce_recursive_doubling) {
   using Reducer = collective::reduce::allreduce::RecursiveDoubling<
     DataT, collective::PlusOp, NodeObj, &NodeObj::handlerVec>;
 
-  auto grp_proxy = vt::theObjGroup()->makeCollective(
-    "allreduce_recursive_doubling", proxy, num_nodes_, data);
-  grp_proxy[my_node_].get()->proxy_ = grp_proxy;
-
   for (auto payload_size : payloadSizes) {
     data.resize(payload_size, theContext()->getNode() + 1);
 
     theCollective()->barrier();
     StartTimer(proxy[my_node_].get()->timer_names_.at(payload_size));
-    grp_proxy[my_node_].template invoke<&Reducer::allreduce>();
+    proxy.allreduce_h<&NodeObj::handlerVec, collective::PlusOp>(data);
   }
 }