From 80d67125b7ec497def363d2717a10a04b42a4d7a Mon Sep 17 00:00:00 2001 From: Jacob Domagala Date: Tue, 21 May 2024 18:02:03 +0200 Subject: [PATCH] #2240: Add unit tests for new allreduce and cleanup code --- .../reduce/allreduce/rabenseifner.h | 37 ++++-- .../reduce/allreduce/recursive_doubling.h | 14 ++- src/vt/objgroup/manager.h | 6 + src/vt/objgroup/manager.impl.h | 105 ++++++++++-------- tests/perf/allreduce.cc | 6 +- tests/unit/objgroup/test_objgroup.cc | 43 +++++++ tests/unit/objgroup/test_objgroup_common.h | 28 +++++ 7 files changed, 178 insertions(+), 61 deletions(-) diff --git a/src/vt/collective/reduce/allreduce/rabenseifner.h b/src/vt/collective/reduce/allreduce/rabenseifner.h index fa5ea0a557..c47f5ff0f0 100644 --- a/src/vt/collective/reduce/allreduce/rabenseifner.h +++ b/src/vt/collective/reduce/allreduce/rabenseifner.h @@ -41,6 +41,7 @@ //@HEADER */ + #if !defined INCLUDED_VT_COLLECTIVE_REDUCE_ALLREDUCE_RABENSEIFNER_H #define INCLUDED_VT_COLLECTIVE_REDUCE_ALLREDUCE_RABENSEIFNER_H @@ -48,6 +49,8 @@ #include "vt/context/context.h" #include "vt/messaging/message/message.h" #include "vt/objgroup/proxy/proxy_objgroup.h" +#include "vt/registry/auto/auto_registry.h" +#include "vt/pipe/pipe_manager.h" #include #include @@ -95,7 +98,6 @@ struct Rabenseifner { vt::objgroup::proxy::Proxy parentProxy, NodeType num_nodes, Args&&... args) : parent_proxy_(parentProxy), - val_(std::forward(args)...), num_nodes_(num_nodes), this_node_(vt::theContext()->getNode()), is_even_(this_node_ % 2 == 0), @@ -104,7 +106,15 @@ struct Rabenseifner { nprocs_rem_(num_nodes_ - nprocs_pof2_), gather_step_(num_steps_ - 1), gather_mask_(nprocs_pof2_ >> 1), - finished_adjustment_part_(nprocs_rem_ == 0) { + finished_adjustment_part_(nprocs_rem_ == 0) + { + initialize(std::forward(args)...); + } + + template + void initialize(Args&&... args) { + val_ = DataT(std::forward(args)...); + is_part_of_adjustment_group_ = this_node_ < (2 * nprocs_rem_); if (is_part_of_adjustment_group_) { if (is_even_) { @@ -156,6 +166,13 @@ struct Rabenseifner { scatter_steps_recv_.resize(num_steps_, false); } + void executeFinalHan() { + + // theCB()->makeSend(parent_proxy_[this_node_]).sendTuple(std::make_tuple(val_)); + parent_proxy_[this_node_].template invoke(val_); + completed_ = true; + } + void allreduce() { if (nprocs_rem_) { adjustForPowerOfTwo(); @@ -181,7 +198,7 @@ struct Rabenseifner { } void adjustForPowerOfTwoRightHalf(AllreduceRbnMsg* msg) { - for (int i = 0; i < msg->val_.size(); i++) { + for (uint32_t i = 0; i < msg->val_.size(); i++) { val_[(val_.size() / 2) + i] += msg->val_[i]; } @@ -192,13 +209,13 @@ struct Rabenseifner { } void adjustForPowerOfTwoLeftHalf(AllreduceRbnMsg* msg) { - for (int i = 0; i < msg->val_.size(); i++) { + for (uint32_t i = 0; i < msg->val_.size(); i++) { val_[i] += msg->val_[i]; } } void adjustForPowerOfTwoFinalPart(AllreduceRbnMsg* msg) { - for (int i = 0; i < msg->val_.size(); i++) { + for (uint32_t i = 0; i < msg->val_.size(); i++) { val_[(val_.size() / 2) + i] = msg->val_[i]; } @@ -243,7 +260,7 @@ struct Rabenseifner { [](const auto val) { return val; })) { auto& in_msg = scatter_messages_.at(step); auto& in_val = in_msg->val_; - for (int i = 0; i < in_val.size(); i++) { + for (uint32_t i = 0; i < in_val.size(); i++) { Op()( val_[r_index_[in_msg->step_] + i], in_val[i]); } @@ -339,7 +356,7 @@ struct Rabenseifner { if (doRed) { auto& in_msg = gather_messages_.at(step); auto& in_val = in_msg->val_; - for (int i = 0; i < in_val.size(); i++) { + for (uint32_t i = 0; i < in_val.size(); i++) { val_[s_index_[in_msg->step_] + i] = in_val[i]; } @@ -417,8 +434,7 @@ struct Rabenseifner { sendToExcludedNodes(); } - parent_proxy_[this_node_].template invoke(val_); - completed_ = true; + executeFinalHan(); } void sendToExcludedNodes() { @@ -435,8 +451,7 @@ struct Rabenseifner { void sendToExcludedNodesHandler(AllreduceRbnMsg* msg) { val_ = msg->val_; - parent_proxy_[this_node_].template invoke(val_); - completed_ = true; + executeFinalHan(); } vt::objgroup::proxy::Proxy proxy_ = {}; diff --git a/src/vt/collective/reduce/allreduce/recursive_doubling.h b/src/vt/collective/reduce/allreduce/recursive_doubling.h index 917196a4d3..f283d8499f 100644 --- a/src/vt/collective/reduce/allreduce/recursive_doubling.h +++ b/src/vt/collective/reduce/allreduce/recursive_doubling.h @@ -97,7 +97,6 @@ struct DistanceDoubling { vt::objgroup::proxy::Proxy parentProxy, NodeType num_nodes, Args&&... args) : parent_proxy_(parentProxy), - val_(std::forward(args)...), num_nodes_(num_nodes), this_node_(vt::theContext()->getNode()), is_even_(this_node_ % 2 == 0), @@ -105,6 +104,12 @@ struct DistanceDoubling { nprocs_pof2_(1 << num_steps_), nprocs_rem_(num_nodes_ - nprocs_pof2_), finished_adjustment_part_(nprocs_rem_ == 0) { + initialize(std::forward(args)...); + } + + template + void initialize(Args&&... args) { + val_ = DataT(std::forward(args)...); is_part_of_adjustment_group_ = this_node_ < (2 * nprocs_rem_); if (is_part_of_adjustment_group_) { if (is_even_) { @@ -168,8 +173,8 @@ struct DistanceDoubling { [](const auto val) { return val; }); } bool isReady() { - return (is_part_of_adjustment_group_ and finished_adjustment_part_) and - step_ == 0 or + return ((is_part_of_adjustment_group_ and finished_adjustment_part_) and + step_ == 0) or allMessagesReceived(); } @@ -279,8 +284,9 @@ struct DistanceDoubling { vt::objgroup::proxy::Proxy parent_proxy_ = {}; DataT val_ = {}; - NodeType this_node_ = {}; NodeType num_nodes_ = {}; + NodeType this_node_ = {}; + bool is_even_ = false; int32_t num_steps_ = {}; int32_t nprocs_pof2_ = {}; diff --git a/src/vt/objgroup/manager.h b/src/vt/objgroup/manager.h index 3ea00d7191..e4921dfef9 100644 --- a/src/vt/objgroup/manager.h +++ b/src/vt/objgroup/manager.h @@ -41,6 +41,7 @@ //@HEADER */ +#include "vt/configs/types/types_type.h" #if !defined INCLUDED_VT_OBJGROUP_MANAGER_H #define INCLUDED_VT_OBJGROUP_MANAGER_H @@ -291,6 +292,9 @@ struct ObjGroupManager : runtime::component::Component { ProxyType proxy, std::string const& name, std::string const& parent = "" ); +template class Op, typename DataT> +ObjGroupManager::PendingSendType allreduce(ProxyType proxy, const DataT& data); + template class Op, typename DataT> ObjGroupManager::PendingSendType allreduce(ProxyType proxy, const DataT& data); @@ -504,6 +508,8 @@ ObjGroupManager::PendingSendType allreduce(ProxyType proxy, const DataT& d std::unordered_map> pending_; /// Map of object groups' labels std::unordered_map labels_; + + std::unordered_map reducers_; }; }} /* end namespace vt::objgroup */ diff --git a/src/vt/objgroup/manager.impl.h b/src/vt/objgroup/manager.impl.h index 4fb3d3dac9..72e464335e 100644 --- a/src/vt/objgroup/manager.impl.h +++ b/src/vt/objgroup/manager.impl.h @@ -41,6 +41,7 @@ //@HEADER */ +#include "vt/configs/types/types_sentinels.h" #if !defined INCLUDED_VT_OBJGROUP_MANAGER_IMPL_H #define INCLUDED_VT_OBJGROUP_MANAGER_IMPL_H @@ -58,7 +59,10 @@ #include "vt/messaging/active.h" #include "vt/elm/elm_id_bits.h" #include "vt/messaging/message/smart_ptr.h" +#include "vt/collective/reduce/allreduce/rabenseifner.h" +#include "vt/collective/reduce/allreduce/recursive_doubling.h" #include +#include #include @@ -264,57 +268,70 @@ ObjGroupManager::PendingSendType ObjGroupManager::broadcast(MsgSharedPtr m return objgroup::broadcast(msg,han); } + +// Helper trait to detect if a type is a specialization of a given variadic template +template