From ddf04f6edf27db91e833509428fd4c9a65e25c89 Mon Sep 17 00:00:00 2001
From: YuxingQiu
Date: Tue, 16 Aug 2022 00:16:15 -0700
Subject: [PATCH] Add doc

---
 cajita/src/Cajita_SparseHalo.hpp | 196 ++++++++++++++++++++++++++++---
 1 file changed, 181 insertions(+), 15 deletions(-)

diff --git a/cajita/src/Cajita_SparseHalo.hpp b/cajita/src/Cajita_SparseHalo.hpp
index adf266fad..e1bac516f 100644
--- a/cajita/src/Cajita_SparseHalo.hpp
+++ b/cajita/src/Cajita_SparseHalo.hpp
@@ -693,6 +693,9 @@ class SparseHalo
       \param exec_space execution space
       \param sparse_array sparse AoSoA array used to store grid data
       \param map sparse map used to register activated sparse grid
+      \param is_neighbor_counting_collected flag indicating whether the
+      neighbor counting has already been collected; if true, all neighbor
+      counting information is up-to-date and no recollection is needed
     */
     template <class ExecSpace, class SparseArrayType, class SparseMapType>
     void gather( const ExecSpace& exec_space, SparseArrayType& sparse_array,
@@ -763,7 +766,7 @@ class SparseHalo
             throw std::logic_error(
                 "sparse_halo_gather: steering receiving failed." );
         */
-
+        // for debug use
         // print_status();

@@ -771,10 +774,13 @@ class SparseHalo
         // communicate sparse array data
+        // Pick a tag to use for communication. This object has its own
+        // communication space so any tag will do.
         std::vector<MPI_Request> requests(
             valid_recvs.size() + valid_sends.size(), MPI_REQUEST_NULL );
         const int mpi_tag = 2345;

+        // post receives
         for ( int i = 0; i < valid_recvs.size(); ++i )
         {
             int nid = valid_recvs[i];
@@ -788,6 +794,7 @@ class SparseHalo
                        mpi_tag + _receive_tags[nid], _comm, &requests[i] );
         }

+        // pack send buffers and post sends
        for ( int i = 0; i < valid_sends.size(); ++i )
        {
            int nid = valid_sends[i];
@@ -806,18 +813,23 @@ class SparseHalo
                       _comm, &requests[i + valid_recvs.size()] );
        }

+        // unpack receive buffers
        for ( int i = 0; i < valid_recvs.size(); ++i )
        {
+            // get the next buffer to unpack
            int unpack_index = MPI_UNDEFINED;
            MPI_Waitany( valid_recvs.size(), requests.data(), &unpack_index,
                         MPI_STATUS_IGNORE );
+            // we should receive exactly one buffer per posted receive;
+            // anything else indicates a communication failure
            if ( MPI_UNDEFINED == unpack_index )
                std::runtime_error(
                    std::string( "sparse_halo_gather: data receiving failed, "
                                 "get only " ) +
                    std::to_string( i ) + ", need " +
                    std::to_string( valid_recvs.size() ) );
+            // otherwise unpack the next buffer
            else
            {
                int nid = valid_recvs[unpack_index];
@@ -831,6 +843,7 @@ class SparseHalo
            }
        }

+        // wait to finish all send requests
        const int ec_data = MPI_Waitall( valid_sends.size(),
                                         requests.data() + valid_recvs.size(),
                                         MPI_STATUSES_IGNORE );
@@ -838,21 +851,34 @@ class SparseHalo
            throw std::logic_error(
                "sparse_halo_gather: data sending failed." );

+        // reset steering keys for the next round of communication
        for ( int i = 0; i < _tmp_tile_steering.size(); ++i )
            Kokkos::deep_copy( _tmp_tile_steering[i], invalid_key );

        MPI_Barrier( _comm );
    }

    /*!
-      \brief Scatter data from our ghosts to their owners using the given type
+      \brief Scatter data from our ghosts to their owners using the given type
       of reduce operation.
-    */
+      \tparam ExecSpace execution space
+      \tparam ReduceOp The type of reduction functor
+      \tparam SparseArrayType sparse array type
+      \tparam SparseMapType sparse map type
+      \param exec_space execution space
+      \param reduce_op The functor used to reduce the results
+      \param sparse_array sparse AoSoA array used to store grid data
+      \param map sparse map used to register activated sparse grid
+      \param is_neighbor_counting_collected flag indicating whether the
+      neighbor counting has already been collected; if true, all neighbor
+      counting information is up-to-date and no recollection is needed
+    */
    template <class ExecSpace, class ReduceOp, class SparseArrayType,
              class SparseMapType>
    void scatter( const ExecSpace& exec_space, const ReduceOp& reduce_op,
                  SparseArrayType& sparse_array, const SparseMapType& map,
                  const bool is_neighbor_counting_collected = false ) const
    {
+        // return if there are no valid neighbors
        if ( 0 == _neighbor_ranks.size() )
            return;

@@ -863,12 +889,16 @@ class SparseHalo
        scatterValidSendAndRecvRanks( valid_sends, valid_recvs,
                                      is_neighbor_counting_collected );
        MPI_Barrier( _comm );
+        // ------------------------------------------------------------------
-        // communicate steering (array keys)
+        // communicate steering (array keys) for all valid sends and receives
        std::vector<MPI_Request> steering_requests(
            valid_recvs.size() + valid_sends.size(), MPI_REQUEST_NULL );
        const int mpi_tag_steering = 214;

+        // get the steering keys from valid neighbors to know all grids that
+        // we need to receive
+        // loop over all neighbors that will send data to the current rank
        for ( int i = 0; i < valid_recvs.size(); ++i )
        {
            int nid = valid_recvs[i];
@@ -882,6 +912,8 @@ class SparseHalo
                       &steering_requests[i] );
        }

+        // send the steering keys to valid neighbors
+        // loop over all neighbors that require our owned data
        for ( int i = 0; i < valid_sends.size(); ++i )
        {
            int nid = valid_sends[i];
@@ -895,6 +927,7 @@ class SparseHalo
                       &steering_requests[i + valid_recvs.size()] );
        }

+        // wait for all sends to finish
        const int ec_ss = MPI_Waitall( valid_sends.size(),
                                       steering_requests.data() +
                                           valid_recvs.size(),
                                       MPI_STATUSES_IGNORE );
@@ -902,20 +935,29 @@ class SparseHalo
            throw std::logic_error(
                "sparse_halo_scatter: steering sending failed." );

-        const int ec_sr = MPI_Waitall(
-            valid_recvs.size(), steering_requests.data(), MPI_STATUSES_IGNORE );
-        if ( MPI_SUCCESS != ec_sr )
-            throw std::logic_error(
-                "sparse_halo_scatter: steering receiving failed." );
+        /* waiting here may not be needed
+        // wait for all receives to finish
+        const int ec_sr = MPI_Waitall( valid_recvs.size(),
+                                       steering_requests.data(),
+                                       MPI_STATUSES_IGNORE );
+        if ( MPI_SUCCESS != ec_sr )
+            throw std::logic_error(
+                "sparse_halo_scatter: steering receiving failed." );
+        */

+        // for debug use
        // print_status();
+        MPI_Barrier( _comm );
+
        // ------------------------------------------------------------------
        // communicate sparse array data
+        // Pick a tag to use for communication. This object has its own
+        // communication space so any tag will do.
        std::vector<MPI_Request> requests(
            valid_recvs.size() + valid_sends.size(), MPI_REQUEST_NULL );
        const int mpi_tag = 345;

+        // post receives
        for ( int i = 0; i < valid_recvs.size(); ++i )
        {
            int nid = valid_recvs[i];
@@ -929,6 +971,7 @@ class SparseHalo
                       mpi_tag + _receive_tags[nid], _comm, &requests[i] );
        }

+        // pack send buffers and post sends
        for ( int i = 0; i < valid_sends.size(); ++i )
        {
            int nid = valid_sends[i];
@@ -948,18 +991,23 @@ class SparseHalo
                       &requests[i + valid_recvs.size()] );
        }

+        // unpack receive buffers
        for ( int i = 0; i < valid_recvs.size(); ++i )
        {
+            // get the next buffer to unpack
            int unpack_index = MPI_UNDEFINED;
            MPI_Waitany( valid_recvs.size(), requests.data(), &unpack_index,
                         MPI_STATUS_IGNORE );
+            // we should receive exactly one buffer per posted receive;
+            // anything else indicates a communication failure
            if ( MPI_UNDEFINED == unpack_index )
                std::runtime_error(
                    std::string( "sparse_halo_scatter: data receiving failed, "
                                 "get only " ) +
                    std::to_string( i ) + ", need " +
                    std::to_string( valid_recvs.size() ) );
+            // otherwise unpack the next buffer with the given reduce operator
            else
            {
                int nid = valid_recvs[unpack_index];
@@ -972,6 +1020,7 @@ class SparseHalo
            }
        }

+        // wait to finish all send requests
        const int ec_data = MPI_Waitall( valid_sends.size(),
                                         requests.data() + valid_recvs.size(),
                                         MPI_STATUSES_IGNORE );
@@ -979,12 +1028,24 @@ class SparseHalo
            throw std::logic_error(
                "sparse_halo_scatter: data sending failed." );

+        // reset steering keys for the next round of communication
        for ( int i = 0; i < _tmp_tile_steering.size(); ++i )
            Kokkos::deep_copy( _tmp_tile_steering[i], invalid_key );

        MPI_Barrier( _comm );
    }

    //---------------------------------------------------------------------------//
+    /*!
+      \brief Pack sparse array data at halo regions into a buffer
+      \tparam ExecSpace execution space type
+      \tparam SparseArrayType sparse array type
+      \tparam CountType counting number type
+      \param exec_space execution space
+      \param buffer buffer to store sparse array data and to communicate
+      \param tile_steering Kokkos view to store halo tile keys
+      \param sparse_array sparse array (all sparse grids on current rank)
+      \param count number of halo grids to pack
+    */
    template <class ExecSpace, class SparseArrayType, class CountType>
    void packBuffer( const ExecSpace& exec_space, const buffer_view& buffer,
                     const steering_view& tile_steering,
@@ -1046,6 +1107,19 @@ class SparseHalo
    }

    //---------------------------------------------------------------------------//
+    /*!
+      \brief Unpack a sparse array element (a tuple) in a buffer
+      (for tuple members with rank == 3)
+      \tparam ReduceOp reduce functor type
+      \tparam N element ID inside a SoA tuple (N-th data member)
+      \tparam M rank of the current element (N-th data member)
+      \tparam SoAType SoA type in the sparse array (which is an AoSoA)
+      \param reduce_op reduce operation
+      \param src_tuple source tuple
+      \param dst_soa destination SoA to store copied data
+      \param soa_idx tuple index inside the destination SoA
+      \param extents extents of the element member in all its ranks
+    */
    template <class ReduceOp, std::size_t N, std::size_t M, class SoAType>
    KOKKOS_FORCEINLINE_FUNCTION static std::enable_if_t<3 == M, void>
    unpackTupleMember( const ReduceOp& reduce_op, const tuple_type& src_tuple,
@@ -1064,6 +1138,19 @@ class SparseHalo
        }
    }

+    /*!
+      \brief Unpack a sparse array element (a tuple) in a buffer
+      (for tuple members with rank == 2)
+      \tparam ReduceOp reduce functor type
+      \tparam N element ID inside a SoA tuple (N-th data member)
+      \tparam M rank of the current element (N-th data member)
+      \tparam SoAType SoA type in the sparse array (which is an AoSoA)
+      \param reduce_op reduce operation
+      \param src_tuple source tuple
+      \param dst_soa destination SoA to store copied data
+      \param soa_idx tuple index inside the destination SoA
+      \param extents extents of the element member in all its ranks
+    */
    template <class ReduceOp, std::size_t N, std::size_t M, class SoAType>
    KOKKOS_FORCEINLINE_FUNCTION static std::enable_if_t<2 == M, void>
    unpackTupleMember( const ReduceOp& reduce_op, const tuple_type& src_tuple,
@@ -1080,6 +1167,19 @@ class SparseHalo
        }
    }

+    /*!
+      \brief Unpack a sparse array element (a tuple) in a buffer
+      (for tuple members with rank == 1)
+      \tparam ReduceOp reduce functor type
+      \tparam N element ID inside a SoA tuple (N-th data member)
+      \tparam M rank of the current element (N-th data member)
+      \tparam SoAType SoA type in the sparse array (which is an AoSoA)
+      \param reduce_op reduce operation
+      \param src_tuple source tuple
+      \param dst_soa destination SoA to store copied data
+      \param soa_idx tuple index inside the destination SoA
+      \param extents extents of the element member in all its ranks
+    */
    template <class ReduceOp, std::size_t N, std::size_t M, class SoAType>
    KOKKOS_FORCEINLINE_FUNCTION static std::enable_if_t<1 == M, void>
    unpackTupleMember( const ReduceOp& reduce_op, const tuple_type& src_tuple,
@@ -1095,6 +1195,19 @@ class SparseHalo
        }
    }

+    /*!
+      \brief Unpack a sparse array element (a tuple) in a buffer
+      (for tuple members with rank == 0)
+      \tparam ReduceOp reduce functor type
+      \tparam N element ID inside a SoA tuple (N-th data member)
+      \tparam M rank of the current element (N-th data member)
+      \tparam SoAType SoA type in the sparse array (which is an AoSoA)
+      \param reduce_op reduce operation
+      \param src_tuple source tuple
+      \param dst_soa destination SoA to store copied data
+      \param soa_idx tuple index inside the destination SoA
+      \param extents extents of the element member in all its ranks
+    */
    template <class ReduceOp, std::size_t N, std::size_t M, class SoAType>
    KOKKOS_FORCEINLINE_FUNCTION static std::enable_if_t<0 == M, void>
    unpackTupleMember( const ReduceOp& reduce_op, const tuple_type& src_tuple,
@@ -1107,6 +1220,15 @@ class SparseHalo
                              Cabana::get<N>( dst_soa, soa_idx ) );
    }

+    /*!
+      \brief Unpack a sparse array tuple for its member with index 0
+      \tparam ReduceOp reduce functor type
+      \tparam SoAType SoA type in the sparse array (which is an AoSoA)
+      \param reduce_op reduce operation
+      \param src_tuple source tuple
+      \param dst_soa destination SoA to store copied data
+      \param soa_idx tuple index inside the destination SoA
+    */
    template <class ReduceOp, class SoAType>
    KOKKOS_FORCEINLINE_FUNCTION static void
    unpackTuple( const ReduceOp& reduce_op, const tuple_type& src_tuple,
@@ -1122,6 +1244,16 @@ class SparseHalo
                std::rank::value>() );
    }

+    /*!
+      \brief Unpack a sparse array tuple for all members when element ID != 0
+      \tparam ReduceOp reduce functor type
+      \tparam SoAType SoA type in the sparse array (which is an AoSoA)
+      \tparam N index of the data member unpacked in this call
+      \param reduce_op reduce operation
+      \param src_tuple source tuple
+      \param dst_soa destination SoA to store copied data
+      \param soa_idx tuple index inside the destination SoA
+    */
    template <class ReduceOp, class SoAType, std::size_t N>
    KOKKOS_FORCEINLINE_FUNCTION static void
    unpackTuple( const ReduceOp& reduce_op, const tuple_type& src_tuple,
@@ -1138,7 +1270,7 @@ class SparseHalo

        if ( N > 1 )
        {
-            // recurcive
+            // recursively unpack the next tuple element
            unpackTuple( reduce_op, src_tuple, dst_soa, soa_idx,
                         std::integral_constant<std::size_t, N - 1>() );
        }
@@ -1149,29 +1281,42 @@ class SparseHalo
        }
    }

+    /*!
+      \brief Unpack a sparse array communication buffer
+      \tparam ReduceOp reduce functor type
+      \tparam ExecSpace execution space type
+      \tparam SparseArrayType sparse array type
+      \tparam SparseMapType sparse map type
+      \tparam CountType counting number type
+      \param reduce_op reduce operation
+      \param exec_space execution space
+      \param buffer buffer to store sparse array data and to communicate
+      \param tile_steering Kokkos view to store halo tile keys
+      \param sparse_array sparse array (all sparse grids on current rank)
+      \param map sparse map that has valid grids registered
+      \param count number of halo grids to unpack
+    */
    template <class ReduceOp, class ExecSpace, class SparseArrayType,
              class SparseMapType, class CountType>
    void unpackBuffer( const ReduceOp& reduce_op, const ExecSpace& exec_space,
                       const buffer_view& buffer,
                       const steering_view& tile_steering,
                       const SparseArrayType& sparse_array, SparseMapType& map,
-                       const CountType counts ) const
+                       const CountType count ) const
    {
        int sr = _self_rank;
        Kokkos::parallel_for(
            "unpack_spares_halo_buffer",
-            Kokkos::RangePolicy<ExecSpace>( exec_space, 0, counts ),
+            Kokkos::RangePolicy<ExecSpace>( exec_space, 0, count ),
            KOKKOS_LAMBDA( const int i ) {
                if ( tile_steering( i ) != invalid_key )
                {
                    auto tile_key = tile_steering( i );
                    if ( map.is_valid_key( tile_key ) )
                    {
-                        // for debug
                        int ti, tj, tk;
                        map.key2ijk( tile_key, ti, tj, tk );
-                        // for debug -- end

                        auto tile_id = map.queryTileFromTileKey( tile_key );
                        const int buffer_idx = i * cell_num_per_tile;
@@ -1192,6 +1337,7 @@ class SparseHalo
    }

    //---------------------------------------------------------------------------//
+    //! print information for debugging purposes
    void print_spaces()
    {
        for ( int i = 0; i < _neighbor_ranks.size(); i++ )
@@ -1216,6 +1362,7 @@ class SparseHalo
                (int)sizeof( tuple_type ) );
    }

+    //! print current status for debugging purposes
    void print_status( int frame, int step ) const
    {
        for ( int i = 0; i < _neighbor_ranks.size(); ++i )
@@ -1258,7 +1405,7 @@ class SparseHalo
  private:
    // [NOTES] this part may be useful if added to Cabana_MemberType.hpp

-    // member size
+    // compute member size
    template <std::size_t M>
    static constexpr std::size_t compute_member_size()
    {
@@ -1318,29 +1465,48 @@ class SparseHalo
    // [NOTES - end]

  private:
+    // MPI communicator.
    MPI_Comm _comm;
+
+    // Current MPI linear rank ID
    int _self_rank;
+    // Halo pattern
    halo_pattern_type _pattern;

+    // neighbor linear MPI rank IDs
    std::vector<int> _neighbor_ranks;
+    // valid neighbor rank indices; valid means data communication is required
    std::vector _valid_neighbor_ids;
+    // sending tags
    std::vector<int> _send_tags;
+    // receiving tags
    std::vector<int> _receive_tags;

+    // owned view buffers
    std::vector<buffer_view> _owned_buffers;
+    // ghosted view buffers
    std::vector<buffer_view> _ghosted_buffers;

+    // owned tile key steerings
    std::vector<steering_view> _owned_tile_steering;
+    // key steering buffers (used to store valid keys received from neighbors)
    std::vector<steering_view> _tmp_tile_steering;
+    // ghosted tile key steerings
    std::vector<steering_view> _ghosted_tile_steering;

+    // valid halo grid counting on the current rank (one element per neighbor)
    std::vector _valid_counting;
+    // valid halo grid counting on the corresponding neighbor ranks
    std::vector _neighbor_counting;

+    // owned tile spaces
    std::vector _owned_tile_spaces;
+    // ghosted tile spaces
    std::vector _ghosted_tile_spaces;

+    // byte count of each SoA member
    Kokkos::Array _soa_member_bytes;
+    // total byte count of a single SoA tuple
    std::size_t _soa_total_bytes;
};

}; // namespace Experimental
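
Usage note (not part of the patch): a minimal sketch of how the gather/scatter
entry points documented above fit together. Everything here is illustrative:
`halo`, `sparse_array`, and `sparse_map` are hypothetical objects whose
construction is elided, and `Cajita::ScatterReduce::Sum` is borrowed from the
dense Cajita::Halo interface on the assumption that the sparse halo accepts
the same reduce functors.

#include <Cajita.hpp>
#include <Kokkos_Core.hpp>

// Exchange halo data for one step. HaloType is assumed to be an
// Experimental::SparseHalo, ArrayType a sparse AoSoA array, and MapType a
// sparse map; they are template parameters so the sketch stays independent
// of the exact construction code.
template <class ExecSpace, class HaloType, class ArrayType, class MapType>
void exchange( HaloType& halo, ArrayType& sparse_array, MapType& sparse_map )
{
    // update ghosted tiles with data from the owning ranks; the neighbor
    // counting is collected inside the call (flag defaults to false)
    halo.gather( ExecSpace{}, sparse_array, sparse_map );

    // ... local work that accumulates into ghost tiles ...

    // push ghost contributions back to the owning ranks, summing overlaps
    halo.scatter( ExecSpace{}, Cajita::ScatterReduce::Sum(), sparse_array,
                  sparse_map );

    // per the new documentation, a repeated exchange in the same step may
    // skip re-collection by passing is_neighbor_counting_collected = true
    halo.gather( ExecSpace{}, sparse_array, sparse_map, true );
}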