From 82a2050657aab4851af684c84355e0e399b0d61a Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Tue, 26 Sep 2023 08:58:33 -0600 Subject: [PATCH] Stream support for Gauss-Seidel: Symbolic, Numeric, Apply (Twostage) --- common/src/KokkosKernels_Utils.hpp | 5 +- .../impl/KokkosSparse_gauss_seidel_impl.hpp | 10 +- ...okkosSparse_twostage_gauss_seidel_impl.hpp | 96 ++++++++++--------- sparse/src/KokkosKernels_Handle.hpp | 22 +++-- sparse/src/KokkosSparse_gauss_seidel.hpp | 96 +++++++++---------- .../src/KokkosSparse_gauss_seidel_handle.hpp | 15 ++- 6 files changed, 134 insertions(+), 110 deletions(-) diff --git a/common/src/KokkosKernels_Utils.hpp b/common/src/KokkosKernels_Utils.hpp index c6780185a4..01249097e6 100644 --- a/common/src/KokkosKernels_Utils.hpp +++ b/common/src/KokkosKernels_Utils.hpp @@ -892,7 +892,7 @@ void permute_block_vector(typename idx_array_type::value_type num_elements, // TODO BMK: clean this up by removing 1st argument. It is unused but // its name gives the impression that only num_elements of the vector are // zeroed, when really it's always the whole thing. 
-template +template void zero_vector(ExecSpaceIn &exec_space_in, typename value_array_type::value_type /* num_elements */, value_array_type &vector) { @@ -908,8 +908,7 @@ void zero_vector(typename value_array_type::value_type /* num_elements */, using ne_tmp_t = typename value_array_type::value_type; ne_tmp_t ne_tmp = ne_tmp_t(0); MyExecSpace my_exec_space; - zero_vector(my_exec_space, ne_tmp, - vector); + zero_vector(my_exec_space, ne_tmp, vector); } template diff --git a/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp b/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp index 7391e00e3d..12f7dea38a 100644 --- a/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp +++ b/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp @@ -1547,9 +1547,8 @@ class PointGaussSeidel { Permuted_Yvector); } if (init_zero_x_vector) { - KokkosKernels::Impl::zero_vector< - MyExecSpace, scalar_persistent_work_view2d_t, MyExecSpace>( - my_exec_space, num_cols * block_size, Permuted_Xvector); + KokkosKernels::Impl::zero_vector(my_exec_space, num_cols * block_size, + Permuted_Xvector); } else { KokkosKernels::Impl::permute_block_vector< x_value_array_type, scalar_persistent_work_view2d_t, @@ -1664,9 +1663,8 @@ class PointGaussSeidel { Permuted_Yvector); } if (init_zero_x_vector) { - KokkosKernels::Impl::zero_vector< - MyExecSpace, scalar_persistent_work_view2d_t, MyExecSpace>( - my_exec_space, num_cols, Permuted_Xvector); + KokkosKernels::Impl::zero_vector(my_exec_space, num_cols, + Permuted_Xvector); } else { KokkosKernels::Impl::permute_vector< x_value_array_type, scalar_persistent_work_view2d_t, diff --git a/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp b/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp index 00fdcd2442..83b9d0b949 100644 --- a/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp +++ b/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp @@ -861,10 +861,11 @@ class TwostageGaussSeidel { #endif // - auto *gsHandle = get_gs_handle(); - bool two_stage = 
gsHandle->isTwoStage(); - bool compact_form = gsHandle->isCompactForm(); - scalar_t gamma = gsHandle->getInnerDampFactor(); + auto *gsHandle = get_gs_handle(); + auto my_exec_space = gsHandle->get_execution_space(); + bool two_stage = gsHandle->isTwoStage(); + bool compact_form = gsHandle->isCompactForm(); + scalar_t gamma = gsHandle->getInnerDampFactor(); GSDirection direction = gsHandle->getSweepDirection(); if (apply_forward && apply_backward) { @@ -887,7 +888,7 @@ class TwostageGaussSeidel { auto crsmatUa = gsHandle->getUa(); // complement of U+D (used only for compact form) - // wratp A into crsmat + // wrap A into crsmat input_crsmat_t crsmatA("A", num_rows, num_cols, values_view.extent(0), values_view, rowmap_view, column_view); #ifdef KOKKOSSPARSE_IMPL_TIME_TWOSTAGE_GS @@ -920,36 +921,36 @@ class TwostageGaussSeidel { NumSweeps *= 2; } if (init_zero_x_vector) { - KokkosKernels::Impl::zero_vector( - nrhs, localX); + KokkosKernels::Impl::zero_vector(my_exec_space, nrhs, localX); } for (int sweep = 0; sweep < NumSweeps; ++sweep) { bool forward_sweep = (direction == GS_FORWARD || (direction == GS_SYMMETRIC && sweep % 2 == 0)); // compute residual vector - KokkosBlas::scal(localR, one, localB); + KokkosBlas::scal(my_exec_space, localR, one, localB); if (sweep > 0 || !init_zero_x_vector) { if (compact_form) { if (forward_sweep) { // R = B - U*x - KokkosSparse::spmv("N", scalar_t(-one), crsmatUa, localX, one, - localR); + KokkosSparse::spmv(my_exec_space, "N", scalar_t(-one), crsmatUa, + localX, one, localR); } else { // R = B - L*x - KokkosSparse::spmv("N", scalar_t(-one), crsmatLa, localX, one, - localR); + KokkosSparse::spmv(my_exec_space, "N", scalar_t(-one), crsmatLa, + localX, one, localR); } if (omega != one) { // R = B - (U + (1-1/omega)D)*x scalar_t omega2 = (one / omega - one); auto localY = Kokkos::subview(localX, range_type(0, num_rows), Kokkos::ALL()); - KokkosBlas::mult(zero, localZ, one, localDa, localY); - KokkosBlas::axpy(omega2, localZ, localR); 
+ KokkosBlas::mult(my_exec_space, zero, localZ, one, localDa, localY); + KokkosBlas::axpy(my_exec_space, omega2, localZ, localR); } } else { // not compact_form // R = B - A*x - KokkosSparse::spmv("N", scalar_t(-one), crsmatA, localX, one, localR); + KokkosSparse::spmv(my_exec_space, "N", scalar_t(-one), crsmatA, + localX, one, localR); #ifdef KOKKOSSPARSE_IMPL_TIME_TWOSTAGE_GS { auto localRj = @@ -987,6 +988,8 @@ class TwostageGaussSeidel { single_vector_view_t Zj(localZj.data(), num_rows); sptrsv_solve(handle->get_gs_sptrsvL_handle(), crsmatL.graph.row_map, crsmatL.graph.entries, crsmatL.values, Rj, Zj); + execution_space() + .fence(); // TODO: call sptrsv_solve on stream and remove } } else { using namespace KokkosSparse::Experimental; @@ -1001,6 +1004,8 @@ class TwostageGaussSeidel { single_vector_view_t Zj(localZj.data(), num_rows); sptrsv_solve(handle->get_gs_sptrsvU_handle(), crsmatU.graph.row_map, crsmatU.graph.entries, crsmatU.values, Rj, Zj); + execution_space() + .fence(); // TODO: call sptrsv_solve on stream and remove } } @@ -1009,21 +1014,25 @@ class TwostageGaussSeidel { Kokkos::subview(localX, range_type(0, num_rows), Kokkos::ALL()); if (compact_form) { // Y = omega * Z - KokkosBlas::scal(localY, one, localZ); + KokkosBlas::scal(my_exec_space, localY, one, localZ); } else { // Y = Y + omega * Z - KokkosBlas::axpy(one, localZ, localY); + KokkosBlas::axpy(my_exec_space, one, localZ, localY); } } else { // ====== inner Jacobi-Richardson ===== #ifdef KOKKOSSPARSE_IMPL_TIME_TWOSTAGE_GS // compute initial residual norm // > compute RHS for the inner loop, R = B - A*x - internal_vector_view_t tempR("tempR", num_rows, 1); - KokkosBlas::scal(tempR, one, localB); - KokkosSparse::spmv("N", scalar_t(-one), crsmatA, localX, one, tempR); + internal_vector_view_t tempR( + Kokkos::view_alloc(my_exec_space, std::string("tempR")), num_rows, + 1); + KokkosBlas::scal(my_exec_space, tempR, one, localB); + KokkosSparse::spmv(my_exec_space, "N", scalar_t(-one), crsmatA, 
localX, + one, tempR); // > initial vector for the inner loop is zero - Kokkos::deep_copy(localZ, zero); + Kokkos::deep_copy(my_exec_space, localZ, zero); + my_exec_space.fence(); using Norm_Functor_t = TwostageGaussSeidel_functor; @@ -1031,7 +1040,7 @@ class TwostageGaussSeidel { { mag_t normR = zero; Kokkos::parallel_reduce( - "normR", range_policy(0, num_rows), + "normR", range_policy(my_exec_space, 0, num_rows), Norm_Functor_t(forward_sweep, num_rows, rowmap_view, column_view, values_view, localD, localZ, tempR), normR); @@ -1046,23 +1055,23 @@ class TwostageGaussSeidel { // row-scale: (D^{-1}*L)*Y = D^{-1}*B // compute Z := D^{-1}*R - KokkosBlas::mult(zero, localZ, one, localD, localR); + KokkosBlas::mult(my_exec_space, zero, localZ, one, localD, localR); // apply inner damping factor, if not one if (gamma != one) { // Z = gamma * Z - KokkosBlas::scal(localZ, gamma, localZ); + KokkosBlas::scal(my_exec_space, localZ, gamma, localZ); } } else { // copy to localT (workspace used to save D^{-1}*R for JR iteration) - KokkosBlas::mult(zero, localT, one, localD, localR); + KokkosBlas::mult(my_exec_space, zero, localT, one, localD, localR); // initialize Jacobi-Richardson (using R as workspace for JR // iteration) - KokkosBlas::scal(localR, one, localT); + KokkosBlas::scal(my_exec_space, localR, one, localT); // apply inner damping factor, if not one if (gamma != one) { // R = gamma * R - KokkosBlas::scal(localR, gamma, localR); + KokkosBlas::scal(my_exec_space, localR, gamma, localR); } } #ifdef KOKKOSSPARSE_IMPL_TIME_TWOSTAGE_GS @@ -1070,7 +1079,7 @@ class TwostageGaussSeidel { // compute residual norm of the starting vector (D^{-1}R) mag_t normR = zero; Kokkos::parallel_reduce( - "normR", range_policy(0, num_rows), + "normR", range_policy(my_exec_space, 0, num_rows), Norm_Functor_t(forward_sweep, num_rows, rowmap_view, column_view, values_view, localD, localT, tempR), normR); @@ -1081,34 +1090,34 @@ class TwostageGaussSeidel { for (int ii = 0; ii < NumInnerSweeps; 
ii++) { // T = D^{-1}*R, and L = D^{-1}*L and U = D^{-1}*U // copy T into Z - KokkosBlas::scal(localZ, one, localT); + KokkosBlas::scal(my_exec_space, localZ, one, localT); if (forward_sweep) { // Z = Z - L*R - KokkosSparse::spmv("N", scalar_t(-omega), crsmatL, localR, one, - localZ); + KokkosSparse::spmv(my_exec_space, "N", scalar_t(-omega), crsmatL, + localR, one, localZ); } else { // Z = R - U*T - KokkosSparse::spmv("N", scalar_t(-omega), crsmatU, localR, one, - localZ); + KokkosSparse::spmv(my_exec_space, "N", scalar_t(-omega), crsmatU, + localR, one, localZ); } // apply inner damping factor, if not one if (gamma != one) { // Z = gamma * Z - KokkosBlas::scal(localZ, gamma, localZ); + KokkosBlas::scal(my_exec_space, localZ, gamma, localZ); // Z = Z + (one - one/gamma) * R scalar_t gamma2 = one - gamma; - KokkosBlas::axpy(gamma2, localR, localZ); + KokkosBlas::axpy(my_exec_space, gamma2, localR, localZ); } if (ii + 1 < NumInnerSweeps) { // reinitialize (R to be Z) - KokkosBlas::scal(localR, one, localZ); + KokkosBlas::scal(my_exec_space, localR, one, localZ); } #ifdef KOKKOSSPARSE_IMPL_TIME_TWOSTAGE_GS { // compute residual norm(r - (L+D)*y) mag_t normR = zero; Kokkos::parallel_reduce( - "normR", range_policy(0, num_rows), + "normR", range_policy(my_exec_space, 0, num_rows), Norm_Functor_t(forward_sweep, num_rows, rowmap_view, column_view, values_view, localD, localZ, tempR), normR); @@ -1123,22 +1132,23 @@ class TwostageGaussSeidel { Kokkos::subview(localX, range_type(0, num_rows), Kokkos::ALL()); if (compact_form) { // Y := omega * z - KokkosBlas::scal(localY, omega, localZ); + KokkosBlas::scal(my_exec_space, localY, omega, localZ); } else { // Y := X + omega * Z - KokkosBlas::axpy(omega, localZ, localY); + KokkosBlas::axpy(my_exec_space, omega, localZ, localY); } } // end of inner GS sweep } // end of outer GS sweep #ifdef KOKKOSSPARSE_IMPL_TIME_TWOSTAGE_GS { // R = B - A*x - KokkosBlas::scal(localR, one, localB); - KokkosSparse::spmv("N", scalar_t(-one), 
crsmatA, localX, one, localR); + KokkosBlas::scal(my_exec_space, localR, one, localB); + KokkosSparse::spmv(my_exec_space, "N", scalar_t(-one), crsmatA, localX, + one, localR); auto localRj = Kokkos::subview(localR, Kokkos::ALL(), range_type(0, 1)); single_vector_view_t Rj(localRj.data(), num_rows); - std::cout << "norm(GS)-" << NumSweeps << " " << KokkosBlas::nrm2(Rj) - << std::endl; + std::cout << "norm(GS)-" << NumSweeps << " " + << KokkosBlas::nrm2(my_exec_space, Rj) << std::endl; } #endif } diff --git a/sparse/src/KokkosKernels_Handle.hpp b/sparse/src/KokkosKernels_Handle.hpp index d500f19d48..03cabdb09e 100644 --- a/sparse/src/KokkosKernels_Handle.hpp +++ b/sparse/src/KokkosKernels_Handle.hpp @@ -610,14 +610,19 @@ class KokkosKernelsHandle { * @param num_streams The number of streams to allocate memory for. * @param gs_algorithm Specifies which algorithm to use: * - * KokkosSpace::GS_DEFAULT PointGaussSeidel - * KokkosSpace::GS_PERMUTED ?? - * KokkosSpace::GS_TEAM ?? - * KokkosSpace::GS_CLUSTER ?? - * KokkosSpace::GS_TWOSTAGE ?? + * KokkosSparse::GS_DEFAULT PointGaussSeidel or BlockGaussSeidel, depending on matrix type. + * KokkosSparse::GS_PERMUTED Reorders rows/cols into colors to improve locality. Uses RangePolicy over rows. + * KokkosSparse::GS_TEAM Uses TeamPolicy over batches of rows with ThreadVector within rows. + * KokkosSparse::GS_CLUSTER Uses independent clusters of nodes in the graph. Within a cluster, x is updated sequentially. + * For more information, see: https://arxiv.org/pdf/2204.02934.pdf. + * KokkosSparse::GS_TWOSTAGE Uses spmv to parallelize inner sweeps of x. + * For more information, see: https://arxiv.org/pdf/2104.01196.pdf. * @param coloring_algorithm Specifies which coloring algorithm to color the graph with: * - * KokkosGraph::COLORING_DEFAULT ?? + * KokkosGraph::COLORING_DEFAULT Depends on execution space: + * COLORING_SERIAL on Kokkos::Serial; + * COLORING_EB on GPUs; + * COLORING_VBBIT on Kokkos::Sycl or elsewhere. 
* KokkosGraph::COLORING_SERIAL Serial Greedy Coloring * KokkosGraph::COLORING_VB Vertex Based Coloring * KokkosGraph::COLORING_VBBIT Vertex Based Coloring with bit array @@ -754,7 +759,10 @@ class KokkosKernelsHandle { * @param hint_verts_per_cluster Hint how many verticies to use per cluster * @param coloring_algorithm Specifies which coloring algorithm to color the graph with: * - * KokkosGraph::COLORING_DEFAULT ?? + * KokkosGraph::COLORING_DEFAULT Depends on execution space: + * COLORING_SERIAL on Kokkos::Serial; + * COLORING_EB on GPUs; + * COLORING_VBBIT on Kokkos::Sycl or elsewhere. * KokkosGraph::COLORING_SERIAL Serial Greedy Coloring * KokkosGraph::COLORING_VB Vertex Based Coloring * KokkosGraph::COLORING_VBBIT Vertex Based Coloring with bit array diff --git a/sparse/src/KokkosSparse_gauss_seidel.hpp b/sparse/src/KokkosSparse_gauss_seidel.hpp index 036fe1b119..4e362f2781 100644 --- a/sparse/src/KokkosSparse_gauss_seidel.hpp +++ b/sparse/src/KokkosSparse_gauss_seidel.hpp @@ -29,13 +29,13 @@ namespace Experimental { /// @brief Gauss-Seidel preconditioner setup (first phase, based on sparsity /// pattern only) /// -/// @tparam ExecutionSpace This kernels execution space type. +/// @tparam ExecutionSpace This kernels execution space type /// @tparam KernelHandle A specialization of /// KokkosKernels::Experimental::KokkosKernelsHandle /// @tparam lno_row_view_t_ The matrix's rowmap type /// @tparam lno_nnz_view_t_ The matrix's entries type -/// @param space The execution space instance this kernel will be run on. 
-/// @param handle KernelHandle instance +/// @param space The execution space instance this kernel will be run on +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param row_map The matrix's rowmap @@ -112,7 +112,7 @@ void gauss_seidel_symbolic(const ExecutionSpace &space, KernelHandle *handle, /// KokkosKernels::Experimental::KokkosKernelsHandle /// @tparam lno_row_view_t_ The matrix's rowmap type /// @tparam lno_nnz_view_t_ The matrix's entries type -/// @param handle KernelHandle instance +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param row_map The matrix's rowmap @@ -141,7 +141,7 @@ void gauss_seidel_symbolic(KernelHandle *handle, /// KokkosKernels::Experimental::KokkosKernelsHandle /// @tparam lno_row_view_t_ The matrix's rowmap type /// @tparam lno_nnz_view_t_ The matrix's entries type -/// @param handle KernelHandle instance +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param block_size The number of degrees of freedom per block @@ -173,15 +173,15 @@ void block_gauss_seidel_symbolic( /// @brief Gauss-Seidel preconditioner setup (second phase, based on matrix's /// numeric values) /// -/// @tparam ExecutionSpace This kernels execution space type. +/// @tparam ExecutionSpace This kernels execution space type /// @tparam format The matrix storage format, CRS or BSR /// @tparam KernelHandle A specialization of /// KokkosKernels::Experimental::KokkosKernelsHandle /// @tparam lno_row_view_t_ The matrix's rowmap type /// @tparam lno_nnz_view_t_ The matrix's entries type /// @tparam scalar_nnz_view_t_ The matrix's values type -/// @param space The execution space instance this kernel will be run on. 
-/// @param handle KernelHandle instance +/// @param space The execution space instance this kernel will be run on +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param row_map The matrix's rowmap @@ -279,8 +279,8 @@ void gauss_seidel_numeric(const ExecutionSpace &space, KernelHandle *handle, /// @tparam lno_row_view_t_ The matrix's rowmap type /// @tparam lno_nnz_view_t_ The matrix's entries type /// @tparam scalar_nnz_view_t_ The matrix's values type. The user-provided -/// inverse diagonal must share this type. -/// @param handle KernelHandle instance +/// inverse diagonal must share this type +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param row_map The matrix's rowmap @@ -313,16 +313,16 @@ void gauss_seidel_numeric(KernelHandle *handle, /// numeric values). This version accepts the matrix's inverse diagonal from the /// user. /// -/// @tparam ExecutionSpace This kernels execution space type. +/// @tparam ExecutionSpace This kernels execution space type /// @tparam format The matrix storage format, CRS or BSR /// @tparam KernelHandle A specialization of /// KokkosKernels::Experimental::KokkosKernelsHandle /// @tparam lno_row_view_t_ The matrix's rowmap type /// @tparam lno_nnz_view_t_ The matrix's entries type /// @tparam scalar_nnz_view_t_ The matrix's values type. The user-provided -/// inverse diagonal must share this type. -/// @param space The execution space instance this kernel will be run on. 
-/// @param handle KernelHandle instance +/// inverse diagonal must share this type +/// @param space The execution space instance this kernel will be run on +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param row_map The matrix's rowmap @@ -427,8 +427,8 @@ void gauss_seidel_numeric(const ExecutionSpace &space, KernelHandle *handle, /// @tparam lno_row_view_t_ The matrix's rowmap type /// @tparam lno_nnz_view_t_ The matrix's entries type /// @tparam scalar_nnz_view_t_ The matrix's values type. The user-provided -/// inverse diagonal must share this type. -/// @param handle KernelHandle instance +/// inverse diagonal must share this type +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param row_map The matrix's rowmap @@ -468,7 +468,7 @@ void gauss_seidel_numeric(KernelHandle *handle, /// @tparam lno_row_view_t_ The matrix's rowmap type /// @tparam lno_nnz_view_t_ The matrix's entries type /// @tparam scalar_nnz_view_t_ The matrix's values type -/// @param handle handle A KokkosKernelsHandle instance +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param block_size The number of degrees of freedom per block @@ -504,7 +504,7 @@ void block_gauss_seidel_numeric( /// @brief Apply symmetric (forward + backward) Gauss-Seidel preconditioner to /// system AX=Y /// -/// @tparam ExecutionSpace This kernels execution space type. 
+/// @tparam ExecutionSpace This kernels execution space type /// @tparam format The matrix storage format, CRS or BSR /// @tparam KernelHandle A specialization of /// KokkosKernels::Experimental::KokkosKernelsHandle @@ -512,12 +512,12 @@ void block_gauss_seidel_numeric( /// @tparam lno_nnz_view_t_ The matrix's entries type /// @tparam scalar_nnz_view_t_ The matrix's values type /// @tparam x_scalar_view_t The type of the X (left-hand side, unknown) vector. -/// May be rank-1 or rank-2 View. +/// May be rank-1 or rank-2 View /// @tparam y_scalar_view_t The type of the Y (right-hand side) vector. May be -/// rank-1 or rank-2 View. +/// rank-1 or rank-2 View /// @param space The execution space instance this kernel will be run -/// on. NOTE: Currently only used for GS_DEFAULT. -/// @param handle handle A KokkosKernelsHandle instance +/// on. NOTE: Currently only used for GS_DEFAULT and GS_TWOSTAGE +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param row_map The matrix's rowmap @@ -680,8 +680,8 @@ void symmetric_gauss_seidel_apply( /// @tparam x_scalar_view_t The type of the X (left-hand side, unknown) vector. /// May be rank-1 or rank-2 View. /// @tparam y_scalar_view_t The type of the Y (right-hand side) vector. May be -/// rank-1 or rank-2 View. -/// @param handle handle A KokkosKernelsHandle instance +/// rank-1 or rank-2 View +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param row_map The matrix's rowmap @@ -727,10 +727,10 @@ void symmetric_gauss_seidel_apply( /// @tparam lno_nnz_view_t_ The matrix's entries type /// @tparam scalar_nnz_view_t_ The matrix's values type /// @tparam x_scalar_view_t The type of the X (left-hand side, unknown) vector. -/// May be rank-1 or rank-2 View. 
+/// May be rank-1 or rank-2 View /// @tparam y_scalar_view_t The type of the Y (right-hand side) vector. May be -/// rank-1 or rank-2 View. -/// @param handle handle A KokkosKernelsHandle instance. +/// rank-1 or rank-2 View +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param block_size The number of degrees of freedom per block @@ -793,12 +793,12 @@ void symmetric_block_gauss_seidel_apply( /// @tparam lno_nnz_view_t_ The matrix's entries type /// @tparam scalar_nnz_view_t_ The matrix's values type /// @tparam x_scalar_view_t The type of the X (left-hand side, unknown) vector. -/// May be rank-1 or rank-2 View. +/// May be rank-1 or rank-2 View /// @tparam y_scalar_view_t The type of the Y (right-hand side) vector. May be -/// rank-1 or rank-2 View. +/// rank-1 or rank-2 View /// @param space The execution space instance this kernel will be run -/// on. NOTE: Currently only used for GS_DEFAULT. -/// @param handle KernelHandle instance +/// on. NOTE: Currently only used for GS_DEFAULT and GS_TWOSTAGE +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param row_map The matrix's rowmap @@ -960,10 +960,10 @@ void forward_sweep_gauss_seidel_apply( /// @tparam lno_nnz_view_t_ The matrix's entries type /// @tparam scalar_nnz_view_t_ The matrix's values type /// @tparam x_scalar_view_t The type of the X (left-hand side, unknown) vector. -/// May be rank-1 or rank-2 View. +/// May be rank-1 or rank-2 View /// @tparam y_scalar_view_t The type of the Y (right-hand side) vector. May be -/// rank-1 or rank-2 View. 
-/// @param handle KernelHandle instance +/// rank-1 or rank-2 View +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param row_map The matrix's rowmap @@ -1008,10 +1008,10 @@ void forward_sweep_gauss_seidel_apply( /// @tparam lno_nnz_view_t_ The matrix's entries type /// @tparam scalar_nnz_view_t_ The matrix's values type /// @tparam x_scalar_view_t The type of the X (left-hand side, unknown) vector. -/// May be rank-1 or rank-2 View. +/// May be rank-1 or rank-2 View /// @tparam y_scalar_view_t The type of the Y (right-hand side) vector. May be -/// rank-1 or rank-2 View. -/// @param handle KernelHandle instance +/// rank-1 or rank-2 View +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param block_size The number of degrees of freedom per block @@ -1067,7 +1067,7 @@ void forward_sweep_block_gauss_seidel_apply( /// /// @brief Apply backward Gauss-Seidel preconditioner to system AX=Y /// -/// @tparam ExecutionSpace This kernels execution space type. +/// @tparam ExecutionSpace This kernels execution space type /// @tparam format The matrix storage format, CRS or BSR /// @tparam KernelHandle A specialization of /// KokkosKernels::Experimental::KokkosKernelsHandle @@ -1075,12 +1075,12 @@ void forward_sweep_block_gauss_seidel_apply( /// @tparam lno_nnz_view_t_ The matrix's entries type /// @tparam scalar_nnz_view_t_ The matrix's values type /// @tparam x_scalar_view_t The type of the X (left-hand side, unknown) vector. -/// May be rank-1 or rank-2 View. +/// May be rank-1 or rank-2 View /// @tparam y_scalar_view_t The type of the Y (right-hand side) vector. May be -/// rank-1 or rank-2 View. +/// rank-1 or rank-2 View /// @param space The execution space instance this kernel will be run -/// on. NOTE: Currently only used for GS_DEFAULT. 
-/// @param handle KernelHandle instance +/// on. NOTE: Currently only used for GS_DEFAULT and GS_TWOSTAGE +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param row_map The matrix's rowmap @@ -1242,10 +1242,10 @@ void backward_sweep_gauss_seidel_apply( /// @tparam lno_nnz_view_t_ The matrix's entries type /// @tparam scalar_nnz_view_t_ The matrix's values type /// @tparam x_scalar_view_t The type of the X (left-hand side, unknown) vector. -/// May be rank-1 or rank-2 View. +/// May be rank-1 or rank-2 View /// @tparam y_scalar_view_t The type of the Y (right-hand side) vector. May be -/// rank-1 or rank-2 View. -/// @param handle KernelHandle instance +/// rank-1 or rank-2 View +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param row_map The matrix's rowmap @@ -1290,10 +1290,10 @@ void backward_sweep_gauss_seidel_apply( /// @tparam lno_nnz_view_t_ The matrix's entries type /// @tparam scalar_nnz_view_t_ The matrix's values type /// @tparam x_scalar_view_t The type of the X (left-hand side, unknown) vector. -/// May be rank-1 or rank-2 View. +/// May be rank-1 or rank-2 View /// @tparam y_scalar_view_t The type of the Y (right-hand side) vector. May be -/// rank-1 or rank-2 View. 
-/// @param handle KernelHandle instance +/// rank-1 or rank-2 View +/// @param handle A KokkosKernelsHandle instance /// @param num_rows Number of rows in the matrix /// @param num_cols Number of columns in the matrix /// @param block_size The number of degrees of freedom per block diff --git a/sparse/src/KokkosSparse_gauss_seidel_handle.hpp b/sparse/src/KokkosSparse_gauss_seidel_handle.hpp index 649229918d..0b47b3e92c 100644 --- a/sparse/src/KokkosSparse_gauss_seidel_handle.hpp +++ b/sparse/src/KokkosSparse_gauss_seidel_handle.hpp @@ -758,9 +758,18 @@ class TwoStageGaussSeidelHandle void initVectors(int nrows_, int nrhs_) { if (this->nrows != nrows_ || this->nrhs != nrhs_) { - this->localR = vector_view_t("temp", nrows_, nrhs_); - this->localT = vector_view_t("temp", nrows_, nrhs_); - this->localZ = vector_view_t("temp", nrows_, nrhs_); + vector_view_t r( + Kokkos::view_alloc(this->execution_space, std::string("temp")), + nrows_, nrhs_); + this->localR = r; + vector_view_t t( + Kokkos::view_alloc(this->execution_space, std::string("temp")), + nrows_, nrhs_); + this->localT = t; + vector_view_t z( + Kokkos::view_alloc(this->execution_space, std::string("temp")), + nrows_, nrhs_); + this->localZ = z; this->nrows = nrows_; this->nrhs = nrhs_; }