Skip to content

Commit

Permalink
Add sptrsv execution space overloads
Browse files: browse the repository at this point in the history
  • Loading branch information
e10harvey committed Sep 27, 2023
1 parent 3f7e535 commit bf14408
Show file tree
Hide file tree
Showing 7 changed files with 599 additions and 289 deletions.
13 changes: 13 additions & 0 deletions docs/developer/apidocs/sparse.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,16 @@ par_ilut
gmres
-----
.. doxygenfunction:: gmres(KernelHandle* handle, AMatrix& A, BType& B, XType& X, Preconditioner<AMatrix>* precond)

sptrsv
------
.. doxygenfunction:: sptrsv_symbolic(const ExecutionSpace &space, KernelHandle *handle, lno_row_view_t_ rowmap, lno_nnz_view_t_ entries)
.. doxygenfunction:: sptrsv_symbolic(KernelHandle *handle, lno_row_view_t_ rowmap, lno_nnz_view_t_ entries)
.. doxygenfunction:: sptrsv_symbolic(ExecutionSpace &space, KernelHandle *handle, lno_row_view_t_ rowmap, lno_nnz_view_t_ entries, scalar_nnz_view_t_ values)
.. doxygenfunction:: sptrsv_symbolic(KernelHandle *handle, lno_row_view_t_ rowmap, lno_nnz_view_t_ entries, scalar_nnz_view_t_ values)
.. doxygenfunction:: sptrsv_solve(ExecutionSpace &space, KernelHandle *handle, lno_row_view_t_ rowmap, lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, BType b, XType x)
.. doxygenfunction:: sptrsv_solve(KernelHandle *handle, lno_row_view_t_ rowmap, lno_nnz_view_t_ entries, scalar_nnz_view_t_ values, BType b, XType x)
.. doxygenfunction:: sptrsv_solve(ExecutionSpace &space, KernelHandle *handle, XType x, XType b)
.. doxygenfunction:: sptrsv_solve(KernelHandle *handle, XType x, XType b)
.. doxygenfunction:: sptrsv_solve(ExecutionSpace &space, KernelHandle *handleL, KernelHandle *handleU, XType x, XType b)
.. doxygenfunction:: sptrsv_solve(KernelHandle *handleL, KernelHandle *handleU, XType x, XType b)
179 changes: 97 additions & 82 deletions sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp

Large diffs are not rendered by default.

326 changes: 205 additions & 121 deletions sparse/impl/KokkosSparse_sptrsv_solve_impl.hpp

Large diffs are not rendered by default.

38 changes: 22 additions & 16 deletions sparse/impl/KokkosSparse_sptrsv_solve_spec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,9 @@ template <class ExecutionSpace, class KernelHandle, class RowMapType,
ExecutionSpace, KernelHandle, RowMapType, EntriesType, ValuesType,
BType, XType>::value>
struct SPTRSV_SOLVE {
static void sptrsv_solve(KernelHandle *handle, const RowMapType row_map,
const EntriesType entries, const ValuesType values,
BType b, XType x);
static void sptrsv_solve(ExecutionSpace &space, KernelHandle *handle,
const RowMapType row_map, const EntriesType entries,
const ValuesType values, BType b, XType x);

static void sptrsv_solve_streams(
const std::vector<ExecutionSpace> &execspace_v,
Expand All @@ -117,50 +117,54 @@ template <class ExecutionSpace, class KernelHandle, class RowMapType,
struct SPTRSV_SOLVE<ExecutionSpace, KernelHandle, RowMapType, EntriesType,
ValuesType, BType, XType, false,
KOKKOSKERNELS_IMPL_COMPILE_LIBRARY> {
static void sptrsv_solve(KernelHandle *handle, const RowMapType row_map,
const EntriesType entries, const ValuesType values,
BType b, XType x) {
static void sptrsv_solve(ExecutionSpace &space, KernelHandle *handle,
const RowMapType row_map, const EntriesType entries,
const ValuesType values, BType b, XType x) {
// Call specific algorithm type
auto sptrsv_handle = handle->get_sptrsv_handle();
Kokkos::Profiling::pushRegion(sptrsv_handle->is_lower_tri()
? "KokkosSparse_sptrsv[lower]"
: "KokkosSparse_sptrsv[upper]");
if (sptrsv_handle->is_lower_tri()) {
if (sptrsv_handle->is_symbolic_complete() == false) {
Experimental::lower_tri_symbolic(*sptrsv_handle, row_map, entries);
Experimental::lower_tri_symbolic(space, *sptrsv_handle, row_map,
entries);
}
if (sptrsv_handle->get_algorithm() ==
KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN) {
Experimental::tri_solve_chain(*sptrsv_handle, row_map, entries, values,
b, x, true);
Experimental::tri_solve_chain(space, *sptrsv_handle, row_map, entries,
values, b, x, true);
} else {
#ifdef KOKKOSKERNELS_SPTRSV_CUDAGRAPHSUPPORT
using ExecSpace = typename RowMapType::memory_space::execution_space;
if (std::is_same<ExecSpace, Kokkos::Cuda>::value)
// TODO: set stream in thandle's sptrsvCudaGraph
Experimental::lower_tri_solve_cg(*sptrsv_handle, row_map, entries,
values, b, x);
else
#endif
Experimental::lower_tri_solve(*sptrsv_handle, row_map, entries,
Experimental::lower_tri_solve(space, *sptrsv_handle, row_map, entries,
values, b, x);
}
} else {
if (sptrsv_handle->is_symbolic_complete() == false) {
Experimental::upper_tri_symbolic(*sptrsv_handle, row_map, entries);
Experimental::upper_tri_symbolic(space, *sptrsv_handle, row_map,
entries);
}
if (sptrsv_handle->get_algorithm() ==
KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN) {
Experimental::tri_solve_chain(*sptrsv_handle, row_map, entries, values,
b, x, false);
Experimental::tri_solve_chain(space, *sptrsv_handle, row_map, entries,
values, b, x, false);
} else {
#ifdef KOKKOSKERNELS_SPTRSV_CUDAGRAPHSUPPORT
using ExecSpace = typename RowMapType::memory_space::execution_space;
if (std::is_same<ExecSpace, Kokkos::Cuda>::value)
// TODO: set stream in thandle's sptrsvCudaGraph
Experimental::upper_tri_solve_cg(*sptrsv_handle, row_map, entries,
values, b, x);
else
#endif
Experimental::upper_tri_solve(*sptrsv_handle, row_map, entries,
Experimental::upper_tri_solve(space, *sptrsv_handle, row_map, entries,
values, b, x);
}
}
Expand Down Expand Up @@ -188,7 +192,8 @@ struct SPTRSV_SOLVE<ExecutionSpace, KernelHandle, RowMapType, EntriesType,
if (sptrsv_handle_v[0]->is_lower_tri()) {
for (int i = 0; i < static_cast<int>(execspace_v.size()); i++) {
if (sptrsv_handle_v[i]->is_symbolic_complete() == false) {
Experimental::lower_tri_symbolic(*(sptrsv_handle_v[i]), row_map_v[i],
Experimental::lower_tri_symbolic(execspace_v[i],
*(sptrsv_handle_v[i]), row_map_v[i],
entries_v[i]);
}
}
Expand All @@ -198,7 +203,8 @@ struct SPTRSV_SOLVE<ExecutionSpace, KernelHandle, RowMapType, EntriesType,
} else {
for (int i = 0; i < static_cast<int>(execspace_v.size()); i++) {
if (sptrsv_handle_v[i]->is_symbolic_complete() == false) {
Experimental::upper_tri_symbolic(*(sptrsv_handle_v[i]), row_map_v[i],
Experimental::upper_tri_symbolic(execspace_v[i],
*(sptrsv_handle_v[i]), row_map_v[i],
entries_v[i]);
}
}
Expand Down
57 changes: 32 additions & 25 deletions sparse/impl/KokkosSparse_sptrsv_symbolic_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,10 @@ void symbolic_chain_phase(TriSolveHandle& thandle,
#endif
} // end symbolic_chain_phase

template <class TriSolveHandle, class RowMapType, class EntriesType>
void lower_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map,
const EntriesType dentries) {
template <class ExecSpaceIn, class TriSolveHandle, class RowMapType,
class EntriesType>
void lower_tri_symbolic(ExecSpaceIn& space, TriSolveHandle& thandle,
const RowMapType drow_map, const EntriesType dentries) {
#ifdef TRISOLVE_SYMB_TIMERS
Kokkos::Timer timer_sym_lowertri_total;
Kokkos::Timer timer;
Expand Down Expand Up @@ -177,10 +178,10 @@ void lower_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map,
size_type nrows = drow_map.extent(0) - 1;

auto row_map = Kokkos::create_mirror_view(drow_map);
Kokkos::deep_copy(row_map, drow_map);
Kokkos::deep_copy(space, row_map, drow_map);

auto entries = Kokkos::create_mirror_view(dentries);
Kokkos::deep_copy(entries, dentries);
Kokkos::deep_copy(space, entries, dentries);

// get device view - will deep_copy to it at end of this host routine
DeviceEntriesType dnodes_per_level = thandle.get_nodes_per_level();
Expand All @@ -193,11 +194,12 @@ void lower_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map,

DeviceSignedEntriesType dlevel_list = thandle.get_level_list();
HostSignedEntriesType level_list = Kokkos::create_mirror_view(dlevel_list);
Kokkos::deep_copy(level_list, dlevel_list);
Kokkos::deep_copy(space, level_list, dlevel_list);

signed_integral_t level = 0;
size_type node_count = 0;

space.fence(); // wait for deep copy writes to land
typename DeviceEntriesType::HostMirror level_ptr(
"lp", nrows + 1); // temp View used for index bookkeeping
level_ptr(0) = 0;
Expand Down Expand Up @@ -227,9 +229,9 @@ void lower_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map,

// Create the chain now
if (thandle.algm_requires_symb_chain()) {
// No need to pass in space, chain phase runs on the host
symbolic_chain_phase(thandle, nodes_per_level);
}

thandle.set_symbolic_complete();

// Output check
Expand Down Expand Up @@ -257,9 +259,9 @@ void lower_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map,
#endif

// Deep copy to device views
Kokkos::deep_copy(dnodes_grouped_by_level, nodes_grouped_by_level);
Kokkos::deep_copy(dnodes_per_level, nodes_per_level);
Kokkos::deep_copy(dlevel_list, level_list);
Kokkos::deep_copy(space, dnodes_grouped_by_level, nodes_grouped_by_level);
Kokkos::deep_copy(space, dnodes_per_level, nodes_per_level);
Kokkos::deep_copy(space, dlevel_list, level_list);

// Extra check:
#ifdef LVL_OUTPUT_INFO
Expand All @@ -279,6 +281,7 @@ void lower_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map,
check_count);
std::cout << " host check_count= " << check_count << std::endl;

space.fence(); // wait for deep copy writes to land
check_count = 0; // reset
Kokkos::parallel_reduce(
"check_count device",
Expand Down Expand Up @@ -568,20 +571,21 @@ void lower_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map,
thandle.set_workspace_size(max_lwork);
// workspace offset initialized to be zero
integer_view_t work_offset = thandle.get_work_offset();
Kokkos::deep_copy(work_offset, work_offset_host);
Kokkos::deep_copy(space, work_offset, work_offset_host);

// kernel types
// > off-diagonal
integer_view_t dkernel_type_by_level = thandle.get_kernel_type();
Kokkos::deep_copy(dkernel_type_by_level, kernel_type_by_level);
Kokkos::deep_copy(space, dkernel_type_by_level, kernel_type_by_level);
// > diagonal
integer_view_t ddiag_kernel_type_by_level = thandle.get_diag_kernel_type();
Kokkos::deep_copy(ddiag_kernel_type_by_level, diag_kernel_type_by_level);
Kokkos::deep_copy(space, ddiag_kernel_type_by_level,
diag_kernel_type_by_level);

// deep copy to device (of scheduling info)
Kokkos::deep_copy(dnodes_grouped_by_level, nodes_grouped_by_level);
Kokkos::deep_copy(dnodes_per_level, nodes_per_level);
Kokkos::deep_copy(dlevel_list, level_list);
Kokkos::deep_copy(space, dnodes_grouped_by_level, nodes_grouped_by_level);
Kokkos::deep_copy(space, dnodes_per_level, nodes_per_level);
Kokkos::deep_copy(space, dlevel_list, level_list);

#ifdef TRISOLVE_SYMB_TIMERS
std::cout << " + workspace time = " << timer.seconds() << std::endl;
Expand All @@ -598,9 +602,10 @@ void lower_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map,
#endif
} // end lower_tri_symbolic

template <class TriSolveHandle, class RowMapType, class EntriesType>
void upper_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map,
const EntriesType dentries) {
template <class ExecutionSpace, class TriSolveHandle, class RowMapType,
class EntriesType>
void upper_tri_symbolic(ExecutionSpace& space, TriSolveHandle& thandle,
const RowMapType drow_map, const EntriesType dentries) {
#ifdef TRISOLVE_SYMB_TIMERS
Kokkos::Timer timer_sym_uppertri_total;
Kokkos::Timer timer;
Expand All @@ -626,10 +631,10 @@ void upper_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map,
size_type nrows = drow_map.extent(0) - 1;

auto row_map = Kokkos::create_mirror_view(drow_map);
Kokkos::deep_copy(row_map, drow_map);
Kokkos::deep_copy(space, row_map, drow_map);

auto entries = Kokkos::create_mirror_view(dentries);
Kokkos::deep_copy(entries, dentries);
Kokkos::deep_copy(space, entries, dentries);

// get device view - will deep_copy to it at end of this host routine
DeviceEntriesType dnodes_per_level = thandle.get_nodes_per_level();
Expand All @@ -642,11 +647,12 @@ void upper_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map,

DeviceSignedEntriesType dlevel_list = thandle.get_level_list();
HostSignedEntriesType level_list = Kokkos::create_mirror_view(dlevel_list);
Kokkos::deep_copy(level_list, dlevel_list);
Kokkos::deep_copy(space, level_list, dlevel_list);

signed_integral_t level = 0;
size_type node_count = 0;

space.fence(); // Wait for deep copy writes to land
typename DeviceEntriesType::HostMirror level_ptr(
"lp", nrows + 1); // temp View used for index bookkeeping
level_ptr(0) = 0;
Expand Down Expand Up @@ -708,9 +714,9 @@ void upper_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map,
#endif

// Deep copy to device views
Kokkos::deep_copy(dnodes_grouped_by_level, nodes_grouped_by_level);
Kokkos::deep_copy(dnodes_per_level, nodes_per_level);
Kokkos::deep_copy(dlevel_list, level_list);
Kokkos::deep_copy(space, dnodes_grouped_by_level, nodes_grouped_by_level);
Kokkos::deep_copy(space, dnodes_per_level, nodes_per_level);
Kokkos::deep_copy(space, dlevel_list, level_list);

// Extra check:
#ifdef LVL_OUTPUT_INFO
Expand All @@ -730,6 +736,7 @@ void upper_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map,
check_count);
std::cout << " host check_count= " << check_count << std::endl;

space.fence(); // wait for deep copy writes to land
check_count = 0; // reset
Kokkos::parallel_reduce(
"check_count device",
Expand Down
22 changes: 14 additions & 8 deletions sparse/impl/KokkosSparse_sptrsv_symbolic_spec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,33 +67,37 @@ namespace Impl {
// Unification layer
/// \brief Implementation of KokkosSparse::sptrsv_symbolic

template <class KernelHandle, class RowMapType, class EntriesType,
template <class ExecutionSpace, class KernelHandle, class RowMapType,
class EntriesType,
bool tpl_spec_avail = sptrsv_symbolic_tpl_spec_avail<
KernelHandle, RowMapType, EntriesType>::value,
bool eti_spec_avail = sptrsv_symbolic_eti_spec_avail<
KernelHandle, RowMapType, EntriesType>::value>
struct SPTRSV_SYMBOLIC {
static void sptrsv_symbolic(KernelHandle *handle, const RowMapType row_map,
static void sptrsv_symbolic(const ExecutionSpace &space, KernelHandle *handle,
const RowMapType row_map,
const EntriesType entries);
};

#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY
//! Full specialization of sptrsv_symbolic
// Unification layer
template <class KernelHandle, class RowMapType, class EntriesType>
struct SPTRSV_SYMBOLIC<KernelHandle, RowMapType, EntriesType, false,
KOKKOSKERNELS_IMPL_COMPILE_LIBRARY> {
static void sptrsv_symbolic(KernelHandle *handle, const RowMapType row_map,
template <class ExecutionSpace, class KernelHandle, class RowMapType,
class EntriesType>
struct SPTRSV_SYMBOLIC<ExecutionSpace, KernelHandle, RowMapType, EntriesType,
false, KOKKOSKERNELS_IMPL_COMPILE_LIBRARY> {
static void sptrsv_symbolic(const ExecutionSpace &space, KernelHandle *handle,
const RowMapType row_map,
const EntriesType entries) {
auto sptrsv_handle = handle->get_sptrsv_handle();
auto nrows = row_map.extent(0) - 1;
sptrsv_handle->new_init_handle(nrows);

if (sptrsv_handle->is_lower_tri()) {
Experimental::lower_tri_symbolic(*sptrsv_handle, row_map, entries);
Experimental::lower_tri_symbolic(space, *sptrsv_handle, row_map, entries);
sptrsv_handle->set_symbolic_complete();
} else {
Experimental::upper_tri_symbolic(*sptrsv_handle, row_map, entries);
Experimental::upper_tri_symbolic(space, *sptrsv_handle, row_map, entries);
sptrsv_handle->set_symbolic_complete();
}
}
Expand All @@ -113,6 +117,7 @@ struct SPTRSV_SYMBOLIC<KernelHandle, RowMapType, EntriesType, false,
SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \
MEM_SPACE_TYPE) \
extern template struct SPTRSV_SYMBOLIC< \
EXEC_SPACE_TYPE, \
KokkosKernels::Experimental::KokkosKernelsHandle< \
const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \
EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \
Expand All @@ -130,6 +135,7 @@ struct SPTRSV_SYMBOLIC<KernelHandle, RowMapType, EntriesType, false,
SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \
MEM_SPACE_TYPE) \
template struct SPTRSV_SYMBOLIC< \
EXEC_SPACE_TYPE, \
KokkosKernels::Experimental::KokkosKernelsHandle< \
const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \
EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \
Expand Down
Loading

0 comments on commit bf14408

Please sign in to comment.