diff --git a/CMakeLists.txt b/CMakeLists.txt index fc41d40452..fb5d0591d6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -375,6 +375,7 @@ ELSE() KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC MKL) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CUBLAS) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CUSPARSE) + KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CUSOLVER) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC ROCBLAS) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC ROCSPARSE) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC ROCSOLVER) diff --git a/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_rocblas.hpp b/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_rocblas.hpp index e6dfef7c6d..869c065af2 100644 --- a/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_rocblas.hpp +++ b/blas/tpls/KokkosBlas2_syr2_tpl_spec_decl_rocblas.hpp @@ -163,7 +163,7 @@ namespace Impl { YViewType; \ typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ + Kokkos::MemoryTraits > \ AViewType; \ \ static void syr2(const typename AViewType::execution_space& space, \ diff --git a/cm_generate_makefile.bash b/cm_generate_makefile.bash index 3358ae2eb8..426827db00 100755 --- a/cm_generate_makefile.bash +++ b/cm_generate_makefile.bash @@ -178,6 +178,7 @@ get_kernels_tpls_list() { KOKKOSKERNELS_USER_TPL_LIBNAME_CMD= CUBLAS_DEFAULT=OFF CUSPARSE_DEFAULT=OFF + CUSOLVER_DEFAULT=OFF ROCBLAS_DEFAULT=OFF ROCSPARSE_DEFAULT=OFF PARSE_TPLS_LIST=$(echo $KOKKOSKERNELS_TPLS | tr "," "\n") @@ -191,6 +192,9 @@ get_kernels_tpls_list() { if [ "$UC_TPLS" == "CUSPARSE" ]; then CUSPARSE_DEFAULT=ON fi + if [ "$UC_TPLS" == "CUSOLVER" ]; then + CUSOLVER_DEFAULT=ON + fi if [ "$UC_TPLS" == "ROCBLAS" ]; then ROCBLAS_DEFAULT=ON fi @@ -224,6 +228,9 @@ get_kernels_tpls_list() { if [ "$CUSPARSE_DEFAULT" == "OFF" ]; then KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_CUSPARSE=OFF ${KOKKOSKERNELS_TPLS_CMD}" fi + if [ "$CUSOLVER_DEFAULT" == "OFF" ]; then + KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_CUSOLVER=OFF ${KOKKOSKERNELS_TPLS_CMD}" + fi if [ "$ROCBLAS_DEFAULT" == "OFF" ]; then KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_ROCBLAS=OFF ${KOKKOSKERNELS_TPLS_CMD}" fi diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index d5b2a1d8e9..13223259ef 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1,6 +1,6 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( LIB_REQUIRED_PACKAGES Kokkos - LIB_OPTIONAL_TPLS quadmath MKL BLAS LAPACK METIS SuperLU Cholmod CUBLAS CUSPARSE ROCBLAS ROCSPARSE + LIB_OPTIONAL_TPLS quadmath MKL BLAS LAPACK METIS SuperLU Cholmod CUBLAS CUSPARSE CUSOLVER ROCBLAS ROCSPARSE TEST_OPTIONAL_TPLS yaml-cpp ) # NOTE: If you update names in LIB_OPTIONAL_TPLS above, make sure to map those names in diff --git a/cmake/KokkosKernels_config.h.in b/cmake/KokkosKernels_config.h.in index c40a2b18a7..6f5b07f287 100644 --- a/cmake/KokkosKernels_config.h.in +++ b/cmake/KokkosKernels_config.h.in @@ -114,10 +114,12 @@ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_LAPACK /* MKL library */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_MKL -/* CUSPARSE */ -#cmakedefine KOKKOSKERNELS_ENABLE_TPL_CUSPARSE /* CUBLAS */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_CUBLAS +/* CUSPARSE */ +#cmakedefine KOKKOSKERNELS_ENABLE_TPL_CUSPARSE +/* CUSOLVER */ +#cmakedefine KOKKOSKERNELS_ENABLE_TPL_CUSOLVER /* MAGMA */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_MAGMA /* SuperLU */ diff --git a/cmake/Modules/FindTPLCUSOLVER.cmake b/cmake/Modules/FindTPLCUSOLVER.cmake new file mode 100644 index 0000000000..4b75aefd65 --- /dev/null +++ b/cmake/Modules/FindTPLCUSOLVER.cmake @@ -0,0 +1,17 @@ +FIND_PACKAGE(CUDA) + +INCLUDE(FindPackageHandleStandardArgs) +IF (NOT CUDA_FOUND) + #Important note here: this find Module is named TPLCUSOLVER + #The eventual target is named CUSOLVER. To avoid naming conflicts + #the find module is called TPLCUSOLVER. This call will cause + #the find_package call to fail in a "standard" CMake way + FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUSOLVER REQUIRED_VARS CUDA_FOUND) +ELSE() + #The libraries might be empty - OR they might explicitly be not found + IF("${CUDA_cusolver_LIBRARY}" MATCHES "NOTFOUND") + FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUSOLVER REQUIRED_VARS CUDA_cusolver_LIBRARY) + ELSE() + KOKKOSKERNELS_CREATE_IMPORTED_TPL(CUSOLVER LIBRARY ${CUDA_cusolver_LIBRARY}) + ENDIF() +ENDIF() diff --git a/cmake/kokkoskernels_features.cmake b/cmake/kokkoskernels_features.cmake index 3ecc95d6b5..cbd2a848ef 100644 --- a/cmake/kokkoskernels_features.cmake +++ b/cmake/kokkoskernels_features.cmake @@ -39,3 +39,11 @@ ELSEIF (KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER AND NOT KOKKOSKERNELS_ENABLE_TPL_ROCS ELSEIF (KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER AND NOT KOKKOSKERNELS_ENABLE_TPL_ROCBLAS) MESSAGE(FATAL_ERROR "rocSOLVER requires rocBLAS, please reconfigure with KOKKOSKERNELS_ENABLE_TPL_ROCBLAS:BOOL=ON.") ENDIF() + +IF (KOKKOSKERNELS_ENABLE_TPL_CUSOLVER AND NOT KOKKOSKERNELS_ENABLE_TPL_CUBLAS AND NOT KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) + MESSAGE(FATAL_ERROR "cuSOLVER requires cuBLAS and cuSPARSE, please reconfigure with KOKKOSKERNELS_ENABLE_TPL_CUBLAS:BOOL=ON and KOKKOSKERNELS_ENABLE_TPL_CUSPARSE:BOOL=ON.") +ELSEIF (KOKKOSKERNELS_ENABLE_TPL_CUSOLVER AND NOT KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) + MESSAGE(FATAL_ERROR "cuSOLVER requires cuSPARSE, please reconfigure with KOKKOSKERNELS_ENABLE_TPL_CUSPARSE:BOOL=ON.") +ELSEIF (KOKKOSKERNELS_ENABLE_TPL_CUSOLVER AND NOT KOKKOSKERNELS_ENABLE_TPL_CUBLAS) + MESSAGE(FATAL_ERROR "cuSOLVER requires cuBLAS, please reconfigure with KOKKOSKERNELS_ENABLE_TPL_CUBLAS:BOOL=ON.") +ENDIF() diff --git a/cmake/kokkoskernels_tpls.cmake b/cmake/kokkoskernels_tpls.cmake index 2f54278d1b..d1a44721e6 100644 --- a/cmake/kokkoskernels_tpls.cmake +++ b/cmake/kokkoskernels_tpls.cmake @@ -447,14 +447,18 @@ ENDIF() KOKKOSKERNELS_ADD_OPTION(NO_DEFAULT_CUDA_TPLS OFF BOOL "Whether CUDA TPLs should be enabled by default. Default: OFF") SET(CUBLAS_DEFAULT ${KOKKOS_ENABLE_CUDA}) SET(CUSPARSE_DEFAULT ${KOKKOS_ENABLE_CUDA}) +SET(CUSOLVER_DEFAULT ${KOKKOS_ENABLE_CUDA}) IF(KOKKOSKERNELS_NO_DEFAULT_CUDA_TPLS) SET(CUBLAS_DEFAULT OFF) SET(CUSPARSE_DEFAULT OFF) + SET(CUSOLVER_DEFAULT OFF) ENDIF() KOKKOSKERNELS_ADD_TPL_OPTION(CUBLAS ${CUBLAS_DEFAULT} "Whether to enable CUBLAS" DEFAULT_DOCSTRING "ON if CUDA-enabled Kokkos, otherwise OFF") KOKKOSKERNELS_ADD_TPL_OPTION(CUSPARSE ${CUSPARSE_DEFAULT} "Whether to enable CUSPARSE" DEFAULT_DOCSTRING "ON if CUDA-enabled Kokkos, otherwise OFF") +KOKKOSKERNELS_ADD_TPL_OPTION(CUSOLVER ${CUSOLVER_DEFAULT} "Whether to enable CUSOLVER" + DEFAULT_DOCSTRING "ON if CUDA-enabled Kokkos, otherwise OFF") KOKKOSKERNELS_ADD_OPTION(NO_DEFAULT_ROCM_TPLS OFF BOOL "Whether ROCM TPLs should be enabled by default. Default: OFF") # Unlike CUDA, ROCm does not automatically install these TPLs @@ -501,6 +505,7 @@ IF (NOT KOKKOSKERNELS_HAS_TRILINOS) KOKKOSKERNELS_IMPORT_TPL(MKL) KOKKOSKERNELS_IMPORT_TPL(CUBLAS) KOKKOSKERNELS_IMPORT_TPL(CUSPARSE) + KOKKOSKERNELS_IMPORT_TPL(CUSOLVER) KOKKOSKERNELS_IMPORT_TPL(CBLAS) KOKKOSKERNELS_IMPORT_TPL(LAPACKE) KOKKOSKERNELS_IMPORT_TPL(CHOLMOD) diff --git a/common/src/KokkosKernels_PrintConfiguration.hpp b/common/src/KokkosKernels_PrintConfiguration.hpp index 55e7285ed2..c2e3a5187f 100644 --- a/common/src/KokkosKernels_PrintConfiguration.hpp +++ b/common/src/KokkosKernels_PrintConfiguration.hpp @@ -44,6 +44,18 @@ inline void print_cusparse_version_if_enabled(std::ostream& os) { << "KOKKOSKERNELS_ENABLE_TPL_CUSPARSE: no\n"; #endif } + +inline void print_cusolver_version_if_enabled(std::ostream& os) { +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER + os << " " + << "KOKKOSKERNELS_ENABLE_TPL_CUSOLVER: " << cusolver_version_string() + << "\n"; +#else + os << " " + << "KOKKOSKERNELS_ENABLE_TPL_CUSOLVER: no\n"; +#endif +} + inline void print_enabled_tpls(std::ostream& os) { #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK os << " " @@ -96,6 +108,7 @@ inline void print_enabled_tpls(std::ostream& os) { #endif print_cublas_version_if_enabled(os); print_cusparse_version_if_enabled(os); + print_cusolver_version_if_enabled(os); #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS os << " " << "KOKKOSKERNELS_ENABLE_TPL_ROCBLAS: yes\n"; diff --git a/common/src/KokkosKernels_TplsVersion.hpp b/common/src/KokkosKernels_TplsVersion.hpp index 38de7c1399..3e00d72457 100644 --- a/common/src/KokkosKernels_TplsVersion.hpp +++ b/common/src/KokkosKernels_TplsVersion.hpp @@ -28,6 +28,10 @@ #include "cusparse.h" #endif +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) +#include "cusolver_common.h" +#endif + namespace KokkosKernels { #if defined(KOKKOSKERNELS_ENABLE_TPL_CUBLAS) @@ -53,5 +57,16 @@ inline std::string cusparse_version_string() { } #endif +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) +inline std::string cusolver_version_string() { + std::stringstream ss; + + ss << CUSOLVER_VER_MAJOR << "." << CUSOLVER_VER_MINOR << "." + << CUSOLVER_VER_PATCH << "." << CUSOLVER_VER_BUILD; + + return ss.str(); +} +#endif + } // namespace KokkosKernels #endif // _KOKKOSKERNELS_TPLS_VERSIONS_HPP diff --git a/lapack/impl/KokkosLapack_gesv_spec.hpp b/lapack/impl/KokkosLapack_gesv_spec.hpp index 57098f75fc..97d74280ff 100644 --- a/lapack/impl/KokkosLapack_gesv_spec.hpp +++ b/lapack/impl/KokkosLapack_gesv_spec.hpp @@ -90,7 +90,7 @@ struct GESV +#include "KokkosLapack_cusolver.hpp" namespace KokkosLapack { namespace Impl { diff --git a/lapack/tpls/KokkosLapack_cusolver.hpp b/lapack/tpls/KokkosLapack_cusolver.hpp new file mode 100644 index 0000000000..006fd68b6f --- /dev/null +++ b/lapack/tpls/KokkosLapack_cusolver.hpp @@ -0,0 +1,92 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_CUSOLVER_HPP_ +#define KOKKOSLAPACK_CUSOLVER_HPP_ + +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#include + +namespace KokkosLapack { +namespace Impl { + +// Declaration of the singleton for cusolver +// this is the only header that needs to be +// included when using cusolverDn. +struct CudaLapackSingleton { + cusolverDnHandle_t handle; + + CudaLapackSingleton(); + + static CudaLapackSingleton& singleton(); +}; + +inline void cusolver_internal_error_throw(cusolverStatus_t cusolverStatus, + const char* name, const char* file, + const int line) { + std::ostringstream out; + out << name << " error( "; + switch (cusolverStatus) { + case CUSOLVER_STATUS_NOT_INITIALIZED: + out << "CUSOLVER_STATUS_NOT_INITIALIZED): cusolver handle was not " + "created correctly."; + break; + case CUSOLVER_STATUS_ALLOC_FAILED: + out << "CUSOLVER_STATUS_ALLOC_FAILED): you might tried to allocate too " + "much memory"; + break; + case CUSOLVER_STATUS_INVALID_VALUE: + out << "CUSOLVER_STATUS_INVALID_VALUE)"; + break; + case CUSOLVER_STATUS_ARCH_MISMATCH: + out << "CUSOLVER_STATUS_ARCH_MISMATCH)"; + break; + case CUSOLVER_STATUS_EXECUTION_FAILED: + out << "CUSOLVER_STATUS_EXECUTION_FAILED)"; + break; + case CUSOLVER_STATUS_INTERNAL_ERROR: + out << "CUSOLVER_STATUS_INTERNAL_ERROR)"; + break; + case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: + out << "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED)"; + break; + default: out << "unrecognized error code): this is bad!"; break; + } + if (file) { + out << " " << file << ":" << line; + } + throw std::runtime_error(out.str()); +} + +inline void cusolver_internal_safe_call(cusolverStatus_t cusolverStatus, + const char* name, + const char* file = nullptr, + const int line = 0) { + if (CUSOLVER_STATUS_SUCCESS != cusolverStatus) { + cusolver_internal_error_throw(cusolverStatus, name, file, line); + } +} + +// The macro below defines is the public interface for the safe cusolver calls. +// The functions themselves are protected by impl namespace. +#define KOKKOS_CUSOLVER_SAFE_CALL_IMPL(call) \ + KokkosLapack::Impl::cusolver_internal_safe_call(call, #call, __FILE__, \ + __LINE__) + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#endif // KOKKOSLAPACK_CUSOLVER_HPP_ diff --git a/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp index e7bc5425f7..b7c336681f 100644 --- a/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp @@ -79,6 +79,37 @@ KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, } // namespace Impl } // namespace KokkosLapack +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +namespace KokkosLapack { +namespace Impl { + +#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct gesv_tpl_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) + +} // namespace Impl +} // namespace KokkosLapack +#endif // CUSOLVER + #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER #include diff --git a/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp index d3a71a0cfa..82f7aea64a 100644 --- a/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp @@ -76,28 +76,37 @@ void lapackGesvWrapper(const AViewType& A, const BViewType& B, } } -#define KOKKOSLAPACK_GESV_LAPACK(SCALAR, LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ +#define KOKKOSLAPACK_GESV_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE) \ + template <> \ struct GESV< \ - ExecSpace, \ - Kokkos::View, \ + EXECSPACE, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ + true, \ + gesv_eti_spec_avail< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ using AViewType = \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>; \ using BViewType = \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>; \ using PViewType = \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>; \ \ - static void gesv(const ExecSpace& /* space */, const AViewType& A, \ + static void gesv(const EXECSPACE& /* space */, const AViewType& A, \ const BViewType& B, const PViewType& IPIV) { \ Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_LAPACK," #SCALAR \ "]"); \ @@ -107,21 +116,27 @@ void lapackGesvWrapper(const AViewType& A, const BViewType& B, } \ }; -KOKKOSLAPACK_GESV_LAPACK(float, Kokkos::LayoutLeft, Kokkos::HostSpace, true) -KOKKOSLAPACK_GESV_LAPACK(float, Kokkos::LayoutLeft, Kokkos::HostSpace, false) - -KOKKOSLAPACK_GESV_LAPACK(double, Kokkos::LayoutLeft, Kokkos::HostSpace, true) -KOKKOSLAPACK_GESV_LAPACK(double, Kokkos::LayoutLeft, Kokkos::HostSpace, false) - +#if defined(KOKKOS_ENABLE_SERIAL) +KOKKOSLAPACK_GESV_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Serial, + Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Serial, + Kokkos::HostSpace) KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace, true) -KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace, false) - + Kokkos::Serial, Kokkos::HostSpace) KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace, true) + Kokkos::Serial, Kokkos::HostSpace) +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +KOKKOSLAPACK_GESV_LAPACK(float, Kokkos::LayoutLeft, Kokkos::OpenMP, + Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(double, Kokkos::LayoutLeft, Kokkos::OpenMP, + Kokkos::HostSpace) +KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::OpenMP, Kokkos::HostSpace) KOKKOSLAPACK_GESV_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace, false) + Kokkos::OpenMP, Kokkos::HostSpace) +#endif } // namespace Impl } // namespace KokkosLapack @@ -390,6 +405,161 @@ KOKKOSLAPACK_CGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) } // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#include "KokkosLapack_cusolver.hpp" + +namespace KokkosLapack { +namespace Impl { + +template +void cusolverGesvWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV, + const AViewType& A, const BViewType& B) { + using memory_space = typename AViewType::memory_space; + using Scalar = typename BViewType::non_const_value_type; + using ALayout_t = typename AViewType::array_layout; + using BLayout_t = typename BViewType::array_layout; + + const int m = A.extent_int(0); + const int n = A.extent_int(1); + const int lda = std::is_same_v ? A.stride(0) + : A.stride(1); + + (void)B; + + const int nrhs = B.extent_int(1); + const int ldb = std::is_same_v ? B.stride(0) + : B.stride(1); + int lwork = 0; + Kokkos::View info("getrf info"); + + CudaLapackSingleton& s = CudaLapackSingleton::singleton(); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSetStream(s.handle, space.cuda_stream())); + if constexpr (std::is_same_v) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSgetrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); + Kokkos::View Workspace("getrf workspace", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgetrf(s.handle, m, n, A.data(), + lda, Workspace.data(), + IPIV.data(), info.data())); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSgetrs(s.handle, CUBLAS_OP_N, m, nrhs, A.data(), lda, + IPIV.data(), B.data(), ldb, info.data())); + } + if constexpr (std::is_same_v) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnDgetrf_bufferSize(s.handle, m, n, A.data(), lda, &lwork)); + Kokkos::View Workspace("getrf workspace", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgetrf(s.handle, m, n, A.data(), + lda, Workspace.data(), + IPIV.data(), info.data())); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnDgetrs(s.handle, CUBLAS_OP_N, m, nrhs, A.data(), lda, + IPIV.data(), B.data(), ldb, info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgetrf_bufferSize( + s.handle, m, n, reinterpret_cast(A.data()), lda, &lwork)); + Kokkos::View Workspace("getrf workspace", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnCgetrf(s.handle, m, n, reinterpret_cast(A.data()), + lda, reinterpret_cast(Workspace.data()), + IPIV.data(), info.data())); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgetrs( + s.handle, CUBLAS_OP_N, m, nrhs, reinterpret_cast(A.data()), + lda, IPIV.data(), reinterpret_cast(B.data()), ldb, + info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrf_bufferSize( + s.handle, m, n, reinterpret_cast(A.data()), lda, + &lwork)); + Kokkos::View Workspace("getrf workspace", + lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrf( + s.handle, m, n, reinterpret_cast(A.data()), lda, + reinterpret_cast(Workspace.data()), IPIV.data(), + info.data())); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgetrs( + s.handle, CUBLAS_OP_N, m, nrhs, + reinterpret_cast(A.data()), lda, IPIV.data(), + reinterpret_cast(B.data()), ldb, info.data())); + } + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL)); +} + +#define KOKKOSLAPACK_GESV_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GESV< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + gesv_eti_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using BViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using PViewType = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void gesv(const Kokkos::Cuda& space, const AViewType& A, \ + const BViewType& B, const PViewType& IPIV) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_CUSOLVER," #SCALAR \ + "]"); \ + gesv_print_specialization(); \ + \ + cusolverGesvWrapper(space, IPIV, A, B); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSLAPACK_GESV_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +KOKKOSLAPACK_GESV_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GESV_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GESV_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GESV_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER + // ROCSOLVER #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER #include @@ -445,28 +615,40 @@ void rocsolverGesvWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV, KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); } -#define KOKKOSLAPACK_GESV_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ +#define KOKKOSLAPACK_GESV_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ struct GESV< \ - ExecSpace, \ - Kokkos::View, \ + Kokkos::HIP, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ + true, \ + gesv_eti_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ using AViewType = \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>; \ using BViewType = \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>; \ using PViewType = Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>; \ \ - static void gesv(const ExecSpace& space, const AViewType& A, \ + static void gesv(const Kokkos::HIP& space, const AViewType& A, \ const BViewType& B, const PViewType& IPIV) { \ Kokkos::Profiling::pushRegion( \ "KokkosLapack::gesv[TPL_ROCSOLVER," #SCALAR "]"); \ @@ -477,21 +659,12 @@ void rocsolverGesvWrapper(const ExecutionSpace& space, const IPIVViewType& IPIV, } \ }; -KOKKOSLAPACK_GESV_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace, true) -KOKKOSLAPACK_GESV_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace, false) - -KOKKOSLAPACK_GESV_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace, true) -KOKKOSLAPACK_GESV_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace, false) - +KOKKOSLAPACK_GESV_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GESV_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) KOKKOSLAPACK_GESV_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HIPSpace, true) -KOKKOSLAPACK_GESV_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HIPSpace, false) - -KOKKOSLAPACK_GESV_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HIPSpace, true) + Kokkos::HIPSpace) KOKKOSLAPACK_GESV_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HIPSpace, false) + Kokkos::HIPSpace) } // namespace Impl } // namespace KokkosLapack diff --git a/lapack/unit_test/Test_Lapack_gesv.hpp b/lapack/unit_test/Test_Lapack_gesv.hpp index e1cf743f91..318f9f06ae 100644 --- a/lapack/unit_test/Test_Lapack_gesv.hpp +++ b/lapack/unit_test/Test_Lapack_gesv.hpp @@ -15,15 +15,15 @@ //@HEADER // only enable this test where KokkosLapack supports gesv: -// CUDA+MAGMA and HOST+LAPACK -#if (defined(TEST_CUDA_LAPACK_CPP) && \ - defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA)) || \ - (defined(TEST_HIP_LAPACK_CPP) && \ - defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ - (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ - (defined(TEST_OPENMP_LAPACK_CPP) || \ - defined(TEST_OPENMPTARGET_LAPACK_CPP) || \ - defined(TEST_SERIAL_LAPACK_CPP) || defined(TEST_THREADS_LAPACK_CPP))) +// CUDA+(MAGMA or CUSOLVER), HIP+ROCSOLVER and HOST+LAPACK +#if (defined(TEST_CUDA_LAPACK_CPP) && \ + (defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) || \ + defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER))) || \ + (defined(TEST_HIP_LAPACK_CPP) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ + (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ + (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || \ + defined(TEST_THREADS_LAPACK_CPP))) #include #include @@ -130,14 +130,16 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { // Checking vs ref on CPU, this eps is about 10^-9 typedef typename ats::mag_type mag_type; - const mag_type eps = 1.0e7 * ats::epsilon(); + const mag_type eps = 2.0e7 * ats::epsilon(); bool test_flag = true; for (int i = 0; i < N; i++) { if (ats::abs(h_B(i) - h_X0(i)) > eps) { test_flag = false; - // printf( " Error %d, pivot %c, padding %c: result( %.15lf ) != - // solution( %.15lf ) at (%d)\n", N, mode[0], padding[0], - // ats::abs(h_B(i)), ats::abs(h_X0(i)), int(i) ); + // printf( + // " Error %d, pivot %c, padding %c: result( %.15lf ) !=" + // "solution( %.15lf ) at (%d), error=%.15e, eps=%.15e\n", + // N, mode[0], padding[0], ats::abs(h_B(i)), ats::abs(h_X0(i)), + // int(i), ats::abs(h_B(i) - h_X0(i)), eps); // break; } } @@ -425,4 +427,4 @@ TEST_F(TestCategory, gesv_mrhs_complex_float) { } #endif -#endif // CUDA+MAGMA or HIP+ROCSOLVER or LAPACK+HOST +#endif // CUDA+(MAGMA or CUSOLVER) or HIP+ROCSOLVER or LAPACK+HOST diff --git a/scripts/cm_test_all_sandia b/scripts/cm_test_all_sandia index 3a8079dc66..fda38735a0 100755 --- a/scripts/cm_test_all_sandia +++ b/scripts/cm_test_all_sandia @@ -91,7 +91,7 @@ print_help() { echo "--with-tpls=TPLS: set KOKKOSKERNELS_ENABLE_TPLS" echo " Provide a comma-separated list of TPLs" echo " Valid items:" - echo " blas, mkl, cublas, cusparse, magma, armpl, rocblas, rocsparse, rocsolver" + echo " blas, mkl, cublas, cusparse, cusolver, magma, armpl, rocblas, rocsparse, rocsolver" echo "" echo "ARGS: list of expressions matching compilers to test" @@ -1083,7 +1083,7 @@ setup_env() { if [[ "${SPOT_CHECK_TPLS}" = "True" ]]; then # device tpls if [[ "$compiler" == cuda* ]]; then - NEW_TPL_LIST="cublas,cusparse," + NEW_TPL_LIST="cublas,cusparse,cusolver," export KOKKOS_CUDA_OPTIONS="${KOKKOS_CUDA_OPTIONS},enable_lambda" fi if [[ "$compiler" == rocm* ]]; then