From 6a55d793427baf19ebb5e1fb5f38aef28fb1e1a4 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Tue, 7 Nov 2023 10:04:37 -0700 Subject: [PATCH 1/4] NRM1: refactoring TPL layer a bit with c++17 if constexpr Hopefully this leads to simpler code, less duplication, less macro and easier maintenance! Adding support for oneapi MKL while making tpl layer changes. --- blas/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp | 34 + blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp | 772 +++++++----------- blas/unit_test/Test_Blas1_nrm1.hpp | 8 +- 3 files changed, 335 insertions(+), 479 deletions(-) diff --git a/blas/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp b/blas/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp index 04ec811990..be0a45c7be 100644 --- a/blas/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp +++ b/blas/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp @@ -113,6 +113,40 @@ KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, #endif // KOKKOSKERNELS_ENABLE_TPL_ROCBLAS +// oneMKL +#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL + +#if defined(KOKKOS_ENABLE_SYCL) && \ + !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) + +#define KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct nrm1_tpl_spec_avail< \ + ExecSpace, \ + Kokkos::View< \ + typename Kokkos::Details::InnerProductSpaceTraits::mag_type, \ + LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ + }; + +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL( + double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL( + float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL( + Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Experimental::SYCLDeviceUSMSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL( + Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Experimental::SYCLDeviceUSMSpace) + +#endif // KOKKOS_ENABLE_SYCL +#endif // KOKKOSKERNELS_ENABLE_TPL_MKL + } // namespace Impl } // namespace KokkosBlas #endif diff --git a/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp b/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp index b5b6e061ec..2e2c98a579 100644 --- a/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp +++ b/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp @@ -39,32 +39,39 @@ inline void nrm1_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(SCALAR, LAYOUT, MEMSPACE, \ + ETI_SPEC_AVAIL) \ template \ struct Nrm1< \ ExecSpace, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ + Kokkos::View::mag_type, LAYOUT, \ + Kokkos::HostSpace, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ + using mag_type = typename Kokkos::ArithTraits::mag_type; \ + using RV = Kokkos::View>; \ + using XV = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using size_type = typename XV::size_type; \ \ static void nrm1(const ExecSpace& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_BLAS,double]"); \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_BLAS," #SCALAR "]"); \ const size_type numElems = X.extent(0); \ if (numElems < static_cast(INT_MAX)) { \ nrm1_print_specialization(); \ int N = numElems; \ int one = 1; \ - R() = HostBlas::asum(N, X.data(), one); \ + if constexpr (Kokkos::ArithTraits::is_complex) { \ + R() = HostBlas>::asum( \ + N, reinterpret_cast*>(X.data()), \ + one); \ + } else { \ + R() = HostBlas::asum(N, X.data(), one); \ + } \ } else { \ Nrm1::nrm1(space, R, X); \ } \ @@ -72,128 +79,25 @@ namespace Impl { } \ }; -#define KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Nrm1< \ - ExecSpace, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm1(const ExecSpace& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_BLAS,float]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - int N = numElems; \ - int one = 1; \ - R() = HostBlas::asum(N, X.data(), one); \ - } else { \ - Nrm1::nrm1(space, R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ - }; +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, + Kokkos::HostSpace, true) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, + Kokkos::HostSpace, false) -#define KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Nrm1 >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm1(const ExecSpace& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::nrm1[TPL_BLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - int N = numElems; \ - int one = 1; \ - R() = HostBlas >::asum( \ - N, reinterpret_cast*>(X.data()), one); \ - } else { \ - Nrm1::nrm1(space, R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ - }; +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, + Kokkos::HostSpace, true) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, + Kokkos::HostSpace, false) -#define KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_BLAS(LAYOUT, MEMSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Nrm1 >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm1(const ExecSpace& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::nrm1[TPL_BLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - int N = numElems; \ - int one = 1; \ - R() = HostBlas >::asum( \ - N, reinterpret_cast*>(X.data()), one); \ - } else { \ - Nrm1::nrm1(space, R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ - }; - -KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) - -KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HostSpace, true) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HostSpace, false) -KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) - -KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - true) -KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, - false) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HostSpace, true) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HostSpace, false) } // namespace Impl } // namespace KokkosBlas @@ -207,40 +111,65 @@ KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ +template +void cublasAsumWrapper(const ExecutionSpace& space, RViewType& R, + const XViewType& X) { + using XScalar = typename XViewType::non_const_value_type; + + nrm1_print_specialization(); + const int N = static_cast(X.extent(0)); + constexpr int one = 1; + KokkosBlas::Impl::CudaBlasSingleton& s = + KokkosBlas::Impl::CudaBlasSingleton::singleton(); + + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, space.cuda_stream())); + if constexpr (std::is_same_v) { + KOKKOS_CUBLAS_SAFE_CALL_IMPL( + cublasSasum(s.handle, N, X.data(), one, R.data())); + } + if constexpr (std::is_same_v) { + KOKKOS_CUBLAS_SAFE_CALL_IMPL( + cublasDasum(s.handle, N, X.data(), one, R.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_CUBLAS_SAFE_CALL_IMPL( + cublasScasum(s.handle, N, reinterpret_cast(X.data()), + one, R.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDzasum( + s.handle, N, reinterpret_cast(X.data()), one, + R.data())); + } + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); +} + +#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(SCALAR, LAYOUT, EXECSPACE, \ + MEMSPACE, ETI_SPEC_AVAIL) \ template <> \ struct Nrm1< \ EXECSPACE, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ + Kokkos::View::mag_type, LAYOUT, \ + Kokkos::HostSpace, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ 1, true, ETI_SPEC_AVAIL> { \ using execution_space = EXECSPACE; \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ + using RV = Kokkos::View::mag_type, \ + LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits>; \ + using XV = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using size_type = typename XV::size_type; \ \ static void nrm1(const execution_space& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_CUBLAS,double]"); \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_CUBLAS," #SCALAR \ + "]"); \ const size_type numElems = X.extent(0); \ if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasDasum(s.handle, N, X.data(), one, R.data())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ + cublasAsumWrapper(space, R, X); \ } else { \ Nrm1::nrm1(space, \ R, X); \ @@ -249,160 +178,33 @@ namespace Impl { } \ }; -#define KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Nrm1< \ - EXECSPACE, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - using execution_space = EXECSPACE; \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm1(const execution_space& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_CUBLAS,float]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSasum(s.handle, N, X.data(), one, R.data())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - Nrm1::nrm1(space, \ - R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ - }; - -#define KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Nrm1 >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - using execution_space = EXECSPACE; \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm1(const execution_space& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::nrm1[TPL_CUBLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDzasum( \ - s.handle, N, reinterpret_cast(X.data()), \ - one, R.data())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - Nrm1::nrm1(space, \ - R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ - }; - -#define KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template <> \ - struct Nrm1 >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - using execution_space = EXECSPACE; \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm1(const execution_space& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::nrm1[TPL_CUBLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ - cublasSetStream(s.handle, space.cuda_stream())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasScasum( \ - s.handle, N, reinterpret_cast(X.data()), one, \ - R.data())); \ - KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ - } else { \ - Nrm1::nrm1(space, \ - R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ - }; - -KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) - -KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) - -KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) - -KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, true) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, false) + +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, true) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, false) + +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, true) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, false) + +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, true) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, false) } // namespace Impl } // namespace KokkosBlas - -#endif +#endif // KOKKOSKERNELS_ENABLE_TPL_CUBLAS // rocBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS @@ -411,39 +213,65 @@ KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_ROCBLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ +template +void rocblasAsumWrapper(const ExecutionSpace& space, RViewType& R, + const XViewType& X) { + using XScalar = typename XViewType::non_const_value_type; + + nrm1_print_specialization(); + const int N = static_cast(X.extent(0)); + constexpr int one = 1; + KokkosBlas::Impl::RocBlasSingleton& s = + KokkosBlas::Impl::RocBlasSingleton::singleton(); + + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocblas_set_stream(s.handle, space.hip_stream())); + if constexpr (std::is_same_v) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocblas_sasum(s.handle, N, X.data(), one, R.data())); + } + if constexpr (std::is_same_v) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocblas_dasum(s.handle, N, X.data(), one, R.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_scasum( + s.handle, N, reinterpret_cast(X.data()), + one, R.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_dzasum( + s.handle, N, reinterpret_cast(X.data()), + one, R.data())); + } + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); +} + +#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(SCALAR, LAYOUT, MEMSPACE, \ + ETI_SPEC_AVAIL) \ template \ struct Nrm1< \ ExecSpace, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ + Kokkos::View::mag_type, LAYOUT, \ + Kokkos::HostSpace, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ + using RV = Kokkos::View::mag_type, \ + LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits>; \ + using XV = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using size_type = typename XV::size_type; \ \ static void nrm1(const ExecSpace& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_ROCBLAS,double]"); \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_ROCBLAS," #SCALAR \ + "]"); \ const size_type numElems = X.extent(0); \ if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_dasum(s.handle, N, X.data(), one, R.data())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ + rocblasAsumWrapper(space, R, X); \ } else { \ Nrm1::nrm1(space, R, X); \ } \ @@ -451,155 +279,149 @@ namespace Impl { } \ }; -#define KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_ROCBLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct Nrm1< \ - ExecSpace, \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm1(const ExecSpace& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_ROCBLAS,float]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_sasum(s.handle, N, X.data(), one, R.data())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - Nrm1::nrm1(space, R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ - }; +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(float, Kokkos::LayoutLeft, + Kokkos::HIPSpace, true) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(float, Kokkos::LayoutLeft, + Kokkos::HIPSpace, false) + +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(double, Kokkos::LayoutLeft, + Kokkos::HIPSpace, true) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(double, Kokkos::LayoutLeft, + Kokkos::HIPSpace, false) + +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HIPSpace, + true) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HIPSpace, + false) + +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HIPSpace, + true) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HIPSpace, + false) -#define KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_ROCBLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct Nrm1 >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm1(const ExecSpace& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::nrm1[TPL_ROCBLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_dzasum( \ - s.handle, N, \ - reinterpret_cast(X.data()), one, \ - R.data())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - Nrm1::nrm1(space, R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ - }; +} // namespace Impl +} // namespace KokkosBlas -#define KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_ROCBLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct Nrm1 >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm1(const ExecSpace& space, RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::nrm1[TPL_ROCBLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::RocBlasSingleton& s = \ - KokkosBlas::Impl::RocBlasSingleton::singleton(); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ - rocblas_set_stream(s.handle, space.hip_stream())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_scasum( \ - s.handle, N, \ - reinterpret_cast(X.data()), one, \ - R.data())); \ - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ - } else { \ - Nrm1::nrm1(space, R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ - }; +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCBLAS -KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIPSpace, - true) -KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIPSpace, - false) +// oneMKL +#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIPSpace, - true) -KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIPSpace, - false) +#if defined(KOKKOS_ENABLE_SYCL) && \ + !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) -KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIPSpace, - true) -KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIPSpace, - false) +#include +#include -KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIPSpace, - true) -KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIPSpace, - false) +namespace KokkosBlas { +namespace Impl { + +template +void onemklAsumWrapper(const ExecutionSpace& space, RViewType& R, + const XViewType& X) { + using XScalar = typename XViewType::non_const_value_type; + using KAT_X = Kokkos::ArithTraits; + using layout_t = typename XViewType::array_layout; + + const std::int64_t N = static_cast(X.extent(0)); + const std::int64_t one = Kokkos::ArithTraits::one(); + + // Create temp view on device to store the result + Kokkos::View::mag_type, + typename XViewType::memory_space> + res("sycl asum result"); + + // Decide to call row_major or column_major function + if constexpr (std::is_same_v) { + if constexpr (KAT_X::is_complex) { + oneapi::mkl::blas::row_major::asum( + space.sycl_queue(), N, + reinterpret_cast*>( + X.data()), + 1, res.data()); + } else { + oneapi::mkl::blas::row_major::asum(space.sycl_queue(), N, X.data(), 1, + res.data()); + } + } else { + if constexpr (KAT_X::is_complex) { + oneapi::mkl::blas::column_major::asum( + space.sycl_queue(), N, + reinterpret_cast*>( + X.data()), + 1, res.data()); + } else { + oneapi::mkl::blas::column_major::asum(space.sycl_queue(), X.extent_int(0), + X.data(), 1, res.data()); + } + } + // Bring result back to host + Kokkos::deep_copy(space, R, res); +} + +#define KOKKOSBLAS1_NRM1_ONEMKL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Nrm1< \ + EXECSPACE, \ + Kokkos::View::mag_type, LAYOUT, \ + Kokkos::HostSpace, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + 1, true, ETI_SPEC_AVAIL> { \ + using execution_space = EXECSPACE; \ + using RV = Kokkos::View::mag_type, \ + LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits>; \ + using XV = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using size_type = typename XV::size_type; \ + \ + static void nrm1(const execution_space& space, RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_ONEMKL," #SCALAR \ + "]"); \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast(INT_MAX)) { \ + onemklAsumWrapper(space, R, X); \ + } else { \ + Nrm1::nrm1(space, \ + R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, + Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutRight, Kokkos::Experimental::SYCL, + Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, + Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutRight, Kokkos::Experimental::SYCL, + Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Experimental::SYCL, + Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutRight, + Kokkos::Experimental::SYCL, + Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Experimental::SYCL, + Kokkos::Experimental::SYCLDeviceUSMSpace, true) +KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutRight, + Kokkos::Experimental::SYCL, + Kokkos::Experimental::SYCLDeviceUSMSpace, true) } // namespace Impl } // namespace KokkosBlas -#endif +#endif // KOKKOS_ENABLE_SYCL +#endif // KOKKOSKERNELS_ENABLE_TPL_MKL #endif diff --git a/blas/unit_test/Test_Blas1_nrm1.hpp b/blas/unit_test/Test_Blas1_nrm1.hpp index f6938c5147..24795878d1 100644 --- a/blas/unit_test/Test_Blas1_nrm1.hpp +++ b/blas/unit_test/Test_Blas1_nrm1.hpp @@ -22,10 +22,10 @@ namespace Test { template void impl_test_nrm1(int N) { - typedef typename ViewTypeA::value_type ScalarA; - typedef Kokkos::ArithTraits AT; - typedef typename AT::mag_type mag_type; - typedef Kokkos::ArithTraits MAT; + using ScalarA = typename ViewTypeA::value_type; + using AT = Kokkos::ArithTraits; + using mag_type = typename AT::mag_type; + using MAT = Kokkos::ArithTraits; view_stride_adapter a("a", N); From 9007f55f6ac3779112db34db8318a30797c63a2a Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Mon, 20 Nov 2023 17:07:20 -0700 Subject: [PATCH 2/4] BLAS: Nrm1 implementing Brian's feedback --- blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp | 194 +++++++++++-------- 1 file changed, 110 insertions(+), 84 deletions(-) diff --git a/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp b/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp index 2e2c98a579..bbd7e4139e 100644 --- a/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp +++ b/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp @@ -39,26 +39,34 @@ inline void nrm1_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(SCALAR, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ +#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(SCALAR, LAYOUT, EXECSPACE, \ + MEMSPACE) \ + template <> \ struct Nrm1< \ - ExecSpace, \ + EXECSPACE, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ - 1, true, ETI_SPEC_AVAIL> { \ + 1, true, \ + nrm1_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View::mag_type, LAYOUT, \ + Kokkos::HostSpace, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ using mag_type = typename Kokkos::ArithTraits::mag_type; \ using RV = Kokkos::View>; \ using XV = Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>; \ using size_type = typename XV::size_type; \ \ - static void nrm1(const ExecSpace& space, RV& R, const XV& X) { \ + static void nrm1(const EXECSPACE& space, RV& R, const XV& X) { \ Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_BLAS," #SCALAR "]"); \ const size_type numElems = X.extent(0); \ if (numElems < static_cast(INT_MAX)) { \ @@ -73,31 +81,35 @@ namespace Impl { R() = HostBlas::asum(N, X.data(), one); \ } \ } else { \ - Nrm1::nrm1(space, R, X); \ + Nrm1::value>::nrm1(space, R, \ + X); \ } \ Kokkos::Profiling::popRegion(); \ } \ }; -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, - Kokkos::HostSpace, true) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, - Kokkos::HostSpace, false) - -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, - Kokkos::HostSpace, true) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, - Kokkos::HostSpace, false) - +#if defined(KOKKOS_ENABLE_SERIAL) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial, + Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial, + Kokkos::HostSpace) KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace, true) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace, false) - + Kokkos::Serial, Kokkos::HostSpace) KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace, true) + Kokkos::Serial, Kokkos::HostSpace) +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP, + Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP, + Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::OpenMP, Kokkos::HostSpace) KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace, false) + Kokkos::OpenMP, Kokkos::HostSpace) +#endif } // namespace Impl } // namespace KokkosBlas @@ -145,7 +157,7 @@ void cublasAsumWrapper(const ExecutionSpace& space, RViewType& R, } #define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(SCALAR, LAYOUT, EXECSPACE, \ - MEMSPACE, ETI_SPEC_AVAIL) \ + MEMSPACE) \ template <> \ struct Nrm1< \ EXECSPACE, \ @@ -154,7 +166,15 @@ void cublasAsumWrapper(const ExecutionSpace& space, RViewType& R, Kokkos::MemoryTraits>, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - 1, true, ETI_SPEC_AVAIL> { \ + 1, true, \ + nrm1_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View::mag_type, LAYOUT, \ + Kokkos::HostSpace, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ using execution_space = EXECSPACE; \ using RV = Kokkos::View::mag_type, \ LAYOUT, Kokkos::HostSpace, \ @@ -171,36 +191,37 @@ void cublasAsumWrapper(const ExecutionSpace& space, RViewType& R, if (numElems < static_cast(INT_MAX)) { \ cublasAsumWrapper(space, R, X); \ } else { \ - Nrm1::nrm1(space, \ - R, X); \ + Nrm1::value>::nrm1(space, R, \ + X); \ } \ Kokkos::Profiling::popRegion(); \ } \ }; KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) - -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) + Kokkos::CudaSpace) KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) - + Kokkos::CudaSpace) KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, + Kokkos::CudaSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) + Kokkos::CudaSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, +#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaUVMSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaUVMSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, true) + Kokkos::CudaUVMSpace) KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace, false) + Kokkos::CudaUVMSpace) +#endif } // namespace Impl } // namespace KokkosBlas @@ -247,8 +268,7 @@ void rocblasAsumWrapper(const ExecutionSpace& space, RViewType& R, KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); } -#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(SCALAR, LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ +#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(SCALAR, LAYOUT, MEMSPACE) \ template \ struct Nrm1< \ ExecSpace, \ @@ -257,7 +277,15 @@ void rocblasAsumWrapper(const ExecutionSpace& space, RViewType& R, Kokkos::MemoryTraits>, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - 1, true, ETI_SPEC_AVAIL> { \ + 1, true, \ + nrm1_tpl_spec_avail< \ + ExecSpace, \ + Kokkos::View::mag_type, LAYOUT, \ + Kokkos::HostSpace, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ using RV = Kokkos::View::mag_type, \ LAYOUT, Kokkos::HostSpace, \ Kokkos::MemoryTraits>; \ @@ -273,35 +301,22 @@ void rocblasAsumWrapper(const ExecutionSpace& space, RViewType& R, if (numElems < static_cast(INT_MAX)) { \ rocblasAsumWrapper(space, R, X); \ } else { \ - Nrm1::nrm1(space, R, X); \ + Nrm1::value>::nrm1(space, R, \ + X); \ } \ Kokkos::Profiling::popRegion(); \ } \ }; KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(float, Kokkos::LayoutLeft, - Kokkos::HIPSpace, true) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(float, Kokkos::LayoutLeft, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(double, Kokkos::LayoutLeft, - Kokkos::HIPSpace, true) + Kokkos::HIPSpace) KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(double, Kokkos::LayoutLeft, - Kokkos::HIPSpace, false) - -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace, - true) + Kokkos::HIPSpace) KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace, - false) - -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace, - true) + Kokkos::LayoutLeft, Kokkos::HIPSpace) KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::HIPSpace, - false) + Kokkos::LayoutLeft, Kokkos::HIPSpace) } // namespace Impl } // namespace KokkosBlas @@ -327,8 +342,7 @@ void onemklAsumWrapper(const ExecutionSpace& space, RViewType& R, using KAT_X = Kokkos::ArithTraits; using layout_t = typename XViewType::array_layout; - const std::int64_t N = static_cast(X.extent(0)); - const std::int64_t one = Kokkos::ArithTraits::one(); + const std::int64_t N = static_cast(X.extent(0)); // Create temp view on device to store the result Kokkos::View::mag_type, @@ -373,7 +387,15 @@ void onemklAsumWrapper(const ExecutionSpace& space, RViewType& R, Kokkos::MemoryTraits>, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - 1, true, ETI_SPEC_AVAIL> { \ + 1, true, \ + nrm1_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View::mag_type, LAYOUT, \ + Kokkos::HostSpace, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ using execution_space = EXECSPACE; \ using RV = Kokkos::View::mag_type, \ LAYOUT, Kokkos::HostSpace, \ @@ -390,33 +412,37 @@ void onemklAsumWrapper(const ExecutionSpace& space, RViewType& R, if (numElems < static_cast(INT_MAX)) { \ onemklAsumWrapper(space, R, X); \ } else { \ - Nrm1::nrm1(space, \ - R, X); \ + Nrm1::value>::nrm1(space, R, \ + X); \ } \ Kokkos::Profiling::popRegion(); \ } \ }; KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) -KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutRight, Kokkos::Experimental::SYCL, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) + Kokkos::Experimental::SYCLDeviceUSMSpace) KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) -KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutRight, Kokkos::Experimental::SYCL, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) + Kokkos::Experimental::SYCLDeviceUSMSpace) KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) -KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutRight, - Kokkos::Experimental::SYCL, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) + Kokkos::Experimental::SYCLDeviceUSMSpace) KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) -KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutRight, + Kokkos::Experimental::SYCLDeviceUSMSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_SYCLSHAREDSPACE) +KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, + Kokkos::Experimental::SYCLSharedUSMSpace) +KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, + Kokkos::Experimental::SYCLSharedUSMSpace) +KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Experimental::SYCL, + Kokkos::Experimental::SYCLSharedUSMSpace) +KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, - Kokkos::Experimental::SYCLDeviceUSMSpace, true) + Kokkos::Experimental::SYCLSharedUSMSpace) +#endif } // namespace Impl } // namespace KokkosBlas From d7f5e8ea237e48aba9a6645e9dc2ca5d5a63b5b1 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Tue, 21 Nov 2023 10:39:09 -0700 Subject: [PATCH 3/4] Blas: nrm1, fix in tpl spec decl --- blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp b/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp index bbd7e4139e..79822b452e 100644 --- a/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp +++ b/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp @@ -50,7 +50,7 @@ namespace Impl { Kokkos::View, \ Kokkos::MemoryTraits>, \ 1, true, \ - nrm1_tpl_spec_avail< \ + nrm1_eti_spec_avail< \ EXECSPACE, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ @@ -82,7 +82,7 @@ namespace Impl { } \ } else { \ Nrm1::value>::nrm1(space, R, \ + nrm1_eti_spec_avail::value>::nrm1(space, R, \ X); \ } \ Kokkos::Profiling::popRegion(); \ @@ -167,7 +167,7 @@ void cublasAsumWrapper(const ExecutionSpace& space, RViewType& R, Kokkos::View, \ Kokkos::MemoryTraits>, \ 1, true, \ - nrm1_tpl_spec_avail< \ + nrm1_eti_spec_avail< \ EXECSPACE, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ @@ -192,7 +192,7 @@ void cublasAsumWrapper(const ExecutionSpace& space, RViewType& R, cublasAsumWrapper(space, R, X); \ } else { \ Nrm1::value>::nrm1(space, R, \ + nrm1_eti_spec_avail::value>::nrm1(space, R, \ X); \ } \ Kokkos::Profiling::popRegion(); \ @@ -278,7 +278,7 @@ void rocblasAsumWrapper(const ExecutionSpace& space, RViewType& R, Kokkos::View, \ Kokkos::MemoryTraits>, \ 1, true, \ - nrm1_tpl_spec_avail< \ + nrm1_eti_spec_avail< \ ExecSpace, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ @@ -302,7 +302,7 @@ void rocblasAsumWrapper(const ExecutionSpace& space, RViewType& R, rocblasAsumWrapper(space, R, X); \ } else { \ Nrm1::value>::nrm1(space, R, \ + nrm1_eti_spec_avail::value>::nrm1(space, R, \ X); \ } \ Kokkos::Profiling::popRegion(); \ @@ -388,7 +388,7 @@ void onemklAsumWrapper(const ExecutionSpace& space, RViewType& R, Kokkos::View, \ Kokkos::MemoryTraits>, \ 1, true, \ - nrm1_tpl_spec_avail< \ + nrm1_eti_spec_avail< \ EXECSPACE, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ @@ -413,7 +413,7 @@ void onemklAsumWrapper(const ExecutionSpace& space, RViewType& R, onemklAsumWrapper(space, R, X); \ } else { \ Nrm1::value>::nrm1(space, R, \ + nrm1_eti_spec_avail::value>::nrm1(space, R, \ X); \ } \ Kokkos::Profiling::popRegion(); \ From b1cea63873fcedeb60c2ed6c29e930c5a6621a4f Mon Sep 17 00:00:00 2001 From: "Luc Berger-Vergiat (-EXP)" Date: Tue, 21 Nov 2023 15:39:18 -0700 Subject: [PATCH 4/4] BLAS: nrm1 problems with ExecSpace template and lack of Kokkos::Threads Fix issue with Kokkos::Threads and Kokkos::HIP --- blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp | 109 ++++++++++--------- 1 file changed, 57 insertions(+), 52 deletions(-) diff --git a/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp b/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp index 79822b452e..c695eaee1e 100644 --- a/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp +++ b/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp @@ -111,6 +111,17 @@ KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) #endif +#if defined(KOKKOS_ENABLE_THREADS) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Threads, + Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Threads, + Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Threads, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Threads, Kokkos::HostSpace) +#endif + } // namespace Impl } // namespace KokkosBlas @@ -156,31 +167,31 @@ void cublasAsumWrapper(const ExecutionSpace& space, RViewType& R, KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); } -#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(SCALAR, LAYOUT, EXECSPACE, \ - MEMSPACE) \ +#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(SCALAR, LAYOUT, MEMSPACE) \ template <> \ struct Nrm1< \ - EXECSPACE, \ + Kokkos::Cuda, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ 1, true, \ nrm1_eti_spec_avail< \ - EXECSPACE, \ + Kokkos::Cuda, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>>::value> { \ - using execution_space = EXECSPACE; \ + using execution_space = Kokkos::Cuda; \ using RV = Kokkos::View::mag_type, \ LAYOUT, Kokkos::HostSpace, \ Kokkos::MemoryTraits>; \ using XV = Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>; \ using size_type = typename XV::size_type; \ \ @@ -192,35 +203,31 @@ void cublasAsumWrapper(const ExecutionSpace& space, RViewType& R, cublasAsumWrapper(space, R, X); \ } else { \ Nrm1::value>::nrm1(space, R, \ - X); \ + nrm1_eti_spec_avail::value>::nrm1(space, R, \ + X); \ } \ Kokkos::Profiling::popRegion(); \ } \ }; -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) + Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaSpace) + Kokkos::LayoutLeft, Kokkos::CudaSpace) #if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::Cuda, - Kokkos::CudaUVMSpace) + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) #endif } // namespace Impl @@ -269,41 +276,42 @@ void rocblasAsumWrapper(const ExecutionSpace& space, RViewType& R, } #define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(SCALAR, LAYOUT, MEMSPACE) \ - template \ + template <> \ struct Nrm1< \ - ExecSpace, \ + Kokkos::HIP, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ 1, true, \ nrm1_eti_spec_avail< \ - ExecSpace, \ + Kokkos::HIP, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>>::value> { \ using RV = Kokkos::View::mag_type, \ LAYOUT, Kokkos::HostSpace, \ Kokkos::MemoryTraits>; \ using XV = Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>; \ using size_type = typename XV::size_type; \ \ - static void nrm1(const ExecSpace& space, RV& R, const XV& X) { \ + static void nrm1(const Kokkos::HIP& space, RV& R, const XV& X) { \ Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_ROCBLAS," #SCALAR \ "]"); \ const size_type numElems = X.extent(0); \ if (numElems < static_cast(INT_MAX)) { \ rocblasAsumWrapper(space, R, X); \ } else { \ - Nrm1::value>::nrm1(space, R, \ - X); \ + Nrm1::value>::nrm1(space, R, \ + X); \ } \ Kokkos::Profiling::popRegion(); \ } \ @@ -377,32 +385,33 @@ void onemklAsumWrapper(const ExecutionSpace& space, RViewType& R, Kokkos::deep_copy(space, R, res); } -#define KOKKOSBLAS1_NRM1_ONEMKL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ +#define KOKKOSBLAS1_NRM1_ONEMKL(SCALAR, LAYOUT, MEMSPACE) \ template <> \ struct Nrm1< \ - EXECSPACE, \ + Kokkos::Experimental::SYCL, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ 1, true, \ nrm1_eti_spec_avail< \ - EXECSPACE, \ + Kokkos::Experimental::SYCL, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>>::value> { \ - using execution_space = EXECSPACE; \ + using execution_space = Kokkos::Experimental::SYCL; \ using RV = Kokkos::View::mag_type, \ LAYOUT, Kokkos::HostSpace, \ Kokkos::MemoryTraits>; \ - using XV = Kokkos::View, \ - Kokkos::MemoryTraits>; \ + using XV = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ using size_type = typename XV::size_type; \ \ static void nrm1(const execution_space& space, RV& R, const XV& X) { \ @@ -413,34 +422,30 @@ void onemklAsumWrapper(const ExecutionSpace& space, RViewType& R, onemklAsumWrapper(space, R, X); \ } else { \ Nrm1::value>::nrm1(space, R, \ - X); \ + nrm1_eti_spec_avail::value>::nrm1(space, R, X); \ } \ Kokkos::Profiling::popRegion(); \ } \ }; -KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, +KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace) -KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, +KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace) KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace) KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace) #if defined(KOKKOSKERNELS_INST_MEMSPACE_SYCLSHAREDSPACE) -KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, +KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLSharedUSMSpace) -KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, +KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLSharedUSMSpace) KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLSharedUSMSpace) KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLSharedUSMSpace) #endif