[SPARSE] Add support for cuSPARSE backend (#527)

oneapi-src · Oct 29, 2024 · c8dc9a9 · c8dc9a9
1 parent 1462c0b
commit c8dc9a9
Show file tree

Hide file tree

Showing 59 changed files with 4,113 additions and 1,032 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -60,6 +60,9 @@ option(ENABLE_CUFFT_BACKEND "Enable the cuFFT backend for the DFT interface" OFF
 option(ENABLE_ROCFFT_BACKEND "Enable the rocFFT backend for the DFT interface" OFF)
 option(ENABLE_PORTFFT_BACKEND "Enable the portFFT DFT backend for the DFT interface. Cannot be used with other DFT backends." OFF)
 
+# sparse
+option(ENABLE_CUSPARSE_BACKEND "Enable the cuSPARSE backend for the SPARSE_BLAS interface" OFF)
+
 set(ONEMKL_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler")
 set(HIP_TARGETS "" CACHE STRING "Target HIP architectures")
 
@@ -102,7 +105,8 @@ if(ENABLE_MKLGPU_BACKEND
   list(APPEND DOMAINS_LIST "dft")
 endif()
 if(ENABLE_MKLCPU_BACKEND
-        OR ENABLE_MKLGPU_BACKEND)
+        OR ENABLE_MKLGPU_BACKEND
+        OR ENABLE_CUSPARSE_BACKEND)
   list(APPEND DOMAINS_LIST "sparse_blas")
 endif()
 
@@ -129,7 +133,7 @@ if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
     string(REPLACE "\\" "/" CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
   endif()
 else()
-  if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUFFT_BACKEND
+  if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUFFT_BACKEND OR ENABLE_CUSPARSE_BACKEND
     OR ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCFFT_BACKEND)
     set(CMAKE_CXX_COMPILER "clang++")
   elseif(ENABLE_MKLGPU_BACKEND)

diff --git a/README.md b/README.md
@@ -18,8 +18,8 @@ oneMKL is part of the [UXL Foundation](http://www.uxlfoundation.org).
     </thead>
     <tbody>
         <tr>
-            <td rowspan=12 align="center">oneMKL interface</td>
-            <td rowspan=12 align="center">oneMKL selector</td>
+            <td rowspan=13 align="center">oneMKL interface</td>
+            <td rowspan=13 align="center">oneMKL selector</td>
             <td align="center"><a href="https://software.intel.com/en-us/oneapi/onemkl">Intel(R) oneAPI Math Kernel Library (oneMKL)</a></td>
             <td align="center">x86 CPU, Intel GPU</td>
         </tr>
@@ -28,10 +28,10 @@ oneMKL is part of the [UXL Foundation](http://www.uxlfoundation.org).
             <td align="center"><a href="https://developer.nvidia.com/cublas"> NVIDIA cuBLAS</a></td>
             <td align="center">NVIDIA GPU</td>
         </tr>
-	<tr>
+        <tr>
             <td align="center"><a href="https://developer.nvidia.com/cusolver"> NVIDIA cuSOLVER</a></td>
             <td align="center">NVIDIA GPU</td>
-	</tr>
+        </tr>
         <tr>
             <td align="center"><a href="https://developer.nvidia.com/curand"> NVIDIA cuRAND</a></td>
             <td align="center">NVIDIA GPU</td>
@@ -40,6 +40,10 @@ oneMKL is part of the [UXL Foundation](http://www.uxlfoundation.org).
             <td align="center"><a href="https://developer.nvidia.com/cufft"> NVIDIA cuFFT</a></td>
             <td align="center">NVIDIA GPU</td>
         </tr>
+        <tr>
+            <td align="center"><a href="https://developer.nvidia.com/cusparse"> NVIDIA cuSPARSE</a></td>
+            <td align="center">NVIDIA GPU</td>
+        </tr>
         <tr>
             <td align="center"><a href="https://www.netlib.org"> NETLIB LAPACK</a> </td>
             <td align="center">x86 CPU</td>
@@ -329,7 +333,7 @@ Supported compilers include:
             <td align="center">Dynamic, Static</td>
         </tr>
         <tr>
-            <td rowspan=2 align="center">SPARSE_BLAS</td>
+            <td rowspan=3 align="center">SPARSE_BLAS</td>
             <td align="center">x86 CPU</td>
             <td align="center">Intel(R) oneMKL</td>
             <td align="center">Intel DPC++</td>
@@ -341,6 +345,12 @@ Supported compilers include:
             <td align="center">Intel DPC++</td>
             <td align="center">Dynamic, Static</td>
         </tr>
+        <tr>
+            <td align="center">NVIDIA GPU</td>
+            <td align="center">NVIDIA cuSPARSE</td>
+            <td align="center">Open DPC++</td>
+            <td align="center">Dynamic, Static</td>
+        </tr>
     </tbody>
 </table>
 

diff --git a/cmake/FindCompiler.cmake b/cmake/FindCompiler.cmake
@@ -37,7 +37,7 @@ if(is_dpcpp)
     # Check if the Nvidia target is supported. PortFFT uses this for choosing default configuration.
     check_cxx_compiler_flag("-fsycl -fsycl-targets=nvptx64-nvidia-cuda" dpcpp_supports_nvptx64)
 
-    if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND)
+    if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUSPARSE_BACKEND)
       list(APPEND UNIX_INTERFACE_COMPILE_OPTIONS
         -fsycl-targets=nvptx64-nvidia-cuda -fsycl-unnamed-lambda)
       list(APPEND UNIX_INTERFACE_LINK_OPTIONS
@@ -51,7 +51,7 @@ if(is_dpcpp)
         -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend 
 	--offload-arch=${HIP_TARGETS})
     endif()
-    if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_ROCBLAS_BACKEND
+    if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUSPARSE_BACKEND OR ENABLE_ROCBLAS_BACKEND
 	    OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
       set_target_properties(ONEMKL::SYCL::SYCL PROPERTIES
         INTERFACE_COMPILE_OPTIONS "${UNIX_INTERFACE_COMPILE_OPTIONS}"

diff --git a/docs/building_the_project_with_dpcpp.rst b/docs/building_the_project_with_dpcpp.rst
@@ -104,6 +104,9 @@ The most important supported build options are:
    * - ENABLE_CURAND_BACKEND
      - True, False
      - False     
+   * - ENABLE_CUSPARSE_BACKEND
+     - True, False
+     - False     
    * - ENABLE_NETLIB_BACKEND
      - True, False
      - False     
@@ -183,8 +186,8 @@ Building for CUDA
 ^^^^^^^^^^^^^^^^^
 
 The CUDA backends can be enabled with ``ENABLE_CUBLAS_BACKEND``,
-``ENABLE_CUFFT_BACKEND``, ``ENABLE_CURAND_BACKEND``, and
-``ENABLE_CUSOLVER_BACKEND``.
+``ENABLE_CUFFT_BACKEND``, ``ENABLE_CURAND_BACKEND``,
+``ENABLE_CUSOLVER_BACKEND``, and ``ENABLE_CUSPARSE_BACKEND``.
 
 No additional parameters are required for using CUDA libraries. In most cases,
 the CUDA libraries should be found automatically by CMake.
@@ -371,6 +374,7 @@ disabled using the Ninja build system:
       -DENABLE_CUBLAS_BACKEND=True \
       -DENABLE_CUSOLVER_BACKEND=True \
       -DENABLE_CURAND_BACKEND=True \
+      -DENABLE_CUSPARSE_BACKEND=True \
       -DBUILD_FUNCTIONAL_TESTS=False
 
 ``$ONEMKL_DIR`` points at the oneMKL source directory. The x86 CPU (``MKLCPU``)

diff --git a/docs/domains/sparse_linear_algebra.rst b/docs/domains/sparse_linear_algebra.rst
@@ -20,21 +20,150 @@ Currently known limitations:
 - ``oneapi::mkl::sparse::set_csr_data`` and
   ``oneapi::mkl::sparse::set_coo_data`` functions cannot be used on a handle
   that has already been used for an operation or its optimize function. Doing so
-  will throw an ``oneapi::mkl::unimplemented`` exception.
+  will throw a ``oneapi::mkl::unimplemented`` exception.
 - Using ``spsv`` with the ``oneapi::mkl::sparse::spsv_alg::no_optimize_alg`` and
   a sparse matrix that does not have the
-  ``oneapi::mkl::sparse::matrix_property::sorted`` property will throw an
+  ``oneapi::mkl::sparse::matrix_property::sorted`` property will throw a
   ``oneapi::mkl::unimplemented`` exception.
 - Using ``spmm`` on Intel GPU with a sparse matrix that is
   ``oneapi::mkl::transpose::conjtrans`` and has the
-  ``oneapi::mkl::sparse::matrix_property::symmetric`` property will throw an
+  ``oneapi::mkl::sparse::matrix_property::symmetric`` property will throw a
   ``oneapi::mkl::unimplemented`` exception.
 - Using ``spmv`` with a sparse matrix that is
   ``oneapi::mkl::transpose::conjtrans`` with a ``type_view``
-  ``matrix_descr::symmetric`` or ``matrix_descr::hermitian`` will throw an
+  ``matrix_descr::symmetric`` or ``matrix_descr::hermitian`` will throw a
   ``oneapi::mkl::unimplemented`` exception.
 - Using ``spsv`` on Intel GPU with a sparse matrix that is
-  ``oneapi::mkl::transpose::conjtrans`` and will throw an
+  ``oneapi::mkl::transpose::conjtrans`` will throw a
   ``oneapi::mkl::unimplemented`` exception.
 - Scalar parameters ``alpha`` and ``beta`` should be host pointers to prevent
   synchronizations and copies to the host.
+
+
+cuSPARSE backend
+----------------
+
+Currently known limitations:
+
+- The COO format requires the indices to be sorted by row. See the `cuSPARSE
+  documentation
+  <https://docs.nvidia.com/cuda/cusparse/index.html#coordinate-coo>`_. Sparse
+  operations using matrices with the COO format without the property
+  ``matrix_property::sorted_by_rows`` or ``matrix_property::sorted`` will throw
+  a ``oneapi::mkl::unimplemented`` exception.
+- Using ``spmm`` with the algorithm ``spmm_alg::csr_alg3`` and an ``opA`` other
+  than ``transpose::nontrans`` or an ``opB`` of ``transpose::conjtrans`` will
+  throw a ``oneapi::mkl::unimplemented`` exception.
+- Using ``spmm`` with the algorithm ``spmm_alg::csr_alg3``,
+  ``opB=transpose::trans`` and real fp64 precision will throw a
+  ``oneapi::mkl::unimplemented`` exception. This configuration can fail as of
+  CUDA 12.6.2, see the related issue
+  `here <https://forums.developer.nvidia.com/t/cusparse-spmm-sample-failing-with-misaligned-address/311022>`_.
+- Using ``spmv`` with a ``type_view`` other than ``matrix_descr::general`` will
+  throw a ``oneapi::mkl::unimplemented`` exception.
+- Using ``spsv`` with the algorithm ``spsv_alg::no_optimize_alg`` may still
+  perform some mandatory preprocessing.
+- oneMKL Interface does not provide a way to use non-default algorithms without
+  calling preprocess functions such as ``cusparseSpMM_preprocess`` or
+  ``cusparseSpMV_preprocess``. Feel free to create an issue if this is needed.
+
+
+Operation algorithms mapping
+----------------------------
+
+The following tables describe how a oneMKL SYCL Interface algorithm maps to the
+backend's algorithms. Refer to the backend's documentation for a more detailed
+explanation of the algorithms.
+
+Backends with no equivalent algorithms will fall back to the backend's default
+behavior.
+
+
+spmm
+^^^^
+
+.. list-table::
+   :header-rows: 1
+   :widths: 10 30 45
+
+   * - ``spmm_alg`` value
+     - MKLCPU/MKLGPU
+     - cuSPARSE
+   * - ``default_alg``
+     - none
+     - ``CUSPARSE_SPMM_ALG_DEFAULT``
+   * - ``no_optimize_alg``
+     - none
+     - ``CUSPARSE_SPMM_ALG_DEFAULT``
+   * - ``coo_alg1``
+     - none
+     - ``CUSPARSE_SPMM_COO_ALG1``
+   * - ``coo_alg2``
+     - none
+     - ``CUSPARSE_SPMM_COO_ALG2``
+   * - ``coo_alg3``
+     - none
+     - ``CUSPARSE_SPMM_COO_ALG3``
+   * - ``coo_alg4``
+     - none
+     - ``CUSPARSE_SPMM_COO_ALG4``
+   * - ``csr_alg1``
+     - none
+     - ``CUSPARSE_SPMM_CSR_ALG1``
+   * - ``csr_alg2``
+     - none
+     - ``CUSPARSE_SPMM_CSR_ALG2``
+   * - ``csr_alg3``
+     - none
+     - ``CUSPARSE_SPMM_CSR_ALG3``
+
+
+spmv
+^^^^
+
+.. list-table::
+   :header-rows: 1
+   :widths: 10 30 45
+
+   * - ``spmv_alg`` value
+     - MKLCPU/MKLGPU
+     - cuSPARSE
+   * - ``default_alg``
+     - none
+     - ``CUSPARSE_SPMV_ALG_DEFAULT``
+   * - ``no_optimize_alg``
+     - none
+     - ``CUSPARSE_SPMV_ALG_DEFAULT``
+   * - ``coo_alg1``
+     - none
+     - ``CUSPARSE_SPMV_COO_ALG1``
+   * - ``coo_alg2``
+     - none
+     - ``CUSPARSE_SPMV_COO_ALG2``
+   * - ``csr_alg1``
+     - none
+     - ``CUSPARSE_SPMV_CSR_ALG1``
+   * - ``csr_alg2``
+     - none
+     - ``CUSPARSE_SPMV_CSR_ALG2``
+   * - ``csr_alg3``
+     - none
+     - ``CUSPARSE_SPMV_ALG_DEFAULT``
+
+
+spsv
+^^^^
+
+.. list-table::
+   :header-rows: 1
+   :widths: 10 30 45
+
+   * - ``spsv_alg`` value
+     - MKLCPU/MKLGPU
+     - cuSPARSE
+   * - ``default_alg``
+     - none
+     - ``CUSPARSE_SPSV_ALG_DEFAULT``
+   * - ``no_optimize_alg``
+     - none
+     - ``CUSPARSE_SPSV_ALG_DEFAULT``