Skip to content

Commit

Permalink
[SPARSE] Add support for cuSPARSE backend (#527)
Browse files Browse the repository at this point in the history
  • Loading branch information
Rbiessy authored Oct 29, 2024
1 parent 1462c0b commit c8dc9a9
Show file tree
Hide file tree
Showing 59 changed files with 4,113 additions and 1,032 deletions.
8 changes: 6 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ option(ENABLE_CUFFT_BACKEND "Enable the cuFFT backend for the DFT interface" OFF
option(ENABLE_ROCFFT_BACKEND "Enable the rocFFT backend for the DFT interface" OFF)
option(ENABLE_PORTFFT_BACKEND "Enable the portFFT DFT backend for the DFT interface. Cannot be used with other DFT backends." OFF)

# sparse
option(ENABLE_CUSPARSE_BACKEND "Enable the cuSPARSE backend for the SPARSE_BLAS interface" OFF)

set(ONEMKL_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler")
set(HIP_TARGETS "" CACHE STRING "Target HIP architectures")

Expand Down Expand Up @@ -102,7 +105,8 @@ if(ENABLE_MKLGPU_BACKEND
list(APPEND DOMAINS_LIST "dft")
endif()
if(ENABLE_MKLCPU_BACKEND
OR ENABLE_MKLGPU_BACKEND)
OR ENABLE_MKLGPU_BACKEND
OR ENABLE_CUSPARSE_BACKEND)
list(APPEND DOMAINS_LIST "sparse_blas")
endif()

Expand All @@ -129,7 +133,7 @@ if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
string(REPLACE "\\" "/" CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
endif()
else()
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUFFT_BACKEND
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUFFT_BACKEND OR ENABLE_CUSPARSE_BACKEND
OR ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCFFT_BACKEND)
set(CMAKE_CXX_COMPILER "clang++")
elseif(ENABLE_MKLGPU_BACKEND)
Expand Down
20 changes: 15 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ oneMKL is part of the [UXL Foundation](http://www.uxlfoundation.org).
</thead>
<tbody>
<tr>
<td rowspan=12 align="center">oneMKL interface</td>
<td rowspan=12 align="center">oneMKL selector</td>
<td rowspan=13 align="center">oneMKL interface</td>
<td rowspan=13 align="center">oneMKL selector</td>
<td align="center"><a href="https://software.intel.com/en-us/oneapi/onemkl">Intel(R) oneAPI Math Kernel Library (oneMKL)</a></td>
<td align="center">x86 CPU, Intel GPU</td>
</tr>
Expand All @@ -28,10 +28,10 @@ oneMKL is part of the [UXL Foundation](http://www.uxlfoundation.org).
<td align="center"><a href="https://developer.nvidia.com/cublas"> NVIDIA cuBLAS</a></td>
<td align="center">NVIDIA GPU</td>
</tr>
<tr>
<tr>
<td align="center"><a href="https://developer.nvidia.com/cusolver"> NVIDIA cuSOLVER</a></td>
<td align="center">NVIDIA GPU</td>
</tr>
</tr>
<tr>
<td align="center"><a href="https://developer.nvidia.com/curand"> NVIDIA cuRAND</a></td>
<td align="center">NVIDIA GPU</td>
Expand All @@ -40,6 +40,10 @@ oneMKL is part of the [UXL Foundation](http://www.uxlfoundation.org).
<td align="center"><a href="https://developer.nvidia.com/cufft"> NVIDIA cuFFT</a></td>
<td align="center">NVIDIA GPU</td>
</tr>
<tr>
<td align="center"><a href="https://developer.nvidia.com/cusparse"> NVIDIA cuSPARSE</a></td>
<td align="center">NVIDIA GPU</td>
</tr>
<tr>
<td align="center"><a href="https://www.netlib.org"> NETLIB LAPACK</a> </td>
<td align="center">x86 CPU</td>
Expand Down Expand Up @@ -329,7 +333,7 @@ Supported compilers include:
<td align="center">Dynamic, Static</td>
</tr>
<tr>
<td rowspan=2 align="center">SPARSE_BLAS</td>
<td rowspan=3 align="center">SPARSE_BLAS</td>
<td align="center">x86 CPU</td>
<td align="center">Intel(R) oneMKL</td>
<td align="center">Intel DPC++</td>
Expand All @@ -341,6 +345,12 @@ Supported compilers include:
<td align="center">Intel DPC++</td>
<td align="center">Dynamic, Static</td>
</tr>
<tr>
<td align="center">NVIDIA GPU</td>
<td align="center">NVIDIA cuSPARSE</td>
<td align="center">Open DPC++</td>
<td align="center">Dynamic, Static</td>
</tr>
</tbody>
</table>

Expand Down
4 changes: 2 additions & 2 deletions cmake/FindCompiler.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ if(is_dpcpp)
# Check if the Nvidia target is supported. PortFFT uses this for choosing default configuration.
check_cxx_compiler_flag("-fsycl -fsycl-targets=nvptx64-nvidia-cuda" dpcpp_supports_nvptx64)

if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND)
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUSPARSE_BACKEND)
list(APPEND UNIX_INTERFACE_COMPILE_OPTIONS
-fsycl-targets=nvptx64-nvidia-cuda -fsycl-unnamed-lambda)
list(APPEND UNIX_INTERFACE_LINK_OPTIONS
Expand All @@ -51,7 +51,7 @@ if(is_dpcpp)
-fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend
--offload-arch=${HIP_TARGETS})
endif()
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_ROCBLAS_BACKEND
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUSPARSE_BACKEND OR ENABLE_ROCBLAS_BACKEND
OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
set_target_properties(ONEMKL::SYCL::SYCL PROPERTIES
INTERFACE_COMPILE_OPTIONS "${UNIX_INTERFACE_COMPILE_OPTIONS}"
Expand Down
8 changes: 6 additions & 2 deletions docs/building_the_project_with_dpcpp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ The most important supported build options are:
* - ENABLE_CURAND_BACKEND
- True, False
- False
* - ENABLE_CUSPARSE_BACKEND
- True, False
- False
* - ENABLE_NETLIB_BACKEND
- True, False
- False
Expand Down Expand Up @@ -183,8 +186,8 @@ Building for CUDA
^^^^^^^^^^^^^^^^^

The CUDA backends can be enabled with ``ENABLE_CUBLAS_BACKEND``,
``ENABLE_CUFFT_BACKEND``, ``ENABLE_CURAND_BACKEND``, and
``ENABLE_CUSOLVER_BACKEND``.
``ENABLE_CUFFT_BACKEND``, ``ENABLE_CURAND_BACKEND``,
``ENABLE_CUSOLVER_BACKEND``, and ``ENABLE_CUSPARSE_BACKEND``.

No additional parameters are required for using CUDA libraries. In most cases,
the CUDA libraries should be found automatically by CMake.
Expand Down Expand Up @@ -371,6 +374,7 @@ disabled using the Ninja build system:
-DENABLE_CUBLAS_BACKEND=True \
-DENABLE_CUSOLVER_BACKEND=True \
-DENABLE_CURAND_BACKEND=True \
-DENABLE_CUSPARSE_BACKEND=True \
-DBUILD_FUNCTIONAL_TESTS=False
``$ONEMKL_DIR`` points at the oneMKL source directory. The x86 CPU (``MKLCPU``)
Expand Down
139 changes: 134 additions & 5 deletions docs/domains/sparse_linear_algebra.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,150 @@ Currently known limitations:
- ``oneapi::mkl::sparse::set_csr_data`` and
``oneapi::mkl::sparse::set_coo_data`` functions cannot be used on a handle
that has already been used for an operation or its optimize function. Doing so
will throw an ``oneapi::mkl::unimplemented`` exception.
will throw a ``oneapi::mkl::unimplemented`` exception.
- Using ``spsv`` with the ``oneapi::mkl::sparse::spsv_alg::no_optimize_alg`` and
a sparse matrix that does not have the
``oneapi::mkl::sparse::matrix_property::sorted`` property will throw an
``oneapi::mkl::sparse::matrix_property::sorted`` property will throw a
``oneapi::mkl::unimplemented`` exception.
- Using ``spmm`` on Intel GPU with a sparse matrix that is
``oneapi::mkl::transpose::conjtrans`` and has the
``oneapi::mkl::sparse::matrix_property::symmetric`` property will throw an
``oneapi::mkl::sparse::matrix_property::symmetric`` property will throw a
``oneapi::mkl::unimplemented`` exception.
- Using ``spmv`` with a sparse matrix that is
``oneapi::mkl::transpose::conjtrans`` with a ``type_view``
``matrix_descr::symmetric`` or ``matrix_descr::hermitian`` will throw an
``matrix_descr::symmetric`` or ``matrix_descr::hermitian`` will throw a
``oneapi::mkl::unimplemented`` exception.
- Using ``spsv`` on Intel GPU with a sparse matrix that is
``oneapi::mkl::transpose::conjtrans`` and will throw an
``oneapi::mkl::transpose::conjtrans`` will throw a
``oneapi::mkl::unimplemented`` exception.
- Scalar parameters ``alpha`` and ``beta`` should be host pointers to prevent
synchronizations and copies to the host.


cuSPARSE backend
----------------

Currently known limitations:

- The COO format requires the indices to be sorted by row. See the `cuSPARSE
documentation
<https://docs.nvidia.com/cuda/cusparse/index.html#coordinate-coo>`_. Sparse
operations using matrices with the COO format without the property
``matrix_property::sorted_by_rows`` or ``matrix_property::sorted`` will throw
a ``oneapi::mkl::unimplemented`` exception.
- Using ``spmm`` with the algorithm ``spmm_alg::csr_alg3`` and an ``opA`` other
than ``transpose::nontrans`` or an ``opB`` of ``transpose::conjtrans`` will throw
a ``oneapi::mkl::unimplemented`` exception.
- Using ``spmm`` with the algorithm ``spmm_alg::csr_alg3``,
``opB=transpose::trans`` and real fp64 precision will throw a
``oneapi::mkl::unimplemented`` exception. This configuration can fail as of
CUDA 12.6.2, see the related issue
`here <https://forums.developer.nvidia.com/t/cusparse-spmm-sample-failing-with-misaligned-address/311022>`_.
- Using ``spmv`` with a ``type_view`` other than ``matrix_descr::general`` will
throw a ``oneapi::mkl::unimplemented`` exception.
- Using ``spsv`` with the algorithm ``spsv_alg::no_optimize_alg`` may still
perform some mandatory preprocessing.
- oneMKL Interface does not provide a way to use non-default algorithms without
calling preprocess functions such as ``cusparseSpMM_preprocess`` or
``cusparseSpMV_preprocess``. Feel free to create an issue if this is needed.


Operation algorithms mapping
----------------------------

The following tables describe how a oneMKL SYCL Interface algorithm maps to the
backend's algorithms. Refer to the backend's documentation for a more detailed
explanation of the algorithms.

Backends with no equivalent algorithms will fall back to the backend's default
behavior.


spmm
^^^^

.. list-table::
:header-rows: 1
:widths: 10 30 45

* - ``spmm_alg`` value
- MKLCPU/MKLGPU
- cuSPARSE
* - ``default_alg``
- none
- ``CUSPARSE_SPMM_ALG_DEFAULT``
* - ``no_optimize_alg``
- none
- ``CUSPARSE_SPMM_ALG_DEFAULT``
* - ``coo_alg1``
- none
- ``CUSPARSE_SPMM_COO_ALG1``
* - ``coo_alg2``
- none
- ``CUSPARSE_SPMM_COO_ALG2``
* - ``coo_alg3``
- none
- ``CUSPARSE_SPMM_COO_ALG3``
* - ``coo_alg4``
- none
- ``CUSPARSE_SPMM_COO_ALG4``
* - ``csr_alg1``
- none
- ``CUSPARSE_SPMM_CSR_ALG1``
* - ``csr_alg2``
- none
- ``CUSPARSE_SPMM_CSR_ALG2``
* - ``csr_alg3``
- none
- ``CUSPARSE_SPMM_CSR_ALG3``


spmv
^^^^

.. list-table::
:header-rows: 1
:widths: 10 30 45

* - ``spmv_alg`` value
- MKLCPU/MKLGPU
- cuSPARSE
* - ``default_alg``
- none
- ``CUSPARSE_SPMV_ALG_DEFAULT``
* - ``no_optimize_alg``
- none
- ``CUSPARSE_SPMV_ALG_DEFAULT``
* - ``coo_alg1``
- none
- ``CUSPARSE_SPMV_COO_ALG1``
* - ``coo_alg2``
- none
- ``CUSPARSE_SPMV_COO_ALG2``
* - ``csr_alg1``
- none
- ``CUSPARSE_SPMV_CSR_ALG1``
* - ``csr_alg2``
- none
- ``CUSPARSE_SPMV_CSR_ALG2``
* - ``csr_alg3``
- none
- ``CUSPARSE_SPMV_ALG_DEFAULT``


spsv
^^^^

.. list-table::
:header-rows: 1
:widths: 10 30 45

* - ``spsv_alg`` value
- MKLCPU/MKLGPU
- cuSPARSE
* - ``default_alg``
- none
- ``CUSPARSE_SPSV_ALG_DEFAULT``
* - ``no_optimize_alg``
- none
- ``CUSPARSE_SPSV_ALG_DEFAULT``
Loading

0 comments on commit c8dc9a9

Please sign in to comment.