diff --git a/src/blas/backends/cublas/cublas_scope_handle.hpp b/src/blas/backends/cublas/cublas_scope_handle.hpp index 4a90f3fe9..b61d21bae 100644 --- a/src/blas/backends/cublas/cublas_scope_handle.hpp +++ b/src/blas/backends/cublas/cublas_scope_handle.hpp @@ -1,21 +1,21 @@ /*************************************************************************** -* Copyright (C) Codeplay Software Limited -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* For your convenience, a copy of the License has been included in this -* repository. -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -**************************************************************************/ + * Copyright (C) Codeplay Software Limited + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * For your convenience, a copy of the License has been included in this + * repository. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + **************************************************************************/ #ifndef _CUBLAS_SCOPED_HANDLE_HPP_ #define _CUBLAS_SCOPED_HANDLE_HPP_ #if __has_include() @@ -24,12 +24,12 @@ #include #endif +#include "cublas_handle.hpp" +#include "cublas_helper.hpp" #include #include #include #include -#include "cublas_helper.hpp" -#include "cublas_handle.hpp" namespace oneapi { namespace mkl { @@ -38,45 +38,45 @@ namespace cublas { /** * @brief NVIDIA advise for handle creation: -https://devtalk.nvidia.com/default/topic/838794/gpu-accelerated libraries/using-cublas-in-different-cuda-streams/ -According to NVIDIA: -1) It is required that different handles to be used for different devices: - http://docs.nvidia.com/cuda/cublas/index.html#cublas-context -2) It is recommended (but not required, if care is taken) that different handles be used for different host threads: +https://devtalk.nvidia.com/default/topic/838794/gpu-accelerated +libraries/using-cublas-in-different-cuda-streams/ According to NVIDIA: 1) +It is required that different handles to be used for different devices: + http://docs.nvidia.com/cuda/cublas/index.html#cublas-context +2) It is recommended (but not required, if care is taken) that different +handles be used for different host threads: http://docs.nvidia.com/cuda/cublas/index.html#thread-safety2changeme -3) It is neither required nor recommended that different handles be used for different streams on the same device, - using the same host thread. +3) It is neither required nor recommended that different handles be used +for different streams on the same device, using the same host thread. **/ class CublasScopedContextHandler { - sycl::interop_handle& ih; - CUdevice nativeDevice; - static thread_local std::shared_ptr cublasHandle; - CUstream get_stream(const sycl::queue& queue); - sycl::context get_context(const sycl::queue& queue); + sycl::interop_handle &ih; + CUdevice nativeDevice; + static thread_local std::shared_ptr cublasHandle; + CUstream get_stream(const sycl::queue &queue); + sycl::context get_context(const sycl::queue &queue); public: - CublasScopedContextHandler(sycl::interop_handle& ih); + CublasScopedContextHandler(sycl::interop_handle &ih); - /** - * @brief get_handle: creates the handle by implicitly impose the advice - * given by nvidia for creating a cublas_handle. (e.g. one cuStream per device - * per thread). + /** + * @brief get_handle: returns the handle assigned to the device from + * sycl::queue. * @param queue sycl queue. * @return cublasHandle_t a handle to construct cublas routines */ - cublasHandle_t get_handle(const sycl::queue& queue); - // This is a work-around function for reinterpret_casting the memory. This - // will be fixed when SYCL-2020 has been implemented for Pi backend. - template - inline T get_mem(U acc) { - CUdeviceptr cudaPtr = ih.get_native_mem(acc); - return reinterpret_cast(cudaPtr); - } + cublasHandle_t get_handle(const sycl::queue &queue); + // This is a work-around function for reinterpret_casting the memory. This + // will be fixed when SYCL-2020 has been implemented for Pi backend. + template inline T get_mem(U acc) { + CUdeviceptr cudaPtr = + ih.get_native_mem(acc); + return reinterpret_cast(cudaPtr); + } - void wait_stream(const sycl::queue& queue) { - cuStreamSynchronize(get_stream(queue)); - } + void wait_stream(const sycl::queue &queue) { + cuStreamSynchronize(get_stream(queue)); + } }; } // namespace cublas